fix folder
Showing 3 changed files with 20 additions and 6 deletions
This diff is collapsed.
Click to expand it.
... | @@ -48,6 +48,7 @@ class Finder: | ... | @@ -48,6 +48,7 @@ class Finder: |
48 | "其他约定与条件英文": "", | 48 | "其他约定与条件英文": "", |
49 | "其他约定与条件中文": "", | 49 | "其他约定与条件中文": "", |
50 | } | 50 | } |
51 | |||
51 | def get_line(self, ocr_results, key_string): | 52 | def get_line(self, ocr_results, key_string): |
52 | # 根据指定关键词, 找出与关键词同处一行的字符 | 53 | # 根据指定关键词, 找出与关键词同处一行的字符 |
53 | top, bottom = -1, -1 | 54 | top, bottom = -1, -1 |
... | @@ -69,6 +70,7 @@ class Finder: | ... | @@ -69,6 +70,7 @@ class Finder: |
69 | line_text = sorted(line_text, key=lambda x: x[0][0], reverse=False) | 70 | line_text = sorted(line_text, key=lambda x: x[0][0], reverse=False) |
70 | lines = ''.join([i[1] for i in line_text]) | 71 | lines = ''.join([i[1] for i in line_text]) |
71 | return lines | 72 | return lines |
73 | |||
72 | def page_predict(self, ocr_results, page_template): | 74 | def page_predict(self, ocr_results, page_template): |
73 | classes = [] | 75 | classes = [] |
74 | for pno in ocr_results: | 76 | for pno in ocr_results: |
... | @@ -82,6 +84,7 @@ class Finder: | ... | @@ -82,6 +84,7 @@ class Finder: |
82 | classes.append([pno, score]) | 84 | classes.append([pno, score]) |
83 | pred = sorted(classes, key=lambda x: x[1], reverse=True)[0] | 85 | pred = sorted(classes, key=lambda x: x[1], reverse=True)[0] |
84 | return pred | 86 | return pred |
87 | |||
85 | def get_top_key(self, ocr_results, key_string): # 加入过滤词典 | 88 | def get_top_key(self, ocr_results, key_string): # 加入过滤词典 |
86 | """找到与 key_string 最匹配的字段的 key | 89 | """找到与 key_string 最匹配的字段的 key |
87 | """ | 90 | """ |
... | @@ -90,6 +93,7 @@ class Finder: | ... | @@ -90,6 +93,7 @@ class Finder: |
90 | ratio_list = [[fuzz.ratio(key_string, ocr_results[key][1]), key] for key in ocr_results] | 93 | ratio_list = [[fuzz.ratio(key_string, ocr_results[key][1]), key] for key in ocr_results] |
91 | top_key = sorted(ratio_list, key=lambda x: x[0])[-1] | 94 | top_key = sorted(ratio_list, key=lambda x: x[0])[-1] |
92 | return top_key | 95 | return top_key |
96 | |||
93 | def get_top_iou(self, ocr_results, poly): | 97 | def get_top_iou(self, ocr_results, poly): |
94 | """求最大IoU | 98 | """求最大IoU |
95 | """ | 99 | """ |
... | @@ -108,6 +112,7 @@ class Finder: | ... | @@ -108,6 +112,7 @@ class Finder: |
108 | return -1, -1 | 112 | return -1, -1 |
109 | top_iou = sorted(iou_list, key=lambda x: x[0])[-1] | 113 | top_iou = sorted(iou_list, key=lambda x: x[0])[-1] |
110 | return top_iou | 114 | return top_iou |
115 | |||
111 | def get_key_value(self, ocr_results, key_string): | 116 | def get_key_value(self, ocr_results, key_string): |
112 | """根据 key 查找 value | 117 | """根据 key 查找 value |
113 | """ | 118 | """ |
... | @@ -139,6 +144,7 @@ class Finder: | ... | @@ -139,6 +144,7 @@ class Finder: |
139 | else: | 144 | else: |
140 | value = words | 145 | value = words |
141 | return value | 146 | return value |
147 | |||
142 | def get_contract_No(self): | 148 | def get_contract_No(self): |
143 | """提取左上角的合同编号字段 | 149 | """提取左上角的合同编号字段 |
144 | """ | 150 | """ |
... | @@ -153,6 +159,7 @@ class Finder: | ... | @@ -153,6 +159,7 @@ class Finder: |
153 | # TODO!!! | 159 | # TODO!!! |
154 | contract_No_list.append(contract_No) | 160 | contract_No_list.append(contract_No) |
155 | return contract_No_list | 161 | return contract_No_list |
162 | |||
156 | def get_info_in_page_3(self): | 163 | def get_info_in_page_3(self): |
157 | """提取第三页上的经销商名称,和经销商统一社会信用代码或公司注册号 | 164 | """提取第三页上的经销商名称,和经销商统一社会信用代码或公司注册号 |
158 | """ | 165 | """ |
... | @@ -178,6 +185,7 @@ class Finder: | ... | @@ -178,6 +185,7 @@ class Finder: |
178 | words = self.get_key_value(self.ocr_results[pno], '统一社会信用代码或公司注册号') | 185 | words = self.get_key_value(self.ocr_results[pno], '统一社会信用代码或公司注册号') |
179 | dealer_No = words.replace('O', '0') | 186 | dealer_No = words.replace('O', '0') |
180 | return dealer_name, dealer_No | 187 | return dealer_name, dealer_No |
188 | |||
181 | def get_info_in_page_38(self): | 189 | def get_info_in_page_38(self): |
182 | """提取第38页上的经销商名称 | 190 | """提取第38页上的经销商名称 |
183 | """ | 191 | """ |
... | @@ -195,6 +203,7 @@ class Finder: | ... | @@ -195,6 +203,7 @@ class Finder: |
195 | words = re.sub(r'[(())盖章《]', "", words) | 203 | words = re.sub(r'[(())盖章《]', "", words) |
196 | dealer_name = words | 204 | dealer_name = words |
197 | return dealer_name | 205 | return dealer_name |
206 | |||
198 | def get_guarantor(self): | 207 | def get_guarantor(self): |
199 | """提取第10页上保证人段落,所见即所得 | 208 | """提取第10页上保证人段落,所见即所得 |
200 | """ | 209 | """ |
... | @@ -210,6 +219,7 @@ class Finder: | ... | @@ -210,6 +219,7 @@ class Finder: |
210 | words = words.replace('【', '[').replace('】', ']').replace(',', ',').replace('(', '(').replace(')', ')') | 219 | words = words.replace('【', '[').replace('】', ']').replace(',', ',').replace('(', '(').replace(')', ')') |
211 | guarantor = words | 220 | guarantor = words |
212 | return guarantor | 221 | return guarantor |
222 | |||
213 | def get_info_in_page_39(self): | 223 | def get_info_in_page_39(self): |
214 | """提取综合授信合同上的一些字段 | 224 | """提取综合授信合同上的一些字段 |
215 | """ | 225 | """ |
... | @@ -291,6 +301,7 @@ class Finder: | ... | @@ -291,6 +301,7 @@ class Finder: |
291 | deposit_chn = f'{words}%' | 301 | deposit_chn = f'{words}%' |
292 | return amount_eng, amount_chn, term_start_eng, term_end_eng, \ | 302 | return amount_eng, amount_chn, term_start_eng, term_end_eng, \ |
293 | term_start_chn, term_end_chn, deposit_eng, deposit_chn | 303 | term_start_chn, term_end_chn, deposit_eng, deposit_chn |
304 | |||
294 | def get_other_arrangements_and_conditions(self): | 305 | def get_other_arrangements_and_conditions(self): |
295 | """获取其它约定与条件文本段落 | 306 | """获取其它约定与条件文本段落 |
296 | """ | 307 | """ |
... | @@ -311,6 +322,7 @@ class Finder: | ... | @@ -311,6 +322,7 @@ class Finder: |
311 | words = searchObj.group(1) | 322 | words = searchObj.group(1) |
312 | other_arrangements_and_conditions_chn = words | 323 | other_arrangements_and_conditions_chn = words |
313 | return other_arrangements_and_conditions_eng, other_arrangements_and_conditions_chn | 324 | return other_arrangements_and_conditions_eng, other_arrangements_and_conditions_chn |
325 | |||
314 | def get_info(self): | 326 | def get_info(self): |
315 | # 按照文档页码返回一个合同编号列表,依次表示每一页上识别到的合同编号 | 327 | # 按照文档页码返回一个合同编号列表,依次表示每一页上识别到的合同编号 |
316 | contract_No_list = self.get_contract_No() | 328 | contract_No_list = self.get_contract_No() |
... | @@ -337,6 +349,7 @@ class Finder: | ... | @@ -337,6 +349,7 @@ class Finder: |
337 | self.init_result["其他约定与条件中文"] = words_chn | 349 | self.init_result["其他约定与条件中文"] = words_chn |
338 | return self.init_result | 350 | return self.init_result |
339 | 351 | ||
352 | |||
340 | class TIFFHandler: | 353 | class TIFFHandler: |
341 | 354 | ||
342 | def __init__(self, path, img_save_path): | 355 | def __init__(self, path, img_save_path): |
... | @@ -568,6 +581,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -568,6 +581,7 @@ class Command(BaseCommand, LoggerMixin): |
568 | if len(true_file_set) == 0 and len(os_error_filename_set) > 0: | 581 | if len(true_file_set) == 0 and len(os_error_filename_set) > 0: |
569 | true_file_set.add(os_error_filename_set.pop()) | 582 | true_file_set.add(os_error_filename_set.pop()) |
570 | for name in true_file_set: | 583 | for name in true_file_set: |
584 | time.sleep(10) # 防止文件较大时,读取到不完整文件 | ||
571 | path = os.path.join(input_dir, name) | 585 | path = os.path.join(input_dir, name) |
572 | 586 | ||
573 | try: | 587 | try: | ... | ... |
... | @@ -989,7 +989,7 @@ def get_se_cms_compare_info_auto(last_obj, application_entity): | ... | @@ -989,7 +989,7 @@ def get_se_cms_compare_info_auto(last_obj, application_entity): |
989 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name)) | 989 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name)) |
990 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id)) | 990 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id)) |
991 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) | 991 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) |
992 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) | 992 | # vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) |
993 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE)) | 993 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE)) |
994 | bhsj = float(amount) / 1.13 | 994 | bhsj = float(amount) / 1.13 |
995 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([ | 995 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([ |
... | @@ -1464,7 +1464,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list): | ... | @@ -1464,7 +1464,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list): |
1464 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name)) | 1464 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name)) |
1465 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id)) | 1465 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id)) |
1466 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) | 1466 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) |
1467 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) | 1467 | # vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) |
1468 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE)) | 1468 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE)) |
1469 | bhsj = float(amount) / 1.13 | 1469 | bhsj = float(amount) / 1.13 |
1470 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([ | 1470 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([ |
... | @@ -1499,7 +1499,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list): | ... | @@ -1499,7 +1499,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list): |
1499 | gb34_field_input.append((consts.SE_GB_USED_FIELD[1], main_num)) | 1499 | gb34_field_input.append((consts.SE_GB_USED_FIELD[1], main_num)) |
1500 | gb34_field_input.append((consts.SE_GB_USED_FIELD[2], first_submission_date)) | 1500 | gb34_field_input.append((consts.SE_GB_USED_FIELD[2], first_submission_date)) |
1501 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) | 1501 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) |
1502 | vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) | 1502 | # vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) |
1503 | jypz_field_input.append(('vinNo', vin_no)) | 1503 | jypz_field_input.append(('vinNo', vin_no)) |
1504 | jypz_field_input.append(('vehicleTransactionAmount', amount)) | 1504 | jypz_field_input.append(('vehicleTransactionAmount', amount)) |
1505 | jypz_field_input.append((consts.SE_GB_USED_FIELD[-1], first_submission_date)) | 1505 | jypz_field_input.append((consts.SE_GB_USED_FIELD[-1], first_submission_date)) |
... | @@ -2683,9 +2683,9 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh): | ... | @@ -2683,9 +2683,9 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh): |
2683 | for i in cn_reason_list: | 2683 | for i in cn_reason_list: |
2684 | if i in tmp_set: | 2684 | if i in tmp_set: |
2685 | continue | 2685 | continue |
2686 | elif i in consts.BS_REASON: | 2686 | # elif i in consts.BS_REASON: |
2687 | tmp_set.add(i) | 2687 | # tmp_set.add(i) |
2688 | bs_cn_reason_list.append(i) | 2688 | # bs_cn_reason_list.append(i) |
2689 | else: | 2689 | else: |
2690 | tmp_set.add(i) | 2690 | tmp_set.add(i) |
2691 | last_cn_reason_list.append(i) | 2691 | last_cn_reason_list.append(i) | ... | ... |
-
Please register or sign in to post a comment