3b1e6657 by 周伟奇

fix folder

1 parent 6e9f7b32
...@@ -61,72 +61,114 @@ class Command(BaseCommand, LoggerMixin): ...@@ -61,72 +61,114 @@ class Command(BaseCommand, LoggerMixin):
61 def signal_handler(self, sig, frame): 61 def signal_handler(self, sig, frame):
62 self.switch = False # 停止处理文件 62 self.switch = False # 停止处理文件
63 63
64 def license1_process(self, ocr_data, license_summary, classify, img_path): 64 def license1_process(self, ocr_data, license_summary, classify):
65 # 类别:'0'身份证, '1'居住证 65 # 类别:'0'身份证, '1'居住证
66 license_data = ocr_data.get('data', []) 66 license_data = ocr_data.get('data')
67 if not license_data: 67 if not license_data:
68 return 68 return
69 if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合 69 if isinstance(license_data, dict):
70 for mvc_dict in license_data: 70 license_data.pop('base64_img', '')
71 try: 71
72 mvc_page = mvc_dict.pop('page') 72 # 保单
73 except Exception as e: 73 if classify == consts.INSURANCE_CLASSIFY:
74 pass 74 product_result = ['', '', '']
75 else: 75 for product in license_data.get('result', {}).get('productList', []):
76 name = product.get('name', {}).get('words', '')
77 if name.find('机动车损失') != -1:
78 product_result[0] = product.get('coverage', {}).get('words', '')
79 product_result[2] = product.get('deductible_franchise', {}).get('words', '')
80 elif name.find('第三者责任') != -1:
81 product_result[1] = product.get('coverage', {}).get('words', '')
82
83 special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
84 special = '无'
85 if special_str.find('宝马') != -1 or special_str.find('先锋国际融资租赁有限公司') != -1:
86 special = '有'
87 insurance_ocr_result = {
88 '被保险人姓名': license_data.get('result', {}).get('insured', {}).get('name', {}).get('words', ''),
89 '被保险人证件号码': license_data.get('result', {}).get('insured', {}).get('certiCode', {}).get('words', ''),
90 '车架号': license_data.get('result', {}).get('vehicle', {}).get('VIN', {}).get('words', ''),
91 '机动车损失保险金额': product_result[0],
92 '机动车第三者责任保险金额': product_result[1],
93 '机动车损失保险绝对免赔率/绝对免赔额': product_result[2],
94 '保险费合计': license_data.get('result', {}).get('premiumSum', {}).get('words', ''),
95 '保险起始日期': license_data.get('result', {}).get('startDate', {}).get('words', ''),
96 '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''),
97 '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''),
98 '特别约定第一受益人': special,
99 }
100 license_summary.setdefault(classify, []).append(insurance_ocr_result)
101 # DDA
102 elif classify == consts.DDA_CLASSIFY:
103 pro = ocr_data.get('confidence', 0)
104 if pro < consts.DDA_PRO_MIN:
105 return
106 dda_ocr_result = {}
107 for key, value in license_data.get('result', {}).items():
108 dda_ocr_result[key] = value.get('words', '')
109 dda_ocr_result[consts.DDA_PRO] = pro
110 license_summary.setdefault(classify, []).append(dda_ocr_result)
111 # 抵押登记豁免函
112 elif classify == consts.HMH_CLASSIFY:
113 hmh_ocr_result = {}
114 for key, value in license_data.get('words_result', {}).items():
115 hmh_ocr_result[key] = value.get('words', '')
116 license_summary.setdefault(classify, []).append(hmh_ocr_result)
117 # 二手车交易凭证
118 elif classify == consts.JYPZ_CLASSIFY:
119 jypz_ocr_result = {}
120 for key, value in license_data.get('result', {}).items():
121 jypz_ocr_result[key] = value.get('words', '')
122 license_summary.setdefault(classify, []).append(jypz_ocr_result)
123 # 车辆登记证 3/4页结果整合
124 elif classify == consts.MVC_CLASSIFY:
125 rebuild_data_dict = {}
126 mvc_page = license_data.pop('page', 'VehicleRCI')
127 mvc_res = license_data.pop('results', {})
76 if mvc_page == 'VehicleRegArea': 128 if mvc_page == 'VehicleRegArea':
77 mvc_res = mvc_dict.pop('results', {}) 129 rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '')
78 mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '') 130 for register_info in mvc_res.get('登记信息', []):
79 for register_info in mvc_res.get('register_info', []): 131 register_info.pop('register_type', None)
80 for detail_dict in register_info.get('details', {}).values(): 132 register_info.pop('register_type_name', None)
81 mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append( 133 for cn_key, detail_dict in register_info.items():
134 rebuild_data_dict.setdefault(cn_key, []).append(
82 detail_dict.get('words', '')) 135 detail_dict.get('words', ''))
83 del mvc_res
84 if classify == consts.IC_CLASSIFY:
85 for id_card_dict in license_data:
86 try:
87 base64_img = id_card_dict.pop('base64_img')
88 except Exception as e:
89 continue
90 else: 136 else:
91 card_type = -1 137 for cn_key, detail_dict in mvc_res.items():
92 json_data_4 = { 138 rebuild_data_dict[cn_key] = detail_dict.get('words', '')
93 'mode': 1, 139 del mvc_res
94 'user_info': { 140 license_summary.setdefault(classify, []).append(rebuild_data_dict)
95 'image_content': base64_img,
96 },
97 'options': {
98 'distinguish_type': 1,
99 'auto_rotate': True,
100 },
101 }
102 for times in range(consts.RETRY_TIMES):
103 try:
104 start_time = time.time()
105 ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
106 if ocr_4_response.status_code != 200:
107 raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
108 except Exception as e:
109 self.folder_log.warn(
110 '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
111 self.log_base, times, img_path, traceback.format_exc()))
112 else:
113 ocr_4_res = ocr_4_response.json()
114 end_time = time.time()
115 speed_time = int(end_time - start_time)
116
117 if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
118 card_type = ocr_4_res.get('result', {}).get(
119 'idcard_distinguish_result', {}).get('result', -1)
120 141
121 self.folder_log.info( 142 # 身份证真伪
122 '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format( 143 elif classify == consts.IC_CLASSIFY:
123 self.log_base, img_path, speed_time)) 144 id_card_dict = {}
124 break 145 card_type = license_data.get('type', '')
146 is_ic = card_type.startswith('身份证')
147 is_info_side = card_type.endswith('信息面')
148 id_card_dict['类别'] = '0' if is_ic else '1'
149 if is_ic:
150 field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
125 else: 151 else:
126 self.folder_log.warn( 152 field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
127 '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) 153 for write_field, search_field in field_map:
154 id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '')
155 if not is_info_side:
156 start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '')
157 end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '')
158 id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time)
128 159
129 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) 160 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(1)
161 license_summary.setdefault(classify, []).append(id_card_dict)
162 # 购车发票 & 二手车发票
163 elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY:
164 rebuild_data_dict = {}
165 mvi_res = license_data.pop('result', {})
166 for en_key, detail_dict in mvi_res.items():
167 rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
168 rebuild_data_dict['新旧版式'] = license_data.get('layout', '')
169 license_summary.setdefault(classify, []).append(rebuild_data_dict)
170 # 其他
171 else:
130 license_summary.setdefault(classify, []).extend(license_data) 172 license_summary.setdefault(classify, []).extend(license_data)
131 173
132 @staticmethod 174 @staticmethod
...@@ -166,7 +208,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -166,7 +208,7 @@ class Command(BaseCommand, LoggerMixin):
166 if isinstance(data_list, list): 208 if isinstance(data_list, list):
167 for ocr_data in data_list: 209 for ocr_data in data_list:
168 # part_idx = part_idx + 1 210 # part_idx = part_idx + 1
169 self.license1_process(ocr_data, license_summary, classify, img_path) 211 self.license1_process(ocr_data, license_summary, classify)
170 212
171 wb = BSWorkbook(set(), set(), set(), set(), set()) 213 wb = BSWorkbook(set(), set(), set(), set(), set())
172 wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0]) 214 wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0])
......
...@@ -48,6 +48,7 @@ class Finder: ...@@ -48,6 +48,7 @@ class Finder:
48 "其他约定与条件英文": "", 48 "其他约定与条件英文": "",
49 "其他约定与条件中文": "", 49 "其他约定与条件中文": "",
50 } 50 }
51
51 def get_line(self, ocr_results, key_string): 52 def get_line(self, ocr_results, key_string):
52 # 根据指定关键词, 找出与关键词同处一行的字符 53 # 根据指定关键词, 找出与关键词同处一行的字符
53 top, bottom = -1, -1 54 top, bottom = -1, -1
...@@ -69,6 +70,7 @@ class Finder: ...@@ -69,6 +70,7 @@ class Finder:
69 line_text = sorted(line_text, key=lambda x: x[0][0], reverse=False) 70 line_text = sorted(line_text, key=lambda x: x[0][0], reverse=False)
70 lines = ''.join([i[1] for i in line_text]) 71 lines = ''.join([i[1] for i in line_text])
71 return lines 72 return lines
73
72 def page_predict(self, ocr_results, page_template): 74 def page_predict(self, ocr_results, page_template):
73 classes = [] 75 classes = []
74 for pno in ocr_results: 76 for pno in ocr_results:
...@@ -82,6 +84,7 @@ class Finder: ...@@ -82,6 +84,7 @@ class Finder:
82 classes.append([pno, score]) 84 classes.append([pno, score])
83 pred = sorted(classes, key=lambda x: x[1], reverse=True)[0] 85 pred = sorted(classes, key=lambda x: x[1], reverse=True)[0]
84 return pred 86 return pred
87
85 def get_top_key(self, ocr_results, key_string): # 加入过滤词典 88 def get_top_key(self, ocr_results, key_string): # 加入过滤词典
86 """找到与 key_string 最匹配的字段的 key 89 """找到与 key_string 最匹配的字段的 key
87 """ 90 """
...@@ -90,6 +93,7 @@ class Finder: ...@@ -90,6 +93,7 @@ class Finder:
90 ratio_list = [[fuzz.ratio(key_string, ocr_results[key][1]), key] for key in ocr_results] 93 ratio_list = [[fuzz.ratio(key_string, ocr_results[key][1]), key] for key in ocr_results]
91 top_key = sorted(ratio_list, key=lambda x: x[0])[-1] 94 top_key = sorted(ratio_list, key=lambda x: x[0])[-1]
92 return top_key 95 return top_key
96
93 def get_top_iou(self, ocr_results, poly): 97 def get_top_iou(self, ocr_results, poly):
94 """求最大IoU 98 """求最大IoU
95 """ 99 """
...@@ -108,6 +112,7 @@ class Finder: ...@@ -108,6 +112,7 @@ class Finder:
108 return -1, -1 112 return -1, -1
109 top_iou = sorted(iou_list, key=lambda x: x[0])[-1] 113 top_iou = sorted(iou_list, key=lambda x: x[0])[-1]
110 return top_iou 114 return top_iou
115
111 def get_key_value(self, ocr_results, key_string): 116 def get_key_value(self, ocr_results, key_string):
112 """根据 key 查找 value 117 """根据 key 查找 value
113 """ 118 """
...@@ -139,6 +144,7 @@ class Finder: ...@@ -139,6 +144,7 @@ class Finder:
139 else: 144 else:
140 value = words 145 value = words
141 return value 146 return value
147
142 def get_contract_No(self): 148 def get_contract_No(self):
143 """提取左上角的合同编号字段 149 """提取左上角的合同编号字段
144 """ 150 """
...@@ -153,6 +159,7 @@ class Finder: ...@@ -153,6 +159,7 @@ class Finder:
153 # TODO!!! 159 # TODO!!!
154 contract_No_list.append(contract_No) 160 contract_No_list.append(contract_No)
155 return contract_No_list 161 return contract_No_list
162
156 def get_info_in_page_3(self): 163 def get_info_in_page_3(self):
157 """提取第三页上的经销商名称,和经销商统一社会信用代码或公司注册号 164 """提取第三页上的经销商名称,和经销商统一社会信用代码或公司注册号
158 """ 165 """
...@@ -178,6 +185,7 @@ class Finder: ...@@ -178,6 +185,7 @@ class Finder:
178 words = self.get_key_value(self.ocr_results[pno], '统一社会信用代码或公司注册号') 185 words = self.get_key_value(self.ocr_results[pno], '统一社会信用代码或公司注册号')
179 dealer_No = words.replace('O', '0') 186 dealer_No = words.replace('O', '0')
180 return dealer_name, dealer_No 187 return dealer_name, dealer_No
188
181 def get_info_in_page_38(self): 189 def get_info_in_page_38(self):
182 """提取第38页上的经销商名称 190 """提取第38页上的经销商名称
183 """ 191 """
...@@ -195,6 +203,7 @@ class Finder: ...@@ -195,6 +203,7 @@ class Finder:
195 words = re.sub(r'[(())盖章《]', "", words) 203 words = re.sub(r'[(())盖章《]', "", words)
196 dealer_name = words 204 dealer_name = words
197 return dealer_name 205 return dealer_name
206
198 def get_guarantor(self): 207 def get_guarantor(self):
199 """提取第10页上保证人段落,所见即所得 208 """提取第10页上保证人段落,所见即所得
200 """ 209 """
...@@ -210,6 +219,7 @@ class Finder: ...@@ -210,6 +219,7 @@ class Finder:
210 words = words.replace('【', '[').replace('】', ']').replace(',', ',').replace('(', '(').replace(')', ')') 219 words = words.replace('【', '[').replace('】', ']').replace(',', ',').replace('(', '(').replace(')', ')')
211 guarantor = words 220 guarantor = words
212 return guarantor 221 return guarantor
222
213 def get_info_in_page_39(self): 223 def get_info_in_page_39(self):
214 """提取综合授信合同上的一些字段 224 """提取综合授信合同上的一些字段
215 """ 225 """
...@@ -291,6 +301,7 @@ class Finder: ...@@ -291,6 +301,7 @@ class Finder:
291 deposit_chn = f'{words}%' 301 deposit_chn = f'{words}%'
292 return amount_eng, amount_chn, term_start_eng, term_end_eng, \ 302 return amount_eng, amount_chn, term_start_eng, term_end_eng, \
293 term_start_chn, term_end_chn, deposit_eng, deposit_chn 303 term_start_chn, term_end_chn, deposit_eng, deposit_chn
304
294 def get_other_arrangements_and_conditions(self): 305 def get_other_arrangements_and_conditions(self):
295 """获取其它约定与条件文本段落 306 """获取其它约定与条件文本段落
296 """ 307 """
...@@ -311,6 +322,7 @@ class Finder: ...@@ -311,6 +322,7 @@ class Finder:
311 words = searchObj.group(1) 322 words = searchObj.group(1)
312 other_arrangements_and_conditions_chn = words 323 other_arrangements_and_conditions_chn = words
313 return other_arrangements_and_conditions_eng, other_arrangements_and_conditions_chn 324 return other_arrangements_and_conditions_eng, other_arrangements_and_conditions_chn
325
314 def get_info(self): 326 def get_info(self):
315 # 按照文档页码返回一个合同编号列表,依次表示每一页上识别到的合同编号 327 # 按照文档页码返回一个合同编号列表,依次表示每一页上识别到的合同编号
316 contract_No_list = self.get_contract_No() 328 contract_No_list = self.get_contract_No()
...@@ -337,6 +349,7 @@ class Finder: ...@@ -337,6 +349,7 @@ class Finder:
337 self.init_result["其他约定与条件中文"] = words_chn 349 self.init_result["其他约定与条件中文"] = words_chn
338 return self.init_result 350 return self.init_result
339 351
352
340 class TIFFHandler: 353 class TIFFHandler:
341 354
342 def __init__(self, path, img_save_path): 355 def __init__(self, path, img_save_path):
...@@ -568,6 +581,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -568,6 +581,7 @@ class Command(BaseCommand, LoggerMixin):
568 if len(true_file_set) == 0 and len(os_error_filename_set) > 0: 581 if len(true_file_set) == 0 and len(os_error_filename_set) > 0:
569 true_file_set.add(os_error_filename_set.pop()) 582 true_file_set.add(os_error_filename_set.pop())
570 for name in true_file_set: 583 for name in true_file_set:
584 time.sleep(10) # 防止文件较大时,读取到不完整文件
571 path = os.path.join(input_dir, name) 585 path = os.path.join(input_dir, name)
572 586
573 try: 587 try:
......
...@@ -989,7 +989,7 @@ def get_se_cms_compare_info_auto(last_obj, application_entity): ...@@ -989,7 +989,7 @@ def get_se_cms_compare_info_auto(last_obj, application_entity):
989 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name)) 989 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name))
990 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id)) 990 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id))
991 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) 991 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date))
992 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) 992 # vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
993 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE)) 993 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE))
994 bhsj = float(amount) / 1.13 994 bhsj = float(amount) / 1.13
995 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([ 995 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([
...@@ -1464,7 +1464,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list): ...@@ -1464,7 +1464,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list):
1464 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name)) 1464 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name))
1465 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id)) 1465 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id))
1466 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) 1466 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date))
1467 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) 1467 # vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
1468 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE)) 1468 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE))
1469 bhsj = float(amount) / 1.13 1469 bhsj = float(amount) / 1.13
1470 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([ 1470 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([
...@@ -1499,7 +1499,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list): ...@@ -1499,7 +1499,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list):
1499 gb34_field_input.append((consts.SE_GB_USED_FIELD[1], main_num)) 1499 gb34_field_input.append((consts.SE_GB_USED_FIELD[1], main_num))
1500 gb34_field_input.append((consts.SE_GB_USED_FIELD[2], first_submission_date)) 1500 gb34_field_input.append((consts.SE_GB_USED_FIELD[2], first_submission_date))
1501 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date)) 1501 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date))
1502 vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE)) 1502 # vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
1503 jypz_field_input.append(('vinNo', vin_no)) 1503 jypz_field_input.append(('vinNo', vin_no))
1504 jypz_field_input.append(('vehicleTransactionAmount', amount)) 1504 jypz_field_input.append(('vehicleTransactionAmount', amount))
1505 jypz_field_input.append((consts.SE_GB_USED_FIELD[-1], first_submission_date)) 1505 jypz_field_input.append((consts.SE_GB_USED_FIELD[-1], first_submission_date))
...@@ -2683,9 +2683,9 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh): ...@@ -2683,9 +2683,9 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh):
2683 for i in cn_reason_list: 2683 for i in cn_reason_list:
2684 if i in tmp_set: 2684 if i in tmp_set:
2685 continue 2685 continue
2686 elif i in consts.BS_REASON: 2686 # elif i in consts.BS_REASON:
2687 tmp_set.add(i) 2687 # tmp_set.add(i)
2688 bs_cn_reason_list.append(i) 2688 # bs_cn_reason_list.append(i)
2689 else: 2689 else:
2690 tmp_set.add(i) 2690 tmp_set.add(i)
2691 last_cn_reason_list.append(i) 2691 last_cn_reason_list.append(i)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!