36ca3f9a by 周伟奇

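Fix HIL e-contract classification: map each keyword to a single classify (drop classify_2), add a separate '车辆处置协议' entry, and match keywords without the '电子签署-' prefix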

1 parent 560316ec
...@@ -2209,12 +2209,13 @@ FILE_NAME_PREFIX_MAP = { ...@@ -2209,12 +2209,13 @@ FILE_NAME_PREFIX_MAP = {
2209 2209
2210 ECONTRACT_KEYWORDS_MAP = { 2210 ECONTRACT_KEYWORDS_MAP = {
2211 AFC_PREFIX: [ 2211 AFC_PREFIX: [
2212 ('电子签署-汽车抵押贷款合同', CONTRACT_CLASSIFY, 0), 2212 ('抵押贷款合同', CONTRACT_CLASSIFY),
2213 # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0), 2213 # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
2214 ], 2214 ],
2215 HIL_PREFIX: [ 2215 HIL_PREFIX: [
2216 ('电子签署-售后回租合同', HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), 2216 ('售后回租合同', HIL_CONTRACT_1_CLASSIFY),
2217 ('电子签署-汽车租赁抵押合同', HIL_CONTRACT_2_CLASSIFY, 0), 2217 ('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY),
2218 ('车辆处置协议', HIL_CONTRACT_3_CLASSIFY),
2218 # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0), 2219 # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
2219 ] 2220 ]
2220 } 2221 }
......
...@@ -102,10 +102,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -102,10 +102,10 @@ class Command(BaseCommand, LoggerMixin):
102 info_tuple = task_str.split(consts.SPLIT_STR) 102 info_tuple = task_str.split(consts.SPLIT_STR)
103 if len(info_tuple) == 2: 103 if len(info_tuple) == 2:
104 business_type, doc_id_str = info_tuple 104 business_type, doc_id_str = info_tuple
105 classify_1_str = classify_2_str = '0' 105 classify_1_str = '0'
106 rebuild_task_str = task_str 106 rebuild_task_str = task_str
107 else: 107 else:
108 business_type, doc_id_str, classify_1_str, classify_2_str = info_tuple 108 business_type, doc_id_str, classify_1_str = info_tuple
109 rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) 109 rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str)
110 doc_id = int(doc_id_str) 110 doc_id = int(doc_id_str)
111 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 111 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
...@@ -130,7 +130,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -130,7 +130,7 @@ class Command(BaseCommand, LoggerMixin):
130 else: 130 else:
131 self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( 131 self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format(
132 self.log_base, task_str, is_priority)) 132 self.log_base, task_str, is_priority))
133 return doc, business_type, rebuild_task_str, classify_1_str, classify_2_str 133 return doc, business_type, rebuild_task_str, classify_1_str
134 134
135 # def pdf_download(self, doc, pdf_path): 135 # def pdf_download(self, doc, pdf_path):
136 # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): 136 # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
...@@ -1014,7 +1014,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1014,7 +1014,7 @@ class Command(BaseCommand, LoggerMixin):
1014 while self.switch: 1014 while self.switch:
1015 try: 1015 try:
1016 # 1. 从队列获取文件信息 1016 # 1. 从队列获取文件信息
1017 doc, business_type, task_str, classify_1_str, classify_2_str = self.get_doc_info() 1017 doc, business_type, task_str, classify_1_str = self.get_doc_info()
1018 # 队列为空时的处理 1018 # 队列为空时的处理
1019 if doc is None: 1019 if doc is None:
1020 time.sleep(self.sleep_time_doc_get) 1020 time.sleep(self.sleep_time_doc_get)
...@@ -1188,10 +1188,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1188,10 +1188,8 @@ class Command(BaseCommand, LoggerMixin):
1188 'page_num': page_num, 1188 'page_num': page_num,
1189 'page_info': page_info 1189 'page_info': page_info
1190 } 1190 }
1191
1192 else: 1191 else:
1193 file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) 1192 file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str)
1194 file_type_2 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_2_str)
1195 ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1) 1193 ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1)
1196 rebuild_res_1 = {} 1194 rebuild_res_1 = {}
1197 page_res = {} 1195 page_res = {}
...@@ -1205,19 +1203,6 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1205,19 +1203,6 @@ class Command(BaseCommand, LoggerMixin):
1205 'page_num': page_num, 1203 'page_num': page_num,
1206 'page_info': page_info 1204 'page_info': page_info
1207 } 1205 }
1208 if isinstance(file_type_2, int):
1209 rebuild_res_2 = {}
1210 ocr_result_2 = hil_predict(pdf_handler.pdf_info, file_type_2)
1211 for field_name, field_info in ocr_result_2.items():
1212 page_num = field_info.pop('page', 'page_1')
1213 rebuild_res_2.setdefault(page_num, dict())[field_name] = field_info
1214 for page_num, page_info in ocr_result_2.items():
1215 if isinstance(page_num, str) and page_num.startswith('page_'):
1216 page_res[page_num] = {
1217 'classify': int(classify_2_str),
1218 'page_num': page_num,
1219 'page_info': page_info
1220 }
1221 1206
1222 contract_res = {} 1207 contract_res = {}
1223 for img_path_tmp, page_key in pdf_handler.img_path_pno_list: 1208 for img_path_tmp, page_key in pdf_handler.img_path_pno_list:
......
...@@ -586,14 +586,13 @@ class UploadDocView(GenericView, DocHandler): ...@@ -586,14 +586,13 @@ class UploadDocView(GenericView, DocHandler):
586 # 3. 选择队列进入 586 # 3. 选择队列进入
587 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists() 587 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
588 588
589 classify_1 = classify_2 = 0 589 classify_1 = 0
590 if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: 590 if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
591 for keyword, classify_1_tmp, classify_2_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): 591 for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
592 if keyword in document_name: 592 if keyword in document_name:
593 classify_1 = classify_1_tmp 593 classify_1 = classify_1_tmp
594 classify_2 = classify_2_tmp
595 break 594 break
596 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), str(classify_2)]) 595 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)])
597 enqueue_res = rh.enqueue([task], is_priority) 596 enqueue_res = rh.enqueue([task], is_priority)
598 self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' 597 self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
599 '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id, 598 '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id,
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!