160ac57d by 冯轩

merge

2 parents 88f01673 d619642f
...@@ -10,4 +10,7 @@ urlpatterns = [ ...@@ -10,4 +10,7 @@ urlpatterns = [
10 path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), 10 path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()),
11 path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), 11 path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()),
12 path(r'contract/v1', views.SEContractView.as_view()), 12 path(r'contract/v1', views.SEContractView.as_view()),
13 path(r'reocr', views.DocReOcrView.as_view()),
14 path(r'batch/reocr', views.BatchReOcrView.as_view()),
15
13 ] 16 ]
......
...@@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin):
100 if len(info_tuple) == 2: 100 if len(info_tuple) == 2:
101 business_type, doc_id_str = info_tuple 101 business_type, doc_id_str = info_tuple
102 else: 102 else:
103 business_type, doc_id_str, classify_1_str = info_tuple 103 business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple
104 doc_id = int(doc_id_str) 104 doc_id = int(doc_id_str)
105 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 105 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
106 zip_doc = doc_class.objects.filter(id=doc_id).first() 106 zip_doc = doc_class.objects.filter(id=doc_id).first()
...@@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin):
124 else: 124 else:
125 self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format( 125 self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format(
126 self.log_base, task_str)) 126 self.log_base, task_str))
127 return zip_doc, business_type 127 return zip_doc, business_type, re_ocr_flag
128 128
129 def get_doc_info(self, task_str, is_priority=False): 129 def get_doc_info(self, task_str, is_priority=False):
130 try: 130 try:
...@@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin):
135 classify_1_str = '0' 135 classify_1_str = '0'
136 rebuild_task_str = task_str 136 rebuild_task_str = task_str
137 else: 137 else:
138 business_type, doc_id_str, classify_1_str = info_tuple 138 business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple
139 rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) 139 rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str)
140 doc_id = int(doc_id_str) 140 doc_id = int(doc_id_str)
141 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 141 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
...@@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin):
160 else: 160 else:
161 self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( 161 self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format(
162 self.log_base, task_str, is_priority)) 162 self.log_base, task_str, is_priority))
163 return doc, business_type, rebuild_task_str, classify_1_str 163 return doc, business_type, rebuild_task_str, classify_1_str, re_ocr_flag
164 164
165 # def pdf_download(self, doc, pdf_path): 165 # def pdf_download(self, doc, pdf_path):
166 # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): 166 # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
...@@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin):
1202 self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str)) 1202 self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str))
1203 1203
1204 # 2. 修改doc状态: 识别中 1204 # 2. 修改doc状态: 识别中
1205 zip_doc, business_type = self.get_zip_doc_info(task_str) 1205 zip_doc, business_type, re_ocr_flag = self.get_zip_doc_info(task_str)
1206 if zip_doc is None: 1206 if zip_doc is None:
1207 time.sleep(self.sleep_time_doc_get) 1207 time.sleep(self.sleep_time_doc_get)
1208 continue 1208 continue
...@@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin):
1339 1339
1340 try: 1340 try:
1341 # 1. 从队列获取文件信息 1341 # 1. 从队列获取文件信息
1342 doc, business_type, task_str, classify_1_str = self.get_doc_info(task_str, is_priority) 1342 doc, business_type, task_str, classify_1_str, re_ocr_flag = self.get_doc_info(task_str, is_priority)
1343 # 队列为空时的处理 1343 # 队列为空时的处理
1344 if doc is None: 1344 if doc is None:
1345 time.sleep(self.sleep_time_doc_get) 1345 time.sleep(self.sleep_time_doc_get)
...@@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin):
1389 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( 1389 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
1390 self.log_base, task_str, times)) 1390 self.log_base, task_str, times))
1391 start_time = time.time() 1391 start_time = time.time()
1392 pdf_handler.extract_image(max_img_count) 1392 max_img_count_or_none = None if re_ocr_flag == 'Y' else max_img_count
1393 pdf_handler.extract_image(max_img_count_or_none)
1393 end_time = time.time() 1394 end_time = time.time()
1394 speed_time = int(end_time - start_time) 1395 speed_time = int(end_time - start_time)
1395 self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( 1396 self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format(
...@@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin):
1407 self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( 1408 self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format(
1408 self.log_base, task_str)) 1409 self.log_base, task_str))
1409 raise Exception('pdf img empty') 1410 raise Exception('pdf img empty')
1410 elif pdf_handler.img_count >= max_img_count: 1411 elif re_ocr_flag == 'N' and pdf_handler.img_count >= max_img_count:
1411 self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( 1412 self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format(
1412 self.log_base, task_str, pdf_handler.img_count)) 1413 self.log_base, task_str, pdf_handler.img_count))
1413 1414
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!