merge
Showing 3 changed files with 12 additions and 8 deletions
... | @@ -10,4 +10,7 @@ urlpatterns = [ | ... | @@ -10,4 +10,7 @@ urlpatterns = [ |
10 | path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), | 10 | path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), |
11 | path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), | 11 | path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), |
12 | path(r'contract/v1', views.SEContractView.as_view()), | 12 | path(r'contract/v1', views.SEContractView.as_view()), |
13 | path(r'reocr', views.DocReOcrView.as_view()), | ||
14 | path(r'batch/reocr', views.BatchReOcrView.as_view()), | ||
15 | |||
13 | ] | 16 | ] | ... | ... |
... | @@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin): |
100 | if len(info_tuple) == 2: | 100 | if len(info_tuple) == 2: |
101 | business_type, doc_id_str = info_tuple | 101 | business_type, doc_id_str = info_tuple |
102 | else: | 102 | else: |
103 | business_type, doc_id_str, classify_1_str = info_tuple | 103 | business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple |
104 | doc_id = int(doc_id_str) | 104 | doc_id = int(doc_id_str) |
105 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 105 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc |
106 | zip_doc = doc_class.objects.filter(id=doc_id).first() | 106 | zip_doc = doc_class.objects.filter(id=doc_id).first() |
... | @@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin): |
124 | else: | 124 | else: |
125 | self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format( | 125 | self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format( |
126 | self.log_base, task_str)) | 126 | self.log_base, task_str)) |
127 | return zip_doc, business_type | 127 | return zip_doc, business_type, re_ocr_flag |
128 | 128 | ||
129 | def get_doc_info(self, task_str, is_priority=False): | 129 | def get_doc_info(self, task_str, is_priority=False): |
130 | try: | 130 | try: |
... | @@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): |
135 | classify_1_str = '0' | 135 | classify_1_str = '0' |
136 | rebuild_task_str = task_str | 136 | rebuild_task_str = task_str |
137 | else: | 137 | else: |
138 | business_type, doc_id_str, classify_1_str = info_tuple | 138 | business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple |
139 | rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) | 139 | rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) |
140 | doc_id = int(doc_id_str) | 140 | doc_id = int(doc_id_str) |
141 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 141 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc |
... | @@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin): |
160 | else: | 160 | else: |
161 | self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( | 161 | self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( |
162 | self.log_base, task_str, is_priority)) | 162 | self.log_base, task_str, is_priority)) |
163 | return doc, business_type, rebuild_task_str, classify_1_str | 163 | return doc, business_type, rebuild_task_str, classify_1_str, re_ocr_flag |
164 | 164 | ||
165 | # def pdf_download(self, doc, pdf_path): | 165 | # def pdf_download(self, doc, pdf_path): |
166 | # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | 166 | # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): |
... | @@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin): |
1202 | self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str)) | 1202 | self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str)) |
1203 | 1203 | ||
1204 | # 2. 修改doc状态: 识别中 | 1204 | # 2. 修改doc状态: 识别中 |
1205 | zip_doc, business_type = self.get_zip_doc_info(task_str) | 1205 | zip_doc, business_type, re_ocr_flag = self.get_zip_doc_info(task_str) |
1206 | if zip_doc is None: | 1206 | if zip_doc is None: |
1207 | time.sleep(self.sleep_time_doc_get) | 1207 | time.sleep(self.sleep_time_doc_get) |
1208 | continue | 1208 | continue |
... | @@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin): |
1339 | 1339 | ||
1340 | try: | 1340 | try: |
1341 | # 1. 从队列获取文件信息 | 1341 | # 1. 从队列获取文件信息 |
1342 | doc, business_type, task_str, classify_1_str = self.get_doc_info(task_str, is_priority) | 1342 | doc, business_type, task_str, classify_1_str, re_ocr_flag = self.get_doc_info(task_str, is_priority) |
1343 | # 队列为空时的处理 | 1343 | # 队列为空时的处理 |
1344 | if doc is None: | 1344 | if doc is None: |
1345 | time.sleep(self.sleep_time_doc_get) | 1345 | time.sleep(self.sleep_time_doc_get) |
... | @@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin): |
1389 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( | 1389 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( |
1390 | self.log_base, task_str, times)) | 1390 | self.log_base, task_str, times)) |
1391 | start_time = time.time() | 1391 | start_time = time.time() |
1392 | pdf_handler.extract_image(max_img_count) | 1392 | max_img_count_or_none = None if re_ocr_flag == 'Y' else max_img_count |
1393 | pdf_handler.extract_image(max_img_count_or_none) | ||
1393 | end_time = time.time() | 1394 | end_time = time.time() |
1394 | speed_time = int(end_time - start_time) | 1395 | speed_time = int(end_time - start_time) |
1395 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( | 1396 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( |
... | @@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin): |
1407 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | 1408 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( |
1408 | self.log_base, task_str)) | 1409 | self.log_base, task_str)) |
1409 | raise Exception('pdf img empty') | 1410 | raise Exception('pdf img empty') |
1410 | elif pdf_handler.img_count >= max_img_count: | 1411 | elif re_ocr_flag == 'N' and pdf_handler.img_count >= max_img_count: |
1411 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( | 1412 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( |
1412 | self.log_base, task_str, pdf_handler.img_count)) | 1413 | self.log_base, task_str, pdf_handler.img_count)) |
1413 | 1414 | ... | ... |
This diff is collapsed.
Click to expand it.
-
Please register or sign in to post a comment