merge
Showing
3 changed files
with
296 additions
and
92 deletions
| ... | @@ -10,4 +10,7 @@ urlpatterns = [ | ... | @@ -10,4 +10,7 @@ urlpatterns = [ |
| 10 | path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), | 10 | path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), |
| 11 | path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), | 11 | path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), |
| 12 | path(r'contract/v1', views.SEContractView.as_view()), | 12 | path(r'contract/v1', views.SEContractView.as_view()), |
| 13 | path(r'reocr', views.DocReOcrView.as_view()), | ||
| 14 | path(r'batch/reocr', views.BatchReOcrView.as_view()), | ||
| 15 | |||
| 13 | ] | 16 | ] | ... | ... |
| ... | @@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin): |
| 100 | if len(info_tuple) == 2: | 100 | if len(info_tuple) == 2: |
| 101 | business_type, doc_id_str = info_tuple | 101 | business_type, doc_id_str = info_tuple |
| 102 | else: | 102 | else: |
| 103 | business_type, doc_id_str, classify_1_str = info_tuple | 103 | business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple |
| 104 | doc_id = int(doc_id_str) | 104 | doc_id = int(doc_id_str) |
| 105 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 105 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc |
| 106 | zip_doc = doc_class.objects.filter(id=doc_id).first() | 106 | zip_doc = doc_class.objects.filter(id=doc_id).first() |
| ... | @@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin): |
| 124 | else: | 124 | else: |
| 125 | self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format( | 125 | self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format( |
| 126 | self.log_base, task_str)) | 126 | self.log_base, task_str)) |
| 127 | return zip_doc, business_type | 127 | return zip_doc, business_type, re_ocr_flag |
| 128 | 128 | ||
| 129 | def get_doc_info(self, task_str, is_priority=False): | 129 | def get_doc_info(self, task_str, is_priority=False): |
| 130 | try: | 130 | try: |
| ... | @@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): |
| 135 | classify_1_str = '0' | 135 | classify_1_str = '0' |
| 136 | rebuild_task_str = task_str | 136 | rebuild_task_str = task_str |
| 137 | else: | 137 | else: |
| 138 | business_type, doc_id_str, classify_1_str = info_tuple | 138 | business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple |
| 139 | rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) | 139 | rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) |
| 140 | doc_id = int(doc_id_str) | 140 | doc_id = int(doc_id_str) |
| 141 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 141 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc |
| ... | @@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin): |
| 160 | else: | 160 | else: |
| 161 | self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( | 161 | self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( |
| 162 | self.log_base, task_str, is_priority)) | 162 | self.log_base, task_str, is_priority)) |
| 163 | return doc, business_type, rebuild_task_str, classify_1_str | 163 | return doc, business_type, rebuild_task_str, classify_1_str, re_ocr_flag |
| 164 | 164 | ||
| 165 | # def pdf_download(self, doc, pdf_path): | 165 | # def pdf_download(self, doc, pdf_path): |
| 166 | # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | 166 | # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): |
| ... | @@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1202 | self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str)) | 1202 | self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str)) |
| 1203 | 1203 | ||
| 1204 | # 2. 修改doc状态: 识别中 | 1204 | # 2. 修改doc状态: 识别中 |
| 1205 | zip_doc, business_type = self.get_zip_doc_info(task_str) | 1205 | zip_doc, business_type, re_ocr_flag = self.get_zip_doc_info(task_str) |
| 1206 | if zip_doc is None: | 1206 | if zip_doc is None: |
| 1207 | time.sleep(self.sleep_time_doc_get) | 1207 | time.sleep(self.sleep_time_doc_get) |
| 1208 | continue | 1208 | continue |
| ... | @@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1339 | 1339 | ||
| 1340 | try: | 1340 | try: |
| 1341 | # 1. 从队列获取文件信息 | 1341 | # 1. 从队列获取文件信息 |
| 1342 | doc, business_type, task_str, classify_1_str = self.get_doc_info(task_str, is_priority) | 1342 | doc, business_type, task_str, classify_1_str, re_ocr_flag = self.get_doc_info(task_str, is_priority) |
| 1343 | # 队列为空时的处理 | 1343 | # 队列为空时的处理 |
| 1344 | if doc is None: | 1344 | if doc is None: |
| 1345 | time.sleep(self.sleep_time_doc_get) | 1345 | time.sleep(self.sleep_time_doc_get) |
| ... | @@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin): |
| 1389 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( | 1389 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( |
| 1390 | self.log_base, task_str, times)) | 1390 | self.log_base, task_str, times)) |
| 1391 | start_time = time.time() | 1391 | start_time = time.time() |
| 1392 | pdf_handler.extract_image(max_img_count) | 1392 | max_img_count_or_none = None if re_ocr_flag == 'Y' else max_img_count |
| 1393 | pdf_handler.extract_image(max_img_count_or_none) | ||
| 1393 | end_time = time.time() | 1394 | end_time = time.time() |
| 1394 | speed_time = int(end_time - start_time) | 1395 | speed_time = int(end_time - start_time) |
| 1395 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( | 1396 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( |
| ... | @@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1407 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | 1408 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( |
| 1408 | self.log_base, task_str)) | 1409 | self.log_base, task_str)) |
| 1409 | raise Exception('pdf img empty') | 1410 | raise Exception('pdf img empty') |
| 1410 | elif pdf_handler.img_count >= max_img_count: | 1411 | elif re_ocr_flag == 'N' and pdf_handler.img_count >= max_img_count: |
| 1411 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( | 1412 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( |
| 1412 | self.log_base, task_str, pdf_handler.img_count)) | 1413 | self.log_base, task_str, pdf_handler.img_count)) |
| 1413 | 1414 | ... | ... |
| ... | @@ -593,6 +593,16 @@ invoice_download_args = { | ... | @@ -593,6 +593,16 @@ invoice_download_args = { |
| 593 | 'application_ids': fields.Str(required=True), | 593 | 'application_ids': fields.Str(required=True), |
| 594 | } | 594 | } |
| 595 | 595 | ||
# Request arguments accepted by the single-document re-OCR endpoint
# (consumed through ``@use_args`` on ``DocReOcrView.post``).
doc_reocr_args = {
    'doc_id': fields.Int(required=True),
    'application_entity': fields.Str(required=True),
}

# Request arguments accepted by the batch re-OCR endpoint
# (consumed through ``@use_args`` on ``BatchReOcrView.post``).
batch_reocr_args = {
    'application_entity': fields.Str(required=True),
}
| 604 | |||
| 605 | |||
| 596 | class UploadDocView(GenericView, DocHandler): | 606 | class UploadDocView(GenericView, DocHandler): |
| 597 | # permission_classes = [] | 607 | # permission_classes = [] |
| 598 | # authentication_classes = [] | 608 | # authentication_classes = [] |
| ... | @@ -709,7 +719,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -709,7 +719,7 @@ class UploadDocView(GenericView, DocHandler): |
| 709 | or document_name.endswith('.RAR'): | 719 | or document_name.endswith('.RAR'): |
| 710 | is_zip = True | 720 | is_zip = True |
| 711 | 721 | ||
| 712 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | 722 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'N']) |
| 713 | enqueue_res = rh.enqueue([task], is_priority, is_zip) | 723 | enqueue_res = rh.enqueue([task], is_priority, is_zip) |
| 714 | self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' | 724 | self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' |
| 715 | '[is_priority={3}] [enqueue_res={4}] [is_fsm={5} [classify_1={6}]]'.format(args, prefix, doc.id, | 725 | '[is_priority={3}] [enqueue_res={4}] [is_fsm={5} [classify_1={6}]]'.format(args, prefix, doc.id, |
| ... | @@ -1294,7 +1304,7 @@ class DocView(DocGenericView, DocHandler): | ... | @@ -1294,7 +1304,7 @@ class DocView(DocGenericView, DocHandler): |
| 1294 | is_zip = True | 1304 | is_zip = True |
| 1295 | 1305 | ||
| 1296 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] | 1306 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] |
| 1297 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | 1307 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'N']) |
| 1298 | enqueue_res = rh.enqueue([task], is_priority, is_zip) | 1308 | enqueue_res = rh.enqueue([task], is_priority, is_zip) |
| 1299 | 1309 | ||
| 1300 | self.running_log.info('[mock doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' | 1310 | self.running_log.info('[mock doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' |
| ... | @@ -1983,88 +1993,88 @@ class GoView(GenericView): | ... | @@ -1983,88 +1993,88 @@ class GoView(GenericView): |
| 1983 | else: | 1993 | else: |
| 1984 | return response.error_msg(msg='识别错误') | 1994 | return response.error_msg(msg='识别错误') |
| 1985 | 1995 | ||
| 1986 | class InvoiceExcelView(GenericView): | 1996 | # class InvoiceExcelView(GenericView): |
| 1987 | #permission_classes = [IsAuthenticated] | 1997 | # #permission_classes = [IsAuthenticated] |
| 1988 | #authentication_classes = [OAuth2AuthenticationWithUser] | 1998 | # #authentication_classes = [OAuth2AuthenticationWithUser] |
| 1989 | 1999 | ||
| 1990 | # 下载发票excel | 2000 | # # 下载发票excel |
| 1991 | @use_args(invoice_download_args, location='data') | 2001 | # @use_args(invoice_download_args, location='data') |
| 1992 | def post(self, request, args): | 2002 | # def post(self, request, args): |
| 1993 | application_ids = args.get('application_ids') | 2003 | # application_ids = args.get('application_ids') |
| 1994 | application_entity = args.get('application_entity') | 2004 | # application_entity = args.get('application_entity') |
| 1995 | self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333')) | 2005 | # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333')) |
| 1996 | # 角色权限不符,返回异常 | 2006 | # # 角色权限不符,返回异常 |
| 1997 | token = request.META.get("HTTP_AUTHORIZATION") | 2007 | # token = request.META.get("HTTP_AUTHORIZATION") |
| 1998 | user_role = rh.get_token(token[-11:]) | 2008 | # user_role = rh.get_token(token[-11:]) |
| 1999 | self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role)) | 2009 | # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role)) |
| 2000 | if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'): | 2010 | # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'): |
| 2001 | self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity)) | 2011 | # self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity)) |
| 2002 | raise NoPermissionException('no permission') | 2012 | # raise NoPermissionException('no permission') |
| 2003 | 2013 | ||
| 2004 | url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri' | 2014 | # url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri' |
| 2005 | body = { | 2015 | # body = { |
| 2006 | 'applicationIds': application_ids, | 2016 | # 'applicationIds': application_ids, |
| 2007 | 'applicationEntity': application_entity | 2017 | # 'applicationEntity': application_entity |
| 2008 | } | 2018 | # } |
| 2009 | try: | 2019 | # try: |
| 2010 | self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body))) | 2020 | # self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body))) |
| 2011 | headers = { | 2021 | # headers = { |
| 2012 | 'Content-Type': 'application/json' | 2022 | # 'Content-Type': 'application/json' |
| 2013 | } | 2023 | # } |
| 2014 | resp = requests.post(url, headers=headers, json=body) | 2024 | # resp = requests.post(url, headers=headers, json=body) |
| 2015 | self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text)) | 2025 | # self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text)) |
| 2016 | res_json = json.loads(resp.text) | 2026 | # res_json = json.loads(resp.text) |
| 2017 | file_path = res_json.get('result') | 2027 | # file_path = res_json.get('result') |
| 2018 | self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path)) | 2028 | # self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path)) |
| 2019 | current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime()) | 2029 | # current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime()) |
| 2020 | download_file_name = "发票信息提取-" + current_time + ".xlsx" | 2030 | # download_file_name = "发票信息提取-" + current_time + ".xlsx" |
| 2021 | f = open(file_path,"rb") | 2031 | # f = open(file_path,"rb") |
| 2022 | response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') | 2032 | # response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') |
| 2023 | response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name)) | 2033 | # response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name)) |
| 2024 | response['Access-Control-Expose-Headers'] = 'content-disposition' | 2034 | # response['Access-Control-Expose-Headers'] = 'content-disposition' |
| 2025 | response.write(f.read()) | 2035 | # response.write(f.read()) |
| 2026 | f.close() | 2036 | # f.close() |
| 2027 | return response | 2037 | # return response |
| 2028 | except Exception as e: | 2038 | # except Exception as e: |
| 2029 | self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format( | 2039 | # self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format( |
| 2030 | url, json.dumps(body), traceback.format_exc())) | 2040 | # url, json.dumps(body), traceback.format_exc())) |
| 2031 | 2041 | ||
| 2032 | class InvoiceQueryInfoView(GenericView): | 2042 | # class InvoiceQueryInfoView(GenericView): |
| 2033 | #permission_classes = [IsAuthenticated] | 2043 | # #permission_classes = [IsAuthenticated] |
| 2034 | #authentication_classes = [OAuth2AuthenticationWithUser] | 2044 | # #authentication_classes = [OAuth2AuthenticationWithUser] |
| 2035 | 2045 | ||
| 2036 | @use_args(invoice_download_args, location='data') | 2046 | # @use_args(invoice_download_args, location='data') |
| 2037 | def post(self, request, args): | 2047 | # def post(self, request, args): |
| 2038 | application_ids = args.get('application_ids') | 2048 | # application_ids = args.get('application_ids') |
| 2039 | application_entity = args.get('application_entity') | 2049 | # application_entity = args.get('application_entity') |
| 2040 | self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333')) | 2050 | # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333')) |
| 2041 | 2051 | ||
| 2042 | # 角色权限不符,返回异常 | 2052 | # # 角色权限不符,返回异常 |
| 2043 | token = request.META.get("HTTP_AUTHORIZATION") | 2053 | # token = request.META.get("HTTP_AUTHORIZATION") |
| 2044 | user_role = rh.get_token(token[-11:]) | 2054 | # user_role = rh.get_token(token[-11:]) |
| 2045 | self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role)) | 2055 | # self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role)) |
| 2046 | if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'): | 2056 | # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'): |
| 2047 | self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity)) | 2057 | # self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity)) |
| 2048 | raise NoPermissionException('no permission') | 2058 | # raise NoPermissionException('no permission') |
| 2049 | 2059 | ||
| 2050 | url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri' | 2060 | # url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri' |
| 2051 | body = { | 2061 | # body = { |
| 2052 | 'applicationIds': application_ids, | 2062 | # 'applicationIds': application_ids, |
| 2053 | 'applicationEntity': application_entity | 2063 | # 'applicationEntity': application_entity |
| 2054 | } | 2064 | # } |
| 2055 | try: | 2065 | # try: |
| 2056 | self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body))) | 2066 | # self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body))) |
| 2057 | headers = { | 2067 | # headers = { |
| 2058 | 'Content-Type': 'application/json' | 2068 | # 'Content-Type': 'application/json' |
| 2059 | } | 2069 | # } |
| 2060 | resp = requests.post(url, headers=headers, json=body) | 2070 | # resp = requests.post(url, headers=headers, json=body) |
| 2061 | self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text)) | 2071 | # self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text)) |
| 2062 | res_json = json.loads(resp.text) | 2072 | # res_json = json.loads(resp.text) |
| 2063 | java_result = res_json.get('result') | 2073 | # java_result = res_json.get('result') |
| 2064 | return response2.ok(data=java_result) | 2074 | # return response2.ok(data=java_result) |
| 2065 | except Exception as e: | 2075 | # except Exception as e: |
| 2066 | self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format( | 2076 | # self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format( |
| 2067 | url, json.dumps(body), traceback.format_exc())) | 2077 | # url, json.dumps(body), traceback.format_exc())) |
| 2068 | 2078 | ||
| 2069 | def notifyCmsPass(self, request): | 2079 | def notifyCmsPass(self, request): |
| 2070 | args = request.data | 2080 | args = request.data |
| ... | @@ -2191,3 +2201,193 @@ class DownloadGBHistoryFileView(GenericView): | ... | @@ -2191,3 +2201,193 @@ class DownloadGBHistoryFileView(GenericView): |
| 2191 | return response.ok(data=True) | 2201 | return response.ok(data=True) |
| 2192 | except Exception as e: | 2202 | except Exception as e: |
| 2193 | return response.ok(data=False) | 2203 | return response.ok(data=False) |
| 2204 | |||
class InvoiceExcelView(GenericView):
    """Proxy the invoice Excel export to the local Java service.

    ``post`` forwards ``application_ids`` and ``application_entity`` to the
    Java endpoint, opens the file whose path the service returns in
    ``result`` and sends its bytes back as an ``.xlsx`` attachment.
    """

    # NOTE(review): the auth classes and the role check inside ``post`` are
    # commented out, so this view relies on whatever GenericView provides by
    # default -- confirm that is intended before release.
    #permission_classes = [IsAuthenticated]
    #authentication_classes = [OAuth2AuthenticationWithUser]

    # Download the invoice Excel file.
    @use_args(invoice_download_args, location='data')
    def post(self, request, args):
        """Return the generated Excel file, or an error response on failure."""
        application_ids = args.get('application_ids')
        application_entity = args.get('application_entity')
        self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
        # Role check (reject when the role does not match the entity) -- currently disabled.
        #token = request.META.get("HTTP_AUTHORIZATION")
        #user_role = rh.get_token(token[-11:])
        #self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role))
        #if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
        #    self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
        #    raise NoPermissionException('no permission')

        url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri'
        body = {
            'applicationIds': application_ids,
            'applicationEntity': application_entity
        }
        try:
            self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body)))
            headers = {
                'Content-Type': 'application/json'
            }
            resp = requests.post(url, headers=headers, json=body)
            self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
            res_json = json.loads(resp.text)
            file_path = res_json.get('result')
            self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path))
            current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime())
            download_file_name = "发票信息提取-" + current_time + ".xlsx"
            # The context manager closes the file even when reading fails.
            with open(file_path, "rb") as excel_file:
                excel_bytes = excel_file.read()
            # Named ``download_response`` so the module-level ``response``
            # helper stays reachable in the ``except`` branch below.
            download_response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
            download_response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name))
            download_response['Access-Control-Expose-Headers'] = 'content-disposition'
            download_response.write(excel_bytes)
            return download_response
        except Exception:
            self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
                url, json.dumps(body), traceback.format_exc()))
            # A view must always return a response object; previously this
            # branch fell through and returned None.
            return response.error_msg(msg='发票excel下载失败')
| 2250 | |||
class InvoiceQueryInfoView(GenericView):
    """Proxy the invoice info query to the local Java service.

    ``post`` forwards ``application_ids`` and ``application_entity`` to the
    Java endpoint and returns the ``result`` field of its JSON reply.
    """

    # NOTE(review): the auth classes and the role check inside ``post`` are
    # commented out, so this view relies on whatever GenericView provides by
    # default -- confirm that is intended before release.
    #permission_classes = [IsAuthenticated]
    #authentication_classes = [OAuth2AuthenticationWithUser]

    @use_args(invoice_download_args, location='data')
    def post(self, request, args):
        """Return the Java service's ``result`` payload, or an error response on failure."""
        application_ids = args.get('application_ids')
        application_entity = args.get('application_entity')
        # Tag corrected: this line used to be logged as [InvoiceExcelView].
        self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format('111222333'))

        # Role check (reject when the role does not match the entity) -- currently disabled.
        #token = request.META.get("HTTP_AUTHORIZATION")
        #user_role = rh.get_token(token[-11:])
        #self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role))
        #if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
        #    self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
        #    raise NoPermissionException('no permission')

        url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri'
        body = {
            'applicationIds': application_ids,
            'applicationEntity': application_entity
        }
        try:
            self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body)))
            headers = {
                'Content-Type': 'application/json'
            }
            resp = requests.post(url, headers=headers, json=body)
            self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
            res_json = json.loads(resp.text)
            java_result = res_json.get('result')
            return response2.ok(data=java_result)
        except Exception:
            self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
                url, json.dumps(body), traceback.format_exc()))
            # A view must always return a response object; previously this
            # branch fell through and returned None.
            # NOTE(review): the success path uses ``response2``; if it offers
            # an error helper of its own, prefer that one -- TODO confirm.
            return response.error_msg(msg='发票信息查询失败')
| 2287 | |||
class DocReOcrView(GenericView, DocHandler):
    """Put one already stored document back on the OCR queue.

    The queued task string is ``prefix``, document id, ``classify_1`` and a
    trailing ``'Y'`` joined with ``consts.SPLIT_STR``; the ``'Y'`` marks the
    task as a re-OCR request.
    """
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]

    # required_scopes = ['write']

    # Re-recognition endpoint for an existing document.
    @use_args(doc_reocr_args, location='data')
    def post(self, request, args):
        """Enqueue the document named by ``doc_id`` for re-recognition.

        Returns ``response.ok()`` once the task has been handed to the queue,
        or an error response when no document with that id exists.
        """
        application_entity = args.get('application_entity')
        doc_id = args.get('doc_id')

        doc_class, prefix = self.get_doc_class(application_entity)
        doc = doc_class.objects.filter(id=doc_id).first()
        if doc is None:
            # ``.first()`` yields None for an unknown id; without this guard
            # the attribute access below fails with AttributeError.
            self.running_log.info('[doc reocr failed (doc not found)] [args={0}]'.format(args))
            return response.error_msg(msg='文件不存在')

        # Choose which queue the task enters.
        is_priority = PriorityApplication.objects.filter(application_id=doc.application_id, on_off=True).exists()

        classify_1 = 0
        # E-contract: Econtract or OVP (FSM).
        if doc.data_source in (consts.DATA_SOURCE_LIST[2], consts.DATA_SOURCE_LIST[3]):
            if doc.document_scheme == consts.DOC_SCHEME_LIST[1]:
                for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
                    if keyword in doc.document_name:
                        classify_1 = classify_1_tmp
                        break
        # FSM contract: WEP/MSI/SC/SC2.
        elif doc.data_source == consts.DATA_SOURCE_LIST[0] and doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
            for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix):
                if keyword in doc.document_name:
                    classify_1 = classify_1_tmp
                    break

        # Same four suffixes as before, checked in a single call.
        is_zip = doc.document_name.endswith(('.zip', '.rar', '.ZIP', '.RAR'))

        # Task looks like 'AFC_11001_0_Y' / 'AFC_11001_0_N'; the trailing Y/N
        # says whether this is a re-OCR request (N = no, Y = yes).
        task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'Y'])
        enqueue_res = rh.enqueue([task], is_priority, is_zip)
        self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
                              '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id,
                                                                                          is_priority, enqueue_res, classify_1))

        return response.ok()
| 2337 | |||
class BatchReOcrView(GenericView, DocHandler):
    """Re-queue for OCR every ``status=2`` document created within today's window.

    Each selected document is enqueued with a task string made of ``prefix``,
    document id, ``classify_1`` and a trailing ``'Y'`` joined with
    ``consts.SPLIT_STR``.
    """
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]

    # Batch re-recognition endpoint for existing documents.
    @use_args(batch_reocr_args, location='data')
    def post(self, request, args):
        """Enqueue all matching documents and return ``response.ok()``."""
        application_entity = args.get('application_entity')

        # NOTE(review): ``today`` comes from ``timezone.now().date()`` while
        # the bounds are made aware in the current time zone -- confirm the
        # resulting window is the intended calendar day.
        today = timezone.now().date()
        day_first_moment = timezone.datetime.combine(today, timezone.datetime.min.time())
        day_last_moment = timezone.datetime.combine(today, timezone.datetime.max.time())
        start_of_day = timezone.make_aware(day_first_moment)
        end_of_day = timezone.make_aware(day_last_moment)

        doc_class, prefix = self.get_doc_class(application_entity)
        docs = doc_class.objects.filter(status=2, create_time__range=(start_of_day, end_of_day))

        # One timestamp per request so every log line of this batch can be correlated.
        time_stamp = time.time()
        for doc in docs.iterator():
            self.running_log.info('[batch doc reocr timestamp={0}] [doc_id={1}]'.format(time_stamp, doc.id))

            # Choose which queue the task enters.
            is_priority = PriorityApplication.objects.filter(application_id=doc.application_id, on_off=True).exists()

            name = doc.document_name
            classify_1 = 0
            # E-contract: Econtract or OVP (FSM).
            if doc.data_source == consts.DATA_SOURCE_LIST[2] or doc.data_source == consts.DATA_SOURCE_LIST[3]:
                if doc.document_scheme == consts.DOC_SCHEME_LIST[1]:
                    pairs = consts.ECONTRACT_KEYWORDS_MAP.get(prefix)
                    # First keyword found in the document name wins; 0 when none matches.
                    classify_1 = next((value for keyword, value in pairs if keyword in name), 0)
            # FSM contract: WEP/MSI/SC/SC2.
            elif doc.data_source == consts.DATA_SOURCE_LIST[0] and doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
                pairs = consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix)
                classify_1 = next((value for keyword, value in pairs if keyword in name), 0)

            is_zip = name.endswith(('.zip', '.rar', '.ZIP', '.RAR'))

            # Task looks like 'AFC_11001_0_Y' / 'AFC_11001_0_N'; the trailing
            # Y/N says whether this is a re-OCR request (N = no, Y = yes).
            task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'Y'])
            enqueue_res = rh.enqueue([task], is_priority, is_zip)
            self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
                                  '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id,
                                                                                              is_priority, enqueue_res, classify_1))

        return response.ok()
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or sign in to post a comment