160ac57d by 冯轩

merge

2 parents 88f01673 d619642f
...@@ -10,4 +10,7 @@ urlpatterns = [ ...@@ -10,4 +10,7 @@ urlpatterns = [
10 path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), 10 path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()),
11 path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), 11 path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()),
12 path(r'contract/v1', views.SEContractView.as_view()), 12 path(r'contract/v1', views.SEContractView.as_view()),
13 path(r'reocr', views.DocReOcrView.as_view()),
14 path(r'batch/reocr', views.BatchReOcrView.as_view()),
15
13 ] 16 ]
......
...@@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin):
100 if len(info_tuple) == 2: 100 if len(info_tuple) == 2:
101 business_type, doc_id_str = info_tuple 101 business_type, doc_id_str = info_tuple
102 else: 102 else:
103 business_type, doc_id_str, classify_1_str = info_tuple 103 business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple
104 doc_id = int(doc_id_str) 104 doc_id = int(doc_id_str)
105 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 105 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
106 zip_doc = doc_class.objects.filter(id=doc_id).first() 106 zip_doc = doc_class.objects.filter(id=doc_id).first()
...@@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin):
124 else: 124 else:
125 self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format( 125 self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format(
126 self.log_base, task_str)) 126 self.log_base, task_str))
127 return zip_doc, business_type 127 return zip_doc, business_type, re_ocr_flag
128 128
129 def get_doc_info(self, task_str, is_priority=False): 129 def get_doc_info(self, task_str, is_priority=False):
130 try: 130 try:
...@@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin):
135 classify_1_str = '0' 135 classify_1_str = '0'
136 rebuild_task_str = task_str 136 rebuild_task_str = task_str
137 else: 137 else:
138 business_type, doc_id_str, classify_1_str = info_tuple 138 business_type, doc_id_str, classify_1_str, re_ocr_flag = info_tuple
139 rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str) 139 rebuild_task_str = '{0}{1}{2}'.format(business_type, consts.SPLIT_STR, doc_id_str)
140 doc_id = int(doc_id_str) 140 doc_id = int(doc_id_str)
141 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 141 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
...@@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin):
160 else: 160 else:
161 self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( 161 self.online_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format(
162 self.log_base, task_str, is_priority)) 162 self.log_base, task_str, is_priority))
163 return doc, business_type, rebuild_task_str, classify_1_str 163 return doc, business_type, rebuild_task_str, classify_1_str, re_ocr_flag
164 164
165 # def pdf_download(self, doc, pdf_path): 165 # def pdf_download(self, doc, pdf_path):
166 # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): 166 # if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
...@@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin):
1202 self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str)) 1202 self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str))
1203 1203
1204 # 2. 修改doc状态: 识别中 1204 # 2. 修改doc状态: 识别中
1205 zip_doc, business_type = self.get_zip_doc_info(task_str) 1205 zip_doc, business_type, re_ocr_flag = self.get_zip_doc_info(task_str)
1206 if zip_doc is None: 1206 if zip_doc is None:
1207 time.sleep(self.sleep_time_doc_get) 1207 time.sleep(self.sleep_time_doc_get)
1208 continue 1208 continue
...@@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin):
1339 1339
1340 try: 1340 try:
1341 # 1. 从队列获取文件信息 1341 # 1. 从队列获取文件信息
1342 doc, business_type, task_str, classify_1_str = self.get_doc_info(task_str, is_priority) 1342 doc, business_type, task_str, classify_1_str, re_ocr_flag = self.get_doc_info(task_str, is_priority)
1343 # 队列为空时的处理 1343 # 队列为空时的处理
1344 if doc is None: 1344 if doc is None:
1345 time.sleep(self.sleep_time_doc_get) 1345 time.sleep(self.sleep_time_doc_get)
...@@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin):
1389 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( 1389 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
1390 self.log_base, task_str, times)) 1390 self.log_base, task_str, times))
1391 start_time = time.time() 1391 start_time = time.time()
1392 pdf_handler.extract_image(max_img_count) 1392 max_img_count_or_none = None if re_ocr_flag == 'Y' else max_img_count
1393 pdf_handler.extract_image(max_img_count_or_none)
1393 end_time = time.time() 1394 end_time = time.time()
1394 speed_time = int(end_time - start_time) 1395 speed_time = int(end_time - start_time)
1395 self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( 1396 self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format(
...@@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin):
1407 self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( 1408 self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format(
1408 self.log_base, task_str)) 1409 self.log_base, task_str))
1409 raise Exception('pdf img empty') 1410 raise Exception('pdf img empty')
1410 elif pdf_handler.img_count >= max_img_count: 1411 elif re_ocr_flag == 'N' and pdf_handler.img_count >= max_img_count:
1411 self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( 1412 self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format(
1412 self.log_base, task_str, pdf_handler.img_count)) 1413 self.log_base, task_str, pdf_handler.img_count))
1413 1414
......
...@@ -593,6 +593,16 @@ invoice_download_args = { ...@@ -593,6 +593,16 @@ invoice_download_args = {
593 'application_ids': fields.Str(required=True), 593 'application_ids': fields.Str(required=True),
594 } 594 }
595 595
596 doc_reocr_args = {
597 'doc_id': fields.Int(required=True),
598 'application_entity': fields.Str(required=True),
599 }
600
601 batch_reocr_args = {
602 'application_entity': fields.Str(required=True),
603 }
604
605
596 class UploadDocView(GenericView, DocHandler): 606 class UploadDocView(GenericView, DocHandler):
597 # permission_classes = [] 607 # permission_classes = []
598 # authentication_classes = [] 608 # authentication_classes = []
...@@ -709,7 +719,7 @@ class UploadDocView(GenericView, DocHandler): ...@@ -709,7 +719,7 @@ class UploadDocView(GenericView, DocHandler):
709 or document_name.endswith('.RAR'): 719 or document_name.endswith('.RAR'):
710 is_zip = True 720 is_zip = True
711 721
712 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) 722 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'N'])
713 enqueue_res = rh.enqueue([task], is_priority, is_zip) 723 enqueue_res = rh.enqueue([task], is_priority, is_zip)
714 self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' 724 self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
715 '[is_priority={3}] [enqueue_res={4}] [is_fsm={5} [classify_1={6}]]'.format(args, prefix, doc.id, 725 '[is_priority={3}] [enqueue_res={4}] [is_fsm={5} [classify_1={6}]]'.format(args, prefix, doc.id,
...@@ -1294,7 +1304,7 @@ class DocView(DocGenericView, DocHandler): ...@@ -1294,7 +1304,7 @@ class DocView(DocGenericView, DocHandler):
1294 is_zip = True 1304 is_zip = True
1295 1305
1296 # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] 1306 # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
1297 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) 1307 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'N'])
1298 enqueue_res = rh.enqueue([task], is_priority, is_zip) 1308 enqueue_res = rh.enqueue([task], is_priority, is_zip)
1299 1309
1300 self.running_log.info('[mock doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' 1310 self.running_log.info('[mock doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
...@@ -1983,88 +1993,88 @@ class GoView(GenericView): ...@@ -1983,88 +1993,88 @@ class GoView(GenericView):
1983 else: 1993 else:
1984 return response.error_msg(msg='识别错误') 1994 return response.error_msg(msg='识别错误')
1985 1995
1986 class InvoiceExcelView(GenericView): 1996 # class InvoiceExcelView(GenericView):
1987 #permission_classes = [IsAuthenticated] 1997 # #permission_classes = [IsAuthenticated]
1988 #authentication_classes = [OAuth2AuthenticationWithUser] 1998 # #authentication_classes = [OAuth2AuthenticationWithUser]
1989 1999
1990 # 下载发票excel 2000 # # 下载发票excel
1991 @use_args(invoice_download_args, location='data') 2001 # @use_args(invoice_download_args, location='data')
1992 def post(self, request, args): 2002 # def post(self, request, args):
1993 application_ids = args.get('application_ids') 2003 # application_ids = args.get('application_ids')
1994 application_entity = args.get('application_entity') 2004 # application_entity = args.get('application_entity')
1995 self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333')) 2005 # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
1996 # 角色权限不符,返回异常 2006 # # 角色权限不符,返回异常
1997 token = request.META.get("HTTP_AUTHORIZATION") 2007 # token = request.META.get("HTTP_AUTHORIZATION")
1998 user_role = rh.get_token(token[-11:]) 2008 # user_role = rh.get_token(token[-11:])
1999 self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role)) 2009 # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role))
2000 if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'): 2010 # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
2001 self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity)) 2011 # self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
2002 raise NoPermissionException('no permission') 2012 # raise NoPermissionException('no permission')
2003 2013
2004 url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri' 2014 # url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri'
2005 body = { 2015 # body = {
2006 'applicationIds': application_ids, 2016 # 'applicationIds': application_ids,
2007 'applicationEntity': application_entity 2017 # 'applicationEntity': application_entity
2008 } 2018 # }
2009 try: 2019 # try:
2010 self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body))) 2020 # self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body)))
2011 headers = { 2021 # headers = {
2012 'Content-Type': 'application/json' 2022 # 'Content-Type': 'application/json'
2013 } 2023 # }
2014 resp = requests.post(url, headers=headers, json=body) 2024 # resp = requests.post(url, headers=headers, json=body)
2015 self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text)) 2025 # self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
2016 res_json = json.loads(resp.text) 2026 # res_json = json.loads(resp.text)
2017 file_path = res_json.get('result') 2027 # file_path = res_json.get('result')
2018 self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path)) 2028 # self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path))
2019 current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime()) 2029 # current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime())
2020 download_file_name = "发票信息提取-" + current_time + ".xlsx" 2030 # download_file_name = "发票信息提取-" + current_time + ".xlsx"
2021 f = open(file_path,"rb") 2031 # f = open(file_path,"rb")
2022 response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') 2032 # response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
2023 response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name)) 2033 # response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name))
2024 response['Access-Control-Expose-Headers'] = 'content-disposition' 2034 # response['Access-Control-Expose-Headers'] = 'content-disposition'
2025 response.write(f.read()) 2035 # response.write(f.read())
2026 f.close() 2036 # f.close()
2027 return response 2037 # return response
2028 except Exception as e: 2038 # except Exception as e:
2029 self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format( 2039 # self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
2030 url, json.dumps(body), traceback.format_exc())) 2040 # url, json.dumps(body), traceback.format_exc()))
2031 2041
2032 class InvoiceQueryInfoView(GenericView): 2042 # class InvoiceQueryInfoView(GenericView):
2033 #permission_classes = [IsAuthenticated] 2043 # #permission_classes = [IsAuthenticated]
2034 #authentication_classes = [OAuth2AuthenticationWithUser] 2044 # #authentication_classes = [OAuth2AuthenticationWithUser]
2035 2045
2036 @use_args(invoice_download_args, location='data') 2046 # @use_args(invoice_download_args, location='data')
2037 def post(self, request, args): 2047 # def post(self, request, args):
2038 application_ids = args.get('application_ids') 2048 # application_ids = args.get('application_ids')
2039 application_entity = args.get('application_entity') 2049 # application_entity = args.get('application_entity')
2040 self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333')) 2050 # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
2041 2051
2042 # 角色权限不符,返回异常 2052 # # 角色权限不符,返回异常
2043 token = request.META.get("HTTP_AUTHORIZATION") 2053 # token = request.META.get("HTTP_AUTHORIZATION")
2044 user_role = rh.get_token(token[-11:]) 2054 # user_role = rh.get_token(token[-11:])
2045 self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role)) 2055 # self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role))
2046 if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'): 2056 # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
2047 self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity)) 2057 # self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
2048 raise NoPermissionException('no permission') 2058 # raise NoPermissionException('no permission')
2049 2059
2050 url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri' 2060 # url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri'
2051 body = { 2061 # body = {
2052 'applicationIds': application_ids, 2062 # 'applicationIds': application_ids,
2053 'applicationEntity': application_entity 2063 # 'applicationEntity': application_entity
2054 } 2064 # }
2055 try: 2065 # try:
2056 self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body))) 2066 # self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body)))
2057 headers = { 2067 # headers = {
2058 'Content-Type': 'application/json' 2068 # 'Content-Type': 'application/json'
2059 } 2069 # }
2060 resp = requests.post(url, headers=headers, json=body) 2070 # resp = requests.post(url, headers=headers, json=body)
2061 self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text)) 2071 # self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
2062 res_json = json.loads(resp.text) 2072 # res_json = json.loads(resp.text)
2063 java_result = res_json.get('result') 2073 # java_result = res_json.get('result')
2064 return response2.ok(data=java_result) 2074 # return response2.ok(data=java_result)
2065 except Exception as e: 2075 # except Exception as e:
2066 self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format( 2076 # self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
2067 url, json.dumps(body), traceback.format_exc())) 2077 # url, json.dumps(body), traceback.format_exc()))
2068 2078
2069 def notifyCmsPass(self, request): 2079 def notifyCmsPass(self, request):
2070 args = request.data 2080 args = request.data
...@@ -2190,4 +2200,194 @@ class DownloadGBHistoryFileView(GenericView): ...@@ -2190,4 +2200,194 @@ class DownloadGBHistoryFileView(GenericView):
2190 self.running_log.info('[DownloadGBHistoryFileView] [args={0}] '.format(args)) 2200 self.running_log.info('[DownloadGBHistoryFileView] [args={0}] '.format(args))
2191 return response.ok(data=True) 2201 return response.ok(data=True)
2192 except Exception as e: 2202 except Exception as e:
2193 return response.ok(data=False)
...\ No newline at end of file ...\ No newline at end of file
2203 return response.ok(data=False)
2204
class InvoiceExcelView(GenericView):
    """Ask the local Java invoice service to build an Excel file and return it as a download.

    NOTE(review): permission/authentication classes and the role check below
    are commented out, so this endpoint currently performs no access control
    of its own -- confirm that is intentional.
    """
    # permission_classes = [IsAuthenticated]
    # authentication_classes = [OAuth2AuthenticationWithUser]

    # Download the invoice Excel file.
    @use_args(invoice_download_args, location='data')
    def post(self, request, args):
        """Forward the request to the Java API and stream the generated file back.

        Args:
            request: the incoming HTTP request (unused while the role check is disabled).
            args: parsed request data; `application_ids` and `application_entity` are read.

        Returns:
            An `HttpResponse` carrying the xlsx bytes as an attachment.

        Raises:
            Exception: whatever the Java call, JSON decoding or file read raised,
                re-raised after being logged (the original fell through and
                returned None, which is not a valid view response).
        """
        application_ids = args.get('application_ids')
        application_entity = args.get('application_entity')
        self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
        # Role/permission mismatch -> raise (currently disabled).
        # token = request.META.get("HTTP_AUTHORIZATION")
        # user_role = rh.get_token(token[-11:])
        # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role))
        # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
        #     self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
        #     raise NoPermissionException('no permission')

        url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri'
        body = {
            'applicationIds': application_ids,
            'applicationEntity': application_entity
        }
        try:
            self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body)))
            headers = {
                'Content-Type': 'application/json'
            }
            # NOTE(review): no timeout is set, so a stalled Java service blocks this worker.
            resp = requests.post(url, headers=headers, json=body)
            self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
            res_json = json.loads(resp.text)
            # The Java side answers with the path of the generated file.
            file_path = res_json.get('result')
            self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path))
            current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime())
            download_file_name = "发票信息提取-" + current_time + ".xlsx"
            # Context manager guarantees the handle is closed even if reading fails.
            with open(file_path, "rb") as excel_file:
                excel_bytes = excel_file.read()
            # Named http_response so the module-level `response` helper is not shadowed.
            http_response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
            http_response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name))
            http_response['Access-Control-Expose-Headers'] = 'content-disposition'
            http_response.write(excel_bytes)
            return http_response
        except Exception:
            self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
                url, json.dumps(body), traceback.format_exc()))
            # Propagate instead of implicitly returning None from the view.
            raise
2250
class InvoiceQueryInfoView(GenericView):
    """Query invoice information from the local Java invoice service.

    NOTE(review): permission/authentication classes and the role check below
    are commented out, so this endpoint currently performs no access control
    of its own -- confirm that is intentional.
    """
    # permission_classes = [IsAuthenticated]
    # authentication_classes = [OAuth2AuthenticationWithUser]

    @use_args(invoice_download_args, location='data')
    def post(self, request, args):
        """Forward the query to the Java API and return its `result` payload.

        Args:
            request: the incoming HTTP request (unused while the role check is disabled).
            args: parsed request data; `application_ids` and `application_entity` are read.

        Returns:
            `response2.ok(data=...)` wrapping the `result` field of the Java reply.

        Raises:
            Exception: whatever the Java call or JSON decoding raised, re-raised
                after being logged (the original fell through and returned
                None, which is not a valid view response).
        """
        application_ids = args.get('application_ids')
        application_entity = args.get('application_entity')
        # Log tag corrected: it previously carried the name of InvoiceExcelView.
        self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format('111222333'))

        # Role/permission mismatch -> raise (currently disabled).
        # token = request.META.get("HTTP_AUTHORIZATION")
        # user_role = rh.get_token(token[-11:])
        # self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role))
        # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
        #     self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
        #     raise NoPermissionException('no permission')

        url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri'
        body = {
            'applicationIds': application_ids,
            'applicationEntity': application_entity
        }
        try:
            self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body)))
            headers = {
                'Content-Type': 'application/json'
            }
            # NOTE(review): no timeout is set, so a stalled Java service blocks this worker.
            resp = requests.post(url, headers=headers, json=body)
            self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
            res_json = json.loads(resp.text)
            java_result = res_json.get('result')
            return response2.ok(data=java_result)
        except Exception:
            self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
                url, json.dumps(body), traceback.format_exc()))
            # Propagate instead of implicitly returning None from the view.
            raise
2287
class DocReOcrView(GenericView, DocHandler):
    """Put one already-stored document back on the OCR queue with the re-OCR flag set."""
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]

    # required_scopes = ['write']

    @staticmethod
    def _resolve_classify_1(doc, prefix):
        """Return the first-level classification matched from the document name, or 0."""
        keyword_pairs = ()
        # E-contract: Econtract or OVP (FSM) data source.
        if doc.data_source == consts.DATA_SOURCE_LIST[2] or doc.data_source == consts.DATA_SOURCE_LIST[3]:
            if doc.document_scheme == consts.DOC_SCHEME_LIST[1]:
                keyword_pairs = consts.ECONTRACT_KEYWORDS_MAP.get(prefix) or ()
        # FSM contracts: WEP/MSI/SC/SC2.
        elif doc.data_source == consts.DATA_SOURCE_LIST[0] and doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
            keyword_pairs = consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix) or ()

        # First keyword contained in the file name wins.
        for keyword, classify_1_tmp in keyword_pairs:
            if keyword in doc.document_name:
                return classify_1_tmp
        return 0

    # Re-recognition endpoint for an existing file.
    @use_args(doc_reocr_args, location='data')
    def post(self, request, args):
        """Enqueue the document identified by `doc_id` for re-OCR.

        Args:
            request: the incoming HTTP request (not read directly).
            args: parsed request data; `application_entity` and `doc_id` are read.

        Returns:
            `response.ok()` once the task has been handed to the queue, or
            `response.error_msg(...)` when no document with that id exists.
        """
        application_entity = args.get('application_entity')
        doc_id = args.get('doc_id')

        doc_class, prefix = self.get_doc_class(application_entity)
        doc = doc_class.objects.filter(id=doc_id).first()
        if doc is None:
            # Without this guard the attribute accesses below raise AttributeError.
            self.running_log.info('[doc reocr failed (doc not found)] [args={0}]'.format(args))
            return response.error_msg(msg='文件不存在')

        # Choose which queue the task goes to.
        is_priority = PriorityApplication.objects.filter(application_id=doc.application_id, on_off=True).exists()
        classify_1 = self._resolve_classify_1(doc, prefix)
        # Archives are routed to the zip queue; suffix match is case-insensitive.
        is_zip = doc.document_name.lower().endswith(('.zip', '.rar'))

        # task looks like 'AFC_11001_0_Y' / 'AFC_11001_0_N'; the trailing flag
        # marks whether this is a re-OCR request (Y) or not (N).
        task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'Y'])
        enqueue_res = rh.enqueue([task], is_priority, is_zip)
        self.running_log.info('[doc reocr enqueue] [args={0}] [business_type={1}] [doc_id={2}] '
                              '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id,
                                                                                          is_priority, enqueue_res, classify_1))

        return response.ok()
2337
class BatchReOcrView(GenericView, DocHandler):
    """Put every document created today with status 2 back on the OCR queue with the re-OCR flag set."""
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]

    @staticmethod
    def _resolve_classify_1(doc, prefix):
        """Return the first-level classification matched from the document name, or 0."""
        keyword_pairs = ()
        # E-contract: Econtract or OVP (FSM) data source.
        if doc.data_source == consts.DATA_SOURCE_LIST[2] or doc.data_source == consts.DATA_SOURCE_LIST[3]:
            if doc.document_scheme == consts.DOC_SCHEME_LIST[1]:
                keyword_pairs = consts.ECONTRACT_KEYWORDS_MAP.get(prefix) or ()
        # FSM contracts: WEP/MSI/SC/SC2.
        elif doc.data_source == consts.DATA_SOURCE_LIST[0] and doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
            keyword_pairs = consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix) or ()

        # First keyword contained in the file name wins.
        for keyword, classify_1_tmp in keyword_pairs:
            if keyword in doc.document_name:
                return classify_1_tmp
        return 0

    # Batch re-recognition endpoint for existing files.
    @use_args(batch_reocr_args, location='data')
    def post(self, request, args):
        """Enqueue today's status-2 documents of the given entity for re-OCR.

        Args:
            request: the incoming HTTP request (not read directly).
            args: parsed request data; `application_entity` is read.

        Returns:
            `response.ok()` after all matching documents have been enqueued.
        """
        import datetime as _dt

        application_entity = args.get('application_entity')

        # Take "today" in the active time zone: calling .date() on an aware
        # now() yields the UTC date, which disagrees with the locally-built
        # bounds below for part of each day.
        now = timezone.now()
        today = timezone.localtime(now).date() if timezone.is_aware(now) else now.date()
        start_of_day = timezone.make_aware(_dt.datetime.combine(today, _dt.time.min))
        end_of_day = timezone.make_aware(_dt.datetime.combine(today, _dt.time.max))

        doc_class, prefix = self.get_doc_class(application_entity)
        docs = doc_class.objects.filter(status=2, create_time__range=(start_of_day, end_of_day))

        # One timestamp per batch so all log lines of a run can be correlated.
        time_stamp = time.time()
        for doc in docs.iterator():
            self.running_log.info('[batch doc reocr timestamp={0}] [doc_id={1}]'.format(time_stamp, doc.id))

            # Choose which queue the task goes to.
            is_priority = PriorityApplication.objects.filter(application_id=doc.application_id, on_off=True).exists()
            classify_1 = self._resolve_classify_1(doc, prefix)
            # Archives are routed to the zip queue; suffix match is case-insensitive.
            is_zip = doc.document_name.lower().endswith(('.zip', '.rar'))

            # task looks like 'AFC_11001_0_Y' / 'AFC_11001_0_N'; the trailing
            # flag marks whether this is a re-OCR request (Y) or not (N).
            task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'Y'])
            enqueue_res = rh.enqueue([task], is_priority, is_zip)
            self.running_log.info('[doc reocr enqueue] [args={0}] [business_type={1}] [doc_id={2}] '
                                  '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id,
                                                                                              is_priority, enqueue_res, classify_1))

        return response.ok()
...\ No newline at end of file ...\ No newline at end of file
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!