ad4deaf3 by 冯轩

init:weixin e-bs

1 parent 8ddb1d4c
......@@ -1504,6 +1504,103 @@ class Command(BaseCommand, LoggerMixin):
self.log_base, traceback.format_exc()))
# error_list.append(1)
# return
elif classify_1_str == '12': # weixin bs
self.online_log.info('{0} [pdf_2_img_2_queue weixin] [task={1}] [times={2}] [pdf_path={3}]'.format(self.log_base, task_str, times, pdf_path))
try:
# pdf下载 处理 图片存储 识别
for times in range(consts.RETRY_TIMES):
try:
if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] '
'[pdf_path={3}]'.format(self.log_base, task_str,
times, pdf_path))
pdf_handler.e_contract_process()
else:
self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, business_type)
self.online_log.info('{0} [edms download success] [task={1}] [times={2}] '
'[pdf_path={3}]'.format(self.e_log_base, task_str, times, pdf_path))
self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
self.e_log_base, task_str, times))
pdf_handler.e_contract_process()
self.online_log.info(
'{0} [pdf to img end] [task={1}] [times={2}]'.format(self.e_log_base, task_str, times))
except Exception as e:
self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] '
'[error={3}]'.format(self.e_log_base, task_str, times,
traceback.format_exc()))
else:
break
else:
raise Exception('download or pdf to img failed')
try:
doc.page_count = pdf_handler.page_count
doc.save()
except Exception as e:
self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
self.log_base, traceback.format_exc()))
# 获取算法结果
# ocr_result = wxbs_predict(pdf_handler.pdf_info)
# page_res = {}
# for page_num, page_info in ocr_result.get('page_info', {}).items():
# if isinstance(page_num, str) and page_num.startswith('page_'):
# page_res[page_num] = {
# 'classify': int(classify_1_str),
# "is_asp": ocr_result.get('is_asp', False),
# 'page_num': page_num,
# 'page_info': page_info
# }
# contract_res = {}
# for img_path_tmp, page_key in pdf_handler.img_path_pno_list:
# if classify_1_str == str(consts.HMH_CLASSIFY):
# img_contract_res = {
# 'code': 1,
# 'data': [
# {
# 'classify': consts.HMH_CLASSIFY,
# 'data': hmh_predict(pdf_handler.pdf_info)
# }
# ]
# }
# else:
# if page_key in page_res:
# img_contract_res = {
# 'code': 1,
# 'data': [
# {
# 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
# 'data': page_res[page_key]
# }
# ]
# }
# else:
# img_contract_res = {
# 'code': 1,
# 'data': [
# {
# 'classify': int(classify_1_str),
# }
# ]
# }
# contract_res[img_path_tmp] = img_contract_res
with lock:
# res_dict[task_str] = contract_res
res_dict[task_str] = '{"/data/AFC/OCR_Files/60671/img/page_0_img_0.png":{"code":1,"msg":"success","data":[{"classify":12,"confidence":0.9999922513961792,"data":[{"summary":["冯轩","110109199202260310",null,null,"2025-06-30","2025-06-25","2025-06-30"],"cells":[{"start_row":0,"start_column":0,"words":"交易单号"},{"start_row":0,"start_column":1,"words":"交易时间"},{"start_row":0,"start_column":2,"words":"交易类型"},{"start_row":0,"start_column":3,"words":"收/支/其他"},{"start_row":0,"start_column":4,"words":"交易方式"},{"start_row":0,"start_column":5,"words":"金额(元)"},{"start_row":0,"start_column":6,"words":"交易对方"},{"start_row":0,"start_column":7,"words":"商户单号"},{"start_row":1,"start_column":0,"words":"4200002687202506298013\n175397"},{"start_row":1,"start_column":1,"words":"2025-06-29\n13:47:15"},{"start_row":1,"start_column":2,"words":"商户消费"},{"start_row":1,"start_column":3,"words":"支出"},{"start_row":1,"start_column":4,"words":"浦发银行储\n蓄卡(9862)"},{"start_row":1,"start_column":5,"words":"333.00"},{"start_row":1,"start_column":6,"words":"美团平台商\n户"},{"start_row":1,"start_column":7,"words":"20250629134707U\n8927844819024394"},{"start_row":2,"start_column":0,"words":"1000039901017506286326\n541033022"},{"start_row":2,"start_column":1,"words":"2025-06-28\n21:07:37"},{"start_row":2,"start_column":2,"words":"微信红包"},{"start_row":2,"start_column":3,"words":"收入"},{"start_row":2,"start_column":4,"words":"/"},{"start_row":2,"start_column":5,"words":"1.20"},{"start_row":2,"start_column":6,"words":"默"},{"start_row":2,"start_column":7,"words":"1000039901202506\n286326541033022"},{"start_row":3,"start_column":0,"words":"1000039901004506276276\n744394049"},{"start_row":3,"start_column":1,"words":"2025-06-27\n23:58:18"},{"start_row":3,"start_column":2,"words":"微信红包"},{"start_row":3,"start_column":3,"words":"收入"},{"start_row":3,"start_column":4,"words":"/"},{"start_row":3,"start_column":5,"words":"1.36"},{"start_row":3,"start_column":6,"words":"九日"},{"start_row":3,"start_column":7,"words":"1000039901202506\n27
6276744394049"},{"start_row":4,"start_column":0,"words":"1000039901008506277372\n907681001"},{"start_row":4,"start_column":1,"words":"2025-06-27\n23:57:27"},{"start_row":4,"start_column":2,"words":"微信红包"},{"start_row":4,"start_column":3,"words":"收入"},{"start_row":4,"start_column":4,"words":"/"},{"start_row":4,"start_column":5,"words":"1.68"},{"start_row":4,"start_column":6,"words":"希望"},{"start_row":4,"start_column":7,"words":"1000039901202506\n277372907681001"},{"start_row":5,"start_column":0,"words":"4200002672202506275479\n881478"},{"start_row":5,"start_column":1,"words":"2025-06-27\n19:50:21"},{"start_row":5,"start_column":2,"words":"商户消费"},{"start_row":5,"start_column":3,"words":"支出"},{"start_row":5,"start_column":4,"words":"浦发银行储\n蓄卡(9862)"},{"start_row":5,"start_column":5,"words":"124.00"},{"start_row":5,"start_column":6,"words":"呷哺呷哺(北\n京首联世纪\n广场)"},{"start_row":5,"start_column":7,"words":";8011433A2506271\n95001M000105"}],"verify":{"verify_res":"real","verify_info":[]}}]}]}}'
finish_queue.put(task_str)
except Exception as e:
try:
doc.status = DocStatus.PROCESS_FAILED.value
doc.page_count = pdf_handler.page_count
doc.save()
self.online_log.warn('{0} [process failed (e-contract)] [task={1}] '
'[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc()))
except Exception as e:
self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
self.e_log_base, traceback.format_exc()))
else: # e-contract or e-fsm-contract or e-hmh
try:
# pdf下载 处理 图片存储 识别
......@@ -1737,7 +1834,7 @@ class Command(BaseCommand, LoggerMixin):
time.sleep(self.sleep_time_task_get)
continue
else:
self.online_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str))
self.online_log.info('{0} [res_2_wb] [get task] [task={1}] [res_dict={2}]'.format(self.log_base, task_str, res_dict))
ocr_1_res = res_dict.pop(task_str, {})
business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
......
......@@ -683,6 +683,9 @@ class UploadDocView(GenericView, DocHandler):
if keyword in document_name:
classify_1 = classify_1_tmp
break
elif '微信支付交易明细证明' in document_name or '微信流水' in document_name:
classify_1 = 12
self.running_log.info('[weixin bs process] [doc_id={0}]'.format(doc.id))
if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!