ad4deaf3 by 冯轩

init:weixin e-bs

1 parent 8ddb1d4c
...@@ -1504,6 +1504,103 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1504,6 +1504,103 @@ class Command(BaseCommand, LoggerMixin):
1504 self.log_base, traceback.format_exc())) 1504 self.log_base, traceback.format_exc()))
1505 # error_list.append(1) 1505 # error_list.append(1)
1506 # return 1506 # return
1507 elif classify_1_str == '12': # weixin bs
1508 self.online_log.info('{0} [pdf_2_img_2_queue weixin] [task={1}] [times={2}] [pdf_path={3}]'.format(self.log_base, task_str, times, pdf_path))
1509 try:
1510 # pdf下载 处理 图片存储 识别
1511 for times in range(consts.RETRY_TIMES):
1512 try:
1513 if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
1514 self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] '
1515 '[pdf_path={3}]'.format(self.log_base, task_str,
1516 times, pdf_path))
1517 pdf_handler.e_contract_process()
1518 else:
1519 self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, business_type)
1520 self.online_log.info('{0} [edms download success] [task={1}] [times={2}] '
1521 '[pdf_path={3}]'.format(self.e_log_base, task_str, times, pdf_path))
1522
1523 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
1524 self.e_log_base, task_str, times))
1525 pdf_handler.e_contract_process()
1526 self.online_log.info(
1527 '{0} [pdf to img end] [task={1}] [times={2}]'.format(self.e_log_base, task_str, times))
1528 except Exception as e:
1529 self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] '
1530 '[error={3}]'.format(self.e_log_base, task_str, times,
1531 traceback.format_exc()))
1532 else:
1533 break
1534 else:
1535 raise Exception('download or pdf to img failed')
1536
1537 try:
1538 doc.page_count = pdf_handler.page_count
1539 doc.save()
1540 except Exception as e:
1541 self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
1542 self.log_base, traceback.format_exc()))
1543
1544 # 获取算法结果
1545 # ocr_result = wxbs_predict(pdf_handler.pdf_info)
1546 # page_res = {}
1547 # for page_num, page_info in ocr_result.get('page_info', {}).items():
1548 # if isinstance(page_num, str) and page_num.startswith('page_'):
1549 # page_res[page_num] = {
1550 # 'classify': int(classify_1_str),
1551 # "is_asp": ocr_result.get('is_asp', False),
1552 # 'page_num': page_num,
1553 # 'page_info': page_info
1554 # }
1555
1556 # contract_res = {}
1557 # for img_path_tmp, page_key in pdf_handler.img_path_pno_list:
1558 # if classify_1_str == str(consts.HMH_CLASSIFY):
1559 # img_contract_res = {
1560 # 'code': 1,
1561 # 'data': [
1562 # {
1563 # 'classify': consts.HMH_CLASSIFY,
1564 # 'data': hmh_predict(pdf_handler.pdf_info)
1565 # }
1566 # ]
1567 # }
1568 # else:
1569 # if page_key in page_res:
1570 # img_contract_res = {
1571 # 'code': 1,
1572 # 'data': [
1573 # {
1574 # 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
1575 # 'data': page_res[page_key]
1576 # }
1577 # ]
1578 # }
1579 # else:
1580 # img_contract_res = {
1581 # 'code': 1,
1582 # 'data': [
1583 # {
1584 # 'classify': int(classify_1_str),
1585 # }
1586 # ]
1587 # }
1588 # contract_res[img_path_tmp] = img_contract_res
1589
1590 with lock:
1591 # res_dict[task_str] = contract_res
1592 res_dict[task_str] = '{"/data/AFC/OCR_Files/60671/img/page_0_img_0.png":{"code":1,"msg":"success","data":[{"classify":12,"confidence":0.9999922513961792,"data":[{"summary":["冯轩","110109199202260310",null,null,"2025-06-30","2025-06-25","2025-06-30"],"cells":[{"start_row":0,"start_column":0,"words":"交易单号"},{"start_row":0,"start_column":1,"words":"交易时间"},{"start_row":0,"start_column":2,"words":"交易类型"},{"start_row":0,"start_column":3,"words":"收/支/其他"},{"start_row":0,"start_column":4,"words":"交易方式"},{"start_row":0,"start_column":5,"words":"金额(元)"},{"start_row":0,"start_column":6,"words":"交易对方"},{"start_row":0,"start_column":7,"words":"商户单号"},{"start_row":1,"start_column":0,"words":"4200002687202506298013\n175397"},{"start_row":1,"start_column":1,"words":"2025-06-29\n13:47:15"},{"start_row":1,"start_column":2,"words":"商户消费"},{"start_row":1,"start_column":3,"words":"支出"},{"start_row":1,"start_column":4,"words":"浦发银行储\n蓄卡(9862)"},{"start_row":1,"start_column":5,"words":"333.00"},{"start_row":1,"start_column":6,"words":"美团平台商\n户"},{"start_row":1,"start_column":7,"words":"20250629134707U\n8927844819024394"},{"start_row":2,"start_column":0,"words":"1000039901017506286326\n541033022"},{"start_row":2,"start_column":1,"words":"2025-06-28\n21:07:37"},{"start_row":2,"start_column":2,"words":"微信红包"},{"start_row":2,"start_column":3,"words":"收入"},{"start_row":2,"start_column":4,"words":"/"},{"start_row":2,"start_column":5,"words":"1.20"},{"start_row":2,"start_column":6,"words":"默"},{"start_row":2,"start_column":7,"words":"1000039901202506\n286326541033022"},{"start_row":3,"start_column":0,"words":"1000039901004506276276\n744394049"},{"start_row":3,"start_column":1,"words":"2025-06-27\n23:58:18"},{"start_row":3,"start_column":2,"words":"微信红包"},{"start_row":3,"start_column":3,"words":"收入"},{"start_row":3,"start_column":4,"words":"/"},{"start_row":3,"start_column":5,"words":"1.36"},{"start_row":3,"start_column":6,"words":"九日"},{"start_row":3,"start_column":7,"words":"100003990120250
6\n276276744394049"},{"start_row":4,"start_column":0,"words":"1000039901008506277372\n907681001"},{"start_row":4,"start_column":1,"words":"2025-06-27\n23:57:27"},{"start_row":4,"start_column":2,"words":"微信红包"},{"start_row":4,"start_column":3,"words":"收入"},{"start_row":4,"start_column":4,"words":"/"},{"start_row":4,"start_column":5,"words":"1.68"},{"start_row":4,"start_column":6,"words":"希望"},{"start_row":4,"start_column":7,"words":"1000039901202506\n277372907681001"},{"start_row":5,"start_column":0,"words":"4200002672202506275479\n881478"},{"start_row":5,"start_column":1,"words":"2025-06-27\n19:50:21"},{"start_row":5,"start_column":2,"words":"商户消费"},{"start_row":5,"start_column":3,"words":"支出"},{"start_row":5,"start_column":4,"words":"浦发银行储\n蓄卡(9862)"},{"start_row":5,"start_column":5,"words":"124.00"},{"start_row":5,"start_column":6,"words":"呷哺呷哺(北\n京首联世纪\n广场)"},{"start_row":5,"start_column":7,"words":";8011433A2506271\n95001M000105"}],"verify":{"verify_res":"real","verify_info":[]}}]}]}}'
1593 finish_queue.put(task_str)
1594 except Exception as e:
1595 try:
1596 doc.status = DocStatus.PROCESS_FAILED.value
1597 doc.page_count = pdf_handler.page_count
1598 doc.save()
1599 self.online_log.warn('{0} [process failed (e-contract)] [task={1}] '
1600 '[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc()))
1601 except Exception as e:
1602 self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
1603 self.e_log_base, traceback.format_exc()))
1507 else: # e-contract or e-fsm-contract or e-hmh 1604 else: # e-contract or e-fsm-contract or e-hmh
1508 try: 1605 try:
1509 # pdf下载 处理 图片存储 识别 1606 # pdf下载 处理 图片存储 识别
...@@ -1737,7 +1834,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1737,7 +1834,7 @@ class Command(BaseCommand, LoggerMixin):
1737 time.sleep(self.sleep_time_task_get) 1834 time.sleep(self.sleep_time_task_get)
1738 continue 1835 continue
1739 else: 1836 else:
1740 self.online_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) 1837 self.online_log.info('{0} [res_2_wb] [get task] [task={1}] [res_dict={2}]'.format(self.log_base, task_str, res_dict))
1741 ocr_1_res = res_dict.pop(task_str, {}) 1838 ocr_1_res = res_dict.pop(task_str, {})
1742 1839
1743 business_type, doc_id_str = task_str.split(consts.SPLIT_STR) 1840 business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
......
...@@ -683,6 +683,9 @@ class UploadDocView(GenericView, DocHandler): ...@@ -683,6 +683,9 @@ class UploadDocView(GenericView, DocHandler):
683 if keyword in document_name: 683 if keyword in document_name:
684 classify_1 = classify_1_tmp 684 classify_1 = classify_1_tmp
685 break 685 break
686 elif '微信支付交易明细证明' in document_name or '微信流水' in document_name:
687 classify_1 = 12
688 self.running_log.info('[weixin bs process] [doc_id={0}]'.format(doc.id))
686 689
687 690
688 if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ 691 if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!