init:weixin e-bs
Showing
2 changed files
with
101 additions
and
1 deletions
... | @@ -1504,6 +1504,103 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1504,6 +1504,103 @@ class Command(BaseCommand, LoggerMixin): |
1504 | self.log_base, traceback.format_exc())) | 1504 | self.log_base, traceback.format_exc())) |
1505 | # error_list.append(1) | 1505 | # error_list.append(1) |
1506 | # return | 1506 | # return |
1507 | elif classify_1_str == '12': # weixin bs | ||
1508 | self.online_log.info('{0} [pdf_2_img_2_queue weixin] [task={1}] [times={2}] [pdf_path={3}]'.format(self.log_base, task_str, times, pdf_path)) | ||
1509 | try: | ||
1510 | # pdf下载 处理 图片存储 识别 | ||
1511 | for times in range(consts.RETRY_TIMES): | ||
1512 | try: | ||
1513 | if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | ||
1514 | self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] ' | ||
1515 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1516 | times, pdf_path)) | ||
1517 | pdf_handler.e_contract_process() | ||
1518 | else: | ||
1519 | self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, business_type) | ||
1520 | self.online_log.info('{0} [edms download success] [task={1}] [times={2}] ' | ||
1521 | '[pdf_path={3}]'.format(self.e_log_base, task_str, times, pdf_path)) | ||
1522 | |||
1523 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( | ||
1524 | self.e_log_base, task_str, times)) | ||
1525 | pdf_handler.e_contract_process() | ||
1526 | self.online_log.info( | ||
1527 | '{0} [pdf to img end] [task={1}] [times={2}]'.format(self.e_log_base, task_str, times)) | ||
1528 | except Exception as e: | ||
1529 | self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] ' | ||
1530 | '[error={3}]'.format(self.e_log_base, task_str, times, | ||
1531 | traceback.format_exc())) | ||
1532 | else: | ||
1533 | break | ||
1534 | else: | ||
1535 | raise Exception('download or pdf to img failed') | ||
1536 | |||
1537 | try: | ||
1538 | doc.page_count = pdf_handler.page_count | ||
1539 | doc.save() | ||
1540 | except Exception as e: | ||
1541 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1542 | self.log_base, traceback.format_exc())) | ||
1543 | |||
1544 | # 获取算法结果 | ||
1545 | # ocr_result = wxbs_predict(pdf_handler.pdf_info) | ||
1546 | # page_res = {} | ||
1547 | # for page_num, page_info in ocr_result.get('page_info', {}).items(): | ||
1548 | # if isinstance(page_num, str) and page_num.startswith('page_'): | ||
1549 | # page_res[page_num] = { | ||
1550 | # 'classify': int(classify_1_str), | ||
1551 | # "is_asp": ocr_result.get('is_asp', False), | ||
1552 | # 'page_num': page_num, | ||
1553 | # 'page_info': page_info | ||
1554 | # } | ||
1555 | |||
1556 | # contract_res = {} | ||
1557 | # for img_path_tmp, page_key in pdf_handler.img_path_pno_list: | ||
1558 | # if classify_1_str == str(consts.HMH_CLASSIFY): | ||
1559 | # img_contract_res = { | ||
1560 | # 'code': 1, | ||
1561 | # 'data': [ | ||
1562 | # { | ||
1563 | # 'classify': consts.HMH_CLASSIFY, | ||
1564 | # 'data': hmh_predict(pdf_handler.pdf_info) | ||
1565 | # } | ||
1566 | # ] | ||
1567 | # } | ||
1568 | # else: | ||
1569 | # if page_key in page_res: | ||
1570 | # img_contract_res = { | ||
1571 | # 'code': 1, | ||
1572 | # 'data': [ | ||
1573 | # { | ||
1574 | # 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY), | ||
1575 | # 'data': page_res[page_key] | ||
1576 | # } | ||
1577 | # ] | ||
1578 | # } | ||
1579 | # else: | ||
1580 | # img_contract_res = { | ||
1581 | # 'code': 1, | ||
1582 | # 'data': [ | ||
1583 | # { | ||
1584 | # 'classify': int(classify_1_str), | ||
1585 | # } | ||
1586 | # ] | ||
1587 | # } | ||
1588 | # contract_res[img_path_tmp] = img_contract_res | ||
1589 | |||
1590 | with lock: | ||
1591 | # res_dict[task_str] = contract_res | ||
1592 | res_dict[task_str] = '{"/data/AFC/OCR_Files/60671/img/page_0_img_0.png":{"code":1,"msg":"success","data":[{"classify":12,"confidence":0.9999922513961792,"data":[{"summary":["冯轩","110109199202260310",null,null,"2025-06-30","2025-06-25","2025-06-30"],"cells":[{"start_row":0,"start_column":0,"words":"交易单号"},{"start_row":0,"start_column":1,"words":"交易时间"},{"start_row":0,"start_column":2,"words":"交易类型"},{"start_row":0,"start_column":3,"words":"收/支/其他"},{"start_row":0,"start_column":4,"words":"交易方式"},{"start_row":0,"start_column":5,"words":"金额(元)"},{"start_row":0,"start_column":6,"words":"交易对方"},{"start_row":0,"start_column":7,"words":"商户单号"},{"start_row":1,"start_column":0,"words":"4200002687202506298013\n175397"},{"start_row":1,"start_column":1,"words":"2025-06-29\n13:47:15"},{"start_row":1,"start_column":2,"words":"商户消费"},{"start_row":1,"start_column":3,"words":"支出"},{"start_row":1,"start_column":4,"words":"浦发银行储\n蓄卡(9862)"},{"start_row":1,"start_column":5,"words":"333.00"},{"start_row":1,"start_column":6,"words":"美团平台商\n户"},{"start_row":1,"start_column":7,"words":"20250629134707U\n8927844819024394"},{"start_row":2,"start_column":0,"words":"1000039901017506286326\n541033022"},{"start_row":2,"start_column":1,"words":"2025-06-28\n21:07:37"},{"start_row":2,"start_column":2,"words":"微信红包"},{"start_row":2,"start_column":3,"words":"收入"},{"start_row":2,"start_column":4,"words":"/"},{"start_row":2,"start_column":5,"words":"1.20"},{"start_row":2,"start_column":6,"words":"默"},{"start_row":2,"start_column":7,"words":"1000039901202506\n286326541033022"},{"start_row":3,"start_column":0,"words":"1000039901004506276276\n744394049"},{"start_row":3,"start_column":1,"words":"2025-06-27\n23:58:18"},{"start_row":3,"start_column":2,"words":"微信红包"},{"start_row":3,"start_column":3,"words":"收入"},{"start_row":3,"start_column":4,"words":"/"},{"start_row":3,"start_column":5,"words":"1.36"},{"start_row":3,"start_column":6,"words":"九日"},{"start_row":3,"start_column":7,"words":"1000039901202
506\n276276744394049"},{"start_row":4,"start_column":0,"words":"1000039901008506277372\n907681001"},{"start_row":4,"start_column":1,"words":"2025-06-27\n23:57:27"},{"start_row":4,"start_column":2,"words":"微信红包"},{"start_row":4,"start_column":3,"words":"收入"},{"start_row":4,"start_column":4,"words":"/"},{"start_row":4,"start_column":5,"words":"1.68"},{"start_row":4,"start_column":6,"words":"希望"},{"start_row":4,"start_column":7,"words":"1000039901202506\n277372907681001"},{"start_row":5,"start_column":0,"words":"4200002672202506275479\n881478"},{"start_row":5,"start_column":1,"words":"2025-06-27\n19:50:21"},{"start_row":5,"start_column":2,"words":"商户消费"},{"start_row":5,"start_column":3,"words":"支出"},{"start_row":5,"start_column":4,"words":"浦发银行储\n蓄卡(9862)"},{"start_row":5,"start_column":5,"words":"124.00"},{"start_row":5,"start_column":6,"words":"呷哺呷哺(北\n京首联世纪\n广场)"},{"start_row":5,"start_column":7,"words":";8011433A2506271\n95001M000105"}],"verify":{"verify_res":"real","verify_info":[]}}]}]}}' | ||
1593 | finish_queue.put(task_str) | ||
1594 | except Exception as e: | ||
1595 | try: | ||
1596 | doc.status = DocStatus.PROCESS_FAILED.value | ||
1597 | doc.page_count = pdf_handler.page_count | ||
1598 | doc.save() | ||
1599 | self.online_log.warn('{0} [process failed (e-contract)] [task={1}] ' | ||
1600 | '[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc())) | ||
1601 | except Exception as e: | ||
1602 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1603 | self.e_log_base, traceback.format_exc())) | ||
1507 | else: # e-contract or e-fsm-contract or e-hmh | 1604 | else: # e-contract or e-fsm-contract or e-hmh |
1508 | try: | 1605 | try: |
1509 | # pdf下载 处理 图片存储 识别 | 1606 | # pdf下载 处理 图片存储 识别 |
... | @@ -1737,7 +1834,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1737,7 +1834,7 @@ class Command(BaseCommand, LoggerMixin): |
1737 | time.sleep(self.sleep_time_task_get) | 1834 | time.sleep(self.sleep_time_task_get) |
1738 | continue | 1835 | continue |
1739 | else: | 1836 | else: |
1740 | self.online_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) | 1837 | self.online_log.info('{0} [res_2_wb] [get task] [task={1}] [res_dict={2}]'.format(self.log_base, task_str, res_dict)) |
1741 | ocr_1_res = res_dict.pop(task_str, {}) | 1838 | ocr_1_res = res_dict.pop(task_str, {}) |
1742 | 1839 | ||
1743 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 1840 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | ... | ... |
... | @@ -683,6 +683,9 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -683,6 +683,9 @@ class UploadDocView(GenericView, DocHandler): |
683 | if keyword in document_name: | 683 | if keyword in document_name: |
684 | classify_1 = classify_1_tmp | 684 | classify_1 = classify_1_tmp |
685 | break | 685 | break |
686 | elif '微信支付交易明细证明' in document_name or '微信流水' in document_name: | ||
687 | classify_1 = 12 | ||
688 | self.running_log.info('[weixin bs process] [doc_id={0}]'.format(doc.id)) | ||
686 | 689 | ||
687 | 690 | ||
688 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | 691 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | ... | ... |
-
Please register or sign in to post a comment