init
Showing
2 changed files
with
135 additions
and
0 deletions
... | @@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin): |
1504 | self.log_base, traceback.format_exc())) | 1504 | self.log_base, traceback.format_exc())) |
1505 | # error_list.append(1) | 1505 | # error_list.append(1) |
1506 | # return | 1506 | # return |
1507 | elif classify_1_str == '29': # e-invoice | ||
1508 | try: | ||
1509 | max_img_count = 500 | ||
1510 | for times in range(consts.RETRY_TIMES): | ||
1511 | try: | ||
1512 | if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | ||
1513 | self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] ' | ||
1514 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1515 | times, pdf_path)) | ||
1516 | elif os.path.exists(pdf_path): | ||
1517 | self.online_log.info('{0} [pdf from zip file] [task={1}] [times={2}] ' | ||
1518 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1519 | times, pdf_path)) | ||
1520 | else: | ||
1521 | # self.edms.download(pdf_path, doc.metadata_version_id) | ||
1522 | self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, | ||
1523 | business_type) | ||
1524 | self.online_log.info('{0} [ecm download success] [task={1}] [times={2}] ' | ||
1525 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1526 | times, pdf_path)) | ||
1527 | |||
1528 | # 3.PDF文件提取图片 | ||
1529 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( | ||
1530 | self.log_base, task_str, times)) | ||
1531 | start_time = time.time() | ||
1532 | pdf_handler.extract_image_for_weixin(max_img_count) #沿用微信流程 | ||
1533 | end_time = time.time() | ||
1534 | speed_time = int(end_time - start_time) | ||
1535 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( | ||
1536 | self.log_base, task_str, times, speed_time, pdf_handler.is_new_modify)) | ||
1537 | except Exception as e: | ||
1538 | self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] ' | ||
1539 | '[error={3}]'.format(self.log_base, task_str, times, | ||
1540 | traceback.format_exc())) | ||
1541 | else: | ||
1542 | break | ||
1543 | else: | ||
1544 | raise Exception('download or pdf to img failed') | ||
1545 | if pdf_handler.img_count == 0: | ||
1546 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | ||
1547 | self.log_base, task_str)) | ||
1548 | raise Exception('pdf img empty') | ||
1549 | elif pdf_handler.img_count >= max_img_count: | ||
1550 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( | ||
1551 | self.log_base, task_str, pdf_handler.img_count)) | ||
1552 | |||
1553 | try: | ||
1554 | report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport | ||
1555 | report_table.objects.create( | ||
1556 | case_number=doc.application_id, | ||
1557 | request_team=RequestTeam.get_value(doc.document_scheme, 0), | ||
1558 | request_trigger=RequestTrigger.get_value(doc.data_source, 0), | ||
1559 | input_file=doc.document_name, | ||
1560 | transaction_start=doc.start_time, | ||
1561 | transaction_end=doc.start_time, | ||
1562 | successful_at_this_level=False, | ||
1563 | failure_reason=FailureReason.IMG_LIMIT.value, | ||
1564 | process_name=ProcessName.ALL.value, | ||
1565 | notes='pdf page count: {0}'.format(str(pdf_handler.img_count)) | ||
1566 | ) | ||
1567 | except Exception as e: | ||
1568 | self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( | ||
1569 | self.log_base, traceback.format_exc())) | ||
1570 | |||
1571 | try: | ||
1572 | doc.status = DocStatus.PROCESS_FAILED.value | ||
1573 | doc.page_count = pdf_handler.page_count | ||
1574 | doc.save() | ||
1575 | except Exception as e: | ||
1576 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1577 | self.log_base, traceback.format_exc())) | ||
1578 | else: | ||
1579 | try: | ||
1580 | if pdf_handler.is_e_pdf: | ||
1581 | doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \ | ||
1582 | json.dumps(pdf_handler.metadata) | ||
1583 | doc.page_count = pdf_handler.page_count | ||
1584 | doc.save() | ||
1585 | except Exception as e: | ||
1586 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1587 | self.log_base, traceback.format_exc())) | ||
1588 | |||
1589 | with lock: | ||
1590 | todo_count_dict[task_str] = pdf_handler.img_count | ||
1591 | |||
1592 | self.online_log.info('{0} [pdf_2_img_2_queue] [{1}] [is_ebank={2}]'.format( | ||
1593 | self.log_base, task_str, pdf_handler.is_ebank | ||
1594 | )) | ||
1595 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): | ||
1596 | while img_queue.full(): | ||
1597 | self.online_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) | ||
1598 | time.sleep(self.sleep_time_img_put) | ||
1599 | if pdf_handler.is_e_weixin_bs: | ||
1600 | try: | ||
1601 | #self.online_log.info('{0} [pdf_2_img_2_queue] [img_idx={1}] [page_text_list={2}]'.format(self.log_base, img_idx, pdf_handler.page_text_list)) | ||
1602 | text_list = pdf_handler.page_text_list[img_idx].pop('rebuild_text') | ||
1603 | except Exception as e: | ||
1604 | text_list = [] | ||
1605 | else: | ||
1606 | text_list = [] | ||
1607 | img_queue.put((business_type, img_path, text_list)) | ||
1608 | except Exception as e: | ||
1609 | try: | ||
1610 | end_time = timezone.now() | ||
1611 | report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport | ||
1612 | report_table.objects.create( | ||
1613 | case_number=doc.application_id, | ||
1614 | request_team=RequestTeam.get_value(doc.document_scheme, 0), | ||
1615 | request_trigger=RequestTrigger.get_value(doc.data_source, 0), | ||
1616 | input_file=doc.document_name, | ||
1617 | transaction_start=doc.start_time, | ||
1618 | transaction_end=end_time, | ||
1619 | successful_at_this_level=False, | ||
1620 | failure_reason=FailureReason.PDF.value, | ||
1621 | process_name=ProcessName.ALL.value, | ||
1622 | ) | ||
1623 | except Exception as e: | ||
1624 | self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( | ||
1625 | self.log_base, traceback.format_exc())) | ||
1626 | try: | ||
1627 | doc.status = DocStatus.PROCESS_FAILED.value | ||
1628 | doc.page_count = pdf_handler.page_count | ||
1629 | doc.save() | ||
1630 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' | ||
1631 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) | ||
1632 | except Exception as e: | ||
1633 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1634 | self.log_base, traceback.format_exc())) | ||
1507 | else: # e-contract or e-fsm-contract or e-hmh | 1635 | else: # e-contract or e-fsm-contract or e-hmh |
1508 | try: | 1636 | try: |
1509 | # pdf下载 处理 图片存储 识别 | 1637 | # pdf下载 处理 图片存储 识别 | ... | ... |
... | @@ -693,6 +693,9 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -693,6 +693,9 @@ class UploadDocView(GenericView, DocHandler): |
693 | classify_1 = classify_1_tmp | 693 | classify_1 = classify_1_tmp |
694 | break | 694 | break |
695 | 695 | ||
696 | if classify_1 == 0 and (document_name.startswith('dzfp_')): | ||
697 | classify_1 = 29 | ||
698 | self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id)) | ||
696 | 699 | ||
697 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | 700 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ |
698 | or document_name.endswith('.RAR'): | 701 | or document_name.endswith('.RAR'): |
... | @@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler): | ... | @@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler): |
1248 | classify_1 = classify_1_tmp | 1251 | classify_1 = classify_1_tmp |
1249 | break | 1252 | break |
1250 | 1253 | ||
1254 | if classify_1 == 0 and (document_name.startswith('dzfp_')): | ||
1255 | classify_1 = 29 | ||
1256 | self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id)) | ||
1257 | |||
1251 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] | 1258 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] |
1252 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | 1259 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) |
1253 | enqueue_res = rh.enqueue([task], is_priority) | 1260 | enqueue_res = rh.enqueue([task], is_priority) | ... | ... |
-
Please register or sign in to post a comment