init
Showing
2 changed files
with
136 additions
and
1 deletion
| ... | @@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin): |
| 1504 | self.log_base, traceback.format_exc())) | 1504 | self.log_base, traceback.format_exc())) |
| 1505 | # error_list.append(1) | 1505 | # error_list.append(1) |
| 1506 | # return | 1506 | # return |
| 1507 | elif classify_1_str == '29': # e-invoice | ||
| 1508 | try: | ||
| 1509 | max_img_count = 500 | ||
| 1510 | for times in range(consts.RETRY_TIMES): | ||
| 1511 | try: | ||
| 1512 | if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | ||
| 1513 | self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] ' | ||
| 1514 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
| 1515 | times, pdf_path)) | ||
| 1516 | elif os.path.exists(pdf_path): | ||
| 1517 | self.online_log.info('{0} [pdf from zip file] [task={1}] [times={2}] ' | ||
| 1518 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
| 1519 | times, pdf_path)) | ||
| 1520 | else: | ||
| 1521 | # self.edms.download(pdf_path, doc.metadata_version_id) | ||
| 1522 | self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, | ||
| 1523 | business_type) | ||
| 1524 | self.online_log.info('{0} [ecm download success] [task={1}] [times={2}] ' | ||
| 1525 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
| 1526 | times, pdf_path)) | ||
| 1527 | |||
| 1528 | # 3.PDF文件提取图片 | ||
| 1529 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( | ||
| 1530 | self.log_base, task_str, times)) | ||
| 1531 | start_time = time.time() | ||
| 1532 | pdf_handler.extract_image_for_weixin(max_img_count) #沿用微信流程 | ||
| 1533 | end_time = time.time() | ||
| 1534 | speed_time = int(end_time - start_time) | ||
| 1535 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( | ||
| 1536 | self.log_base, task_str, times, speed_time, pdf_handler.is_new_modify)) | ||
| 1537 | except Exception as e: | ||
| 1538 | self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] ' | ||
| 1539 | '[error={3}]'.format(self.log_base, task_str, times, | ||
| 1540 | traceback.format_exc())) | ||
| 1541 | else: | ||
| 1542 | break | ||
| 1543 | else: | ||
| 1544 | raise Exception('download or pdf to img failed') | ||
| 1545 | if pdf_handler.img_count == 0: | ||
| 1546 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | ||
| 1547 | self.log_base, task_str)) | ||
| 1548 | raise Exception('pdf img empty') | ||
| 1549 | elif pdf_handler.img_count >= max_img_count: | ||
| 1550 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( | ||
| 1551 | self.log_base, task_str, pdf_handler.img_count)) | ||
| 1552 | |||
| 1553 | try: | ||
| 1554 | report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport | ||
| 1555 | report_table.objects.create( | ||
| 1556 | case_number=doc.application_id, | ||
| 1557 | request_team=RequestTeam.get_value(doc.document_scheme, 0), | ||
| 1558 | request_trigger=RequestTrigger.get_value(doc.data_source, 0), | ||
| 1559 | input_file=doc.document_name, | ||
| 1560 | transaction_start=doc.start_time, | ||
| 1561 | transaction_end=doc.start_time, | ||
| 1562 | successful_at_this_level=False, | ||
| 1563 | failure_reason=FailureReason.IMG_LIMIT.value, | ||
| 1564 | process_name=ProcessName.ALL.value, | ||
| 1565 | notes='pdf page count: {0}'.format(str(pdf_handler.img_count)) | ||
| 1566 | ) | ||
| 1567 | except Exception as e: | ||
| 1568 | self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( | ||
| 1569 | self.log_base, traceback.format_exc())) | ||
| 1570 | |||
| 1571 | try: | ||
| 1572 | doc.status = DocStatus.PROCESS_FAILED.value | ||
| 1573 | doc.page_count = pdf_handler.page_count | ||
| 1574 | doc.save() | ||
| 1575 | except Exception as e: | ||
| 1576 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
| 1577 | self.log_base, traceback.format_exc())) | ||
| 1578 | else: | ||
| 1579 | try: | ||
| 1580 | if pdf_handler.is_e_pdf: | ||
| 1581 | doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \ | ||
| 1582 | json.dumps(pdf_handler.metadata) | ||
| 1583 | doc.page_count = pdf_handler.page_count | ||
| 1584 | doc.save() | ||
| 1585 | except Exception as e: | ||
| 1586 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
| 1587 | self.log_base, traceback.format_exc())) | ||
| 1588 | |||
| 1589 | with lock: | ||
| 1590 | todo_count_dict[task_str] = pdf_handler.img_count | ||
| 1591 | |||
| 1592 | self.online_log.info('{0} [pdf_2_img_2_queue] [{1}] [is_ebank={2}]'.format( | ||
| 1593 | self.log_base, task_str, pdf_handler.is_ebank | ||
| 1594 | )) | ||
| 1595 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): | ||
| 1596 | while img_queue.full(): | ||
| 1597 | self.online_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) | ||
| 1598 | time.sleep(self.sleep_time_img_put) | ||
| 1599 | if pdf_handler.is_e_weixin_bs: | ||
| 1600 | try: | ||
| 1601 | #self.online_log.info('{0} [pdf_2_img_2_queue] [img_idx={1}] [page_text_list={2}]'.format(self.log_base, img_idx, pdf_handler.page_text_list)) | ||
| 1602 | text_list = pdf_handler.page_text_list[img_idx].pop('rebuild_text') | ||
| 1603 | except Exception as e: | ||
| 1604 | text_list = [] | ||
| 1605 | else: | ||
| 1606 | text_list = [] | ||
| 1607 | img_queue.put((business_type, img_path, text_list)) | ||
| 1608 | except Exception as e: | ||
| 1609 | try: | ||
| 1610 | end_time = timezone.now() | ||
| 1611 | report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport | ||
| 1612 | report_table.objects.create( | ||
| 1613 | case_number=doc.application_id, | ||
| 1614 | request_team=RequestTeam.get_value(doc.document_scheme, 0), | ||
| 1615 | request_trigger=RequestTrigger.get_value(doc.data_source, 0), | ||
| 1616 | input_file=doc.document_name, | ||
| 1617 | transaction_start=doc.start_time, | ||
| 1618 | transaction_end=end_time, | ||
| 1619 | successful_at_this_level=False, | ||
| 1620 | failure_reason=FailureReason.PDF.value, | ||
| 1621 | process_name=ProcessName.ALL.value, | ||
| 1622 | ) | ||
| 1623 | except Exception as e: | ||
| 1624 | self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( | ||
| 1625 | self.log_base, traceback.format_exc())) | ||
| 1626 | try: | ||
| 1627 | doc.status = DocStatus.PROCESS_FAILED.value | ||
| 1628 | doc.page_count = pdf_handler.page_count | ||
| 1629 | doc.save() | ||
| 1630 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' | ||
| 1631 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) | ||
| 1632 | except Exception as e: | ||
| 1633 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
| 1634 | self.log_base, traceback.format_exc())) | ||
| 1507 | else: # e-contract or e-fsm-contract or e-hmh | 1635 | else: # e-contract or e-fsm-contract or e-hmh |
| 1508 | try: | 1636 | try: |
| 1509 | # pdf下载 处理 图片存储 识别 | 1637 | # pdf下载 处理 图片存储 识别 | ... | ... |
| ... | @@ -692,7 +692,10 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -692,7 +692,10 @@ class UploadDocView(GenericView, DocHandler): |
| 692 | if keyword in document_name: | 692 | if keyword in document_name: |
| 693 | classify_1 = classify_1_tmp | 693 | classify_1 = classify_1_tmp |
| 694 | break | 694 | break |
| 695 | 695 | ||
| 696 | if classify_1 == 0 and (document_name.startswith('dzfp_')): | ||
| 697 | classify_1 = 29 | ||
| 698 | self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id)) | ||
| 696 | 699 | ||
| 697 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | 700 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ |
| 698 | or document_name.endswith('.RAR'): | 701 | or document_name.endswith('.RAR'): |
| ... | @@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler): | ... | @@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler): |
| 1248 | classify_1 = classify_1_tmp | 1251 | classify_1 = classify_1_tmp |
| 1249 | break | 1252 | break |
| 1250 | 1253 | ||
| 1254 | if classify_1 == 0 and (document_name.startswith('dzfp_')): | ||
| 1255 | classify_1 = 29 | ||
| 1256 | self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id)) | ||
| 1257 | |||
| 1251 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] | 1258 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] |
| 1252 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | 1259 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) |
| 1253 | enqueue_res = rh.enqueue([task], is_priority) | 1260 | enqueue_res = rh.enqueue([task], is_priority) | ... | ... |
-
Please register or sign in to post a comment