adb37243 by 冯轩

init

1 parent e08e5c00
...@@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin):
1504 self.log_base, traceback.format_exc())) 1504 self.log_base, traceback.format_exc()))
1505 # error_list.append(1) 1505 # error_list.append(1)
1506 # return 1506 # return
1507 elif classify_1_str == '29': # e-invoice
1508 try:
1509 max_img_count = 500
1510 for times in range(consts.RETRY_TIMES):
1511 try:
1512 if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
1513 self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] '
1514 '[pdf_path={3}]'.format(self.log_base, task_str,
1515 times, pdf_path))
1516 elif os.path.exists(pdf_path):
1517 self.online_log.info('{0} [pdf from zip file] [task={1}] [times={2}] '
1518 '[pdf_path={3}]'.format(self.log_base, task_str,
1519 times, pdf_path))
1520 else:
1521 # self.edms.download(pdf_path, doc.metadata_version_id)
1522 self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme,
1523 business_type)
1524 self.online_log.info('{0} [ecm download success] [task={1}] [times={2}] '
1525 '[pdf_path={3}]'.format(self.log_base, task_str,
1526 times, pdf_path))
1527
1528 # 3.PDF文件提取图片
1529 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
1530 self.log_base, task_str, times))
1531 start_time = time.time()
1532 pdf_handler.extract_image_for_weixin(max_img_count) #沿用微信流程
1533 end_time = time.time()
1534 speed_time = int(end_time - start_time)
1535 self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format(
1536 self.log_base, task_str, times, speed_time, pdf_handler.is_new_modify))
1537 except Exception as e:
1538 self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] '
1539 '[error={3}]'.format(self.log_base, task_str, times,
1540 traceback.format_exc()))
1541 else:
1542 break
1543 else:
1544 raise Exception('download or pdf to img failed')
1545 if pdf_handler.img_count == 0:
1546 self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format(
1547 self.log_base, task_str))
1548 raise Exception('pdf img empty')
1549 elif pdf_handler.img_count >= max_img_count:
1550 self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format(
1551 self.log_base, task_str, pdf_handler.img_count))
1552
1553 try:
1554 report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport
1555 report_table.objects.create(
1556 case_number=doc.application_id,
1557 request_team=RequestTeam.get_value(doc.document_scheme, 0),
1558 request_trigger=RequestTrigger.get_value(doc.data_source, 0),
1559 input_file=doc.document_name,
1560 transaction_start=doc.start_time,
1561 transaction_end=doc.start_time,
1562 successful_at_this_level=False,
1563 failure_reason=FailureReason.IMG_LIMIT.value,
1564 process_name=ProcessName.ALL.value,
1565 notes='pdf page count: {0}'.format(str(pdf_handler.img_count))
1566 )
1567 except Exception as e:
1568 self.online_log.error('{0} [process error (report db save)] [error={1}]'.format(
1569 self.log_base, traceback.format_exc()))
1570
1571 try:
1572 doc.status = DocStatus.PROCESS_FAILED.value
1573 doc.page_count = pdf_handler.page_count
1574 doc.save()
1575 except Exception as e:
1576 self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
1577 self.log_base, traceback.format_exc()))
1578 else:
1579 try:
1580 if pdf_handler.is_e_pdf:
1581 doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \
1582 json.dumps(pdf_handler.metadata)
1583 doc.page_count = pdf_handler.page_count
1584 doc.save()
1585 except Exception as e:
1586 self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
1587 self.log_base, traceback.format_exc()))
1588
1589 with lock:
1590 todo_count_dict[task_str] = pdf_handler.img_count
1591
1592 self.online_log.info('{0} [pdf_2_img_2_queue] [{1}] [is_ebank={2}]'.format(
1593 self.log_base, task_str, pdf_handler.is_ebank
1594 ))
1595 for img_idx, img_path in enumerate(pdf_handler.img_path_list):
1596 while img_queue.full():
1597 self.online_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base))
1598 time.sleep(self.sleep_time_img_put)
1599 if pdf_handler.is_e_weixin_bs:
1600 try:
1601 #self.online_log.info('{0} [pdf_2_img_2_queue] [img_idx={1}] [page_text_list={2}]'.format(self.log_base, img_idx, pdf_handler.page_text_list))
1602 text_list = pdf_handler.page_text_list[img_idx].pop('rebuild_text')
1603 except Exception as e:
1604 text_list = []
1605 else:
1606 text_list = []
1607 img_queue.put((business_type, img_path, text_list))
1608 except Exception as e:
1609 try:
1610 end_time = timezone.now()
1611 report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport
1612 report_table.objects.create(
1613 case_number=doc.application_id,
1614 request_team=RequestTeam.get_value(doc.document_scheme, 0),
1615 request_trigger=RequestTrigger.get_value(doc.data_source, 0),
1616 input_file=doc.document_name,
1617 transaction_start=doc.start_time,
1618 transaction_end=end_time,
1619 successful_at_this_level=False,
1620 failure_reason=FailureReason.PDF.value,
1621 process_name=ProcessName.ALL.value,
1622 )
1623 except Exception as e:
1624 self.online_log.error('{0} [process error (report db save)] [error={1}]'.format(
1625 self.log_base, traceback.format_exc()))
1626 try:
1627 doc.status = DocStatus.PROCESS_FAILED.value
1628 doc.page_count = pdf_handler.page_count
1629 doc.save()
1630 self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] '
1631 '[error={2}]'.format(self.log_base, task_str, traceback.format_exc()))
1632 except Exception as e:
1633 self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
1634 self.log_base, traceback.format_exc()))
1507 else: # e-contract or e-fsm-contract or e-hmh 1635 else: # e-contract or e-fsm-contract or e-hmh
1508 try: 1636 try:
1509 # pdf下载 处理 图片存储 识别 1637 # pdf下载 处理 图片存储 识别
......
...@@ -692,7 +692,10 @@ class UploadDocView(GenericView, DocHandler): ...@@ -692,7 +692,10 @@ class UploadDocView(GenericView, DocHandler):
692 if keyword in document_name: 692 if keyword in document_name:
693 classify_1 = classify_1_tmp 693 classify_1 = classify_1_tmp
694 break 694 break
695 695
696 if classify_1 == 0 and (document_name.startswith('dzfp_')):
697 classify_1 = 29
698 self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id))
696 699
697 if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ 700 if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
698 or document_name.endswith('.RAR'): 701 or document_name.endswith('.RAR'):
...@@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler): ...@@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler):
1248 classify_1 = classify_1_tmp 1251 classify_1 = classify_1_tmp
1249 break 1252 break
1250 1253
1254 if classify_1 == 0 and (document_name.startswith('dzfp_')):
1255 classify_1 = 29
1256 self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id))
1257
1251 # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] 1258 # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
1252 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) 1259 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)])
1253 enqueue_res = rh.enqueue([task], is_priority) 1260 enqueue_res = rh.enqueue([task], is_priority)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!