PROD Version
Showing 2 changed files with 24 additions and 16 deletions
| ... | @@ -8,7 +8,7 @@ PAGE_SIZE_DEFAULT = 10 | ... | @@ -8,7 +8,7 @@ PAGE_SIZE_DEFAULT = 10 |
| 8 | 8 | ||
| 9 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | 9 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' |
| 10 | 10 | ||
| 11 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT'] | 11 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] |
| 12 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | 12 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] |
| 13 | 13 | ||
| 14 | HIL_PREFIX = 'HIL' | 14 | HIL_PREFIX = 'HIL' |
| ... | @@ -31,7 +31,7 @@ DOWNLOAD_ACTION_TYPE = 'Downloaded' | ... | @@ -31,7 +31,7 @@ DOWNLOAD_ACTION_TYPE = 'Downloaded' |
| 31 | DOC_SCHEMA_ID_FILL = { | 31 | DOC_SCHEMA_ID_FILL = { |
| 32 | 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'), | 32 | 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'), |
| 33 | 'SETTLEMENT': (20, 'DFE-AutoFilingScript'), | 33 | 'SETTLEMENT': (20, 'DFE-AutoFilingScript'), |
| 34 | 'CONTRACT MANAGEMENT': (86, 'Schema-Based') | 34 | 'CONTRACTMANAGEMENT': (86, 'Schema-Based') |
| 35 | } | 35 | } |
| 36 | BUSINESS_TYPE_DICT = { | 36 | BUSINESS_TYPE_DICT = { |
| 37 | HIL_PREFIX: 'CO00002', | 37 | HIL_PREFIX: 'CO00002', | ... | ... |
| ... | @@ -72,7 +72,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -72,7 +72,12 @@ class Command(BaseCommand, LoggerMixin): |
| 72 | return None, None, None | 72 | return None, None, None |
| 73 | 73 | ||
| 74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) | 74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) |
| 75 | try: | ||
| 75 | doc, business_type = self.get_doc_object(task_str) | 76 | doc, business_type = self.get_doc_object(task_str) |
| 77 | except Exception as e: | ||
| 78 | rh.enqueue([task_str], is_priority) | ||
| 79 | self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
| 80 | raise e | ||
| 76 | 81 | ||
| 77 | if doc is None: | 82 | if doc is None: |
| 78 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( | 83 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( |
| ... | @@ -364,7 +369,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -364,7 +369,7 @@ class Command(BaseCommand, LoggerMixin): |
| 364 | # summary['confidence'] = max(summary['confidence']) | 369 | # summary['confidence'] = max(summary['confidence']) |
| 365 | return merged_bs_summary | 370 | return merged_bs_summary |
| 366 | 371 | ||
| 367 | def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock): | 372 | def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock, error_list): |
| 368 | while self.switch: | 373 | while self.switch: |
| 369 | try: | 374 | try: |
| 370 | # 1. 从队列获取文件信息 | 375 | # 1. 从队列获取文件信息 |
| ... | @@ -374,8 +379,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -374,8 +379,10 @@ class Command(BaseCommand, LoggerMixin): |
| 374 | time.sleep(self.sleep_time_doc_get) | 379 | time.sleep(self.sleep_time_doc_get) |
| 375 | continue | 380 | continue |
| 376 | except Exception as e: | 381 | except Exception as e: |
| 377 | self.cronjob_log.error('{0} [process failed (get doc into)] [error={1}]'.format( | 382 | self.cronjob_log.error('{0} [process error (get doc info out)] [error={1}]'.format( |
| 378 | self.log_base, traceback.format_exc())) | 383 | self.log_base, traceback.format_exc())) |
| 384 | error_list.append(1) | ||
| 385 | return | ||
| 379 | else: | 386 | else: |
| 380 | try: | 387 | try: |
| 381 | # 2. 从EDMS获取PDF文件 | 388 | # 2. 从EDMS获取PDF文件 |
| ... | @@ -413,8 +420,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -413,8 +420,8 @@ class Command(BaseCommand, LoggerMixin): |
| 413 | self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( | 420 | self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( |
| 414 | self.log_base, task_str, traceback.format_exc())) | 421 | self.log_base, task_str, traceback.format_exc())) |
| 415 | 422 | ||
| 416 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url): | 423 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list): |
| 417 | while True: | 424 | while len(error_list) == 0 or not img_queue.empty(): |
| 418 | try: | 425 | try: |
| 419 | img_path = img_queue.get(block=False) | 426 | img_path = img_queue.get(block=False) |
| 420 | except Exception as e: | 427 | except Exception as e: |
| ... | @@ -478,8 +485,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -478,8 +485,8 @@ class Command(BaseCommand, LoggerMixin): |
| 478 | self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format( | 485 | self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format( |
| 479 | self.log_base, img_path, traceback.format_exc())) | 486 | self.log_base, img_path, traceback.format_exc())) |
| 480 | 487 | ||
| 481 | def res_2_wb(self, res_dict, finish_queue, lock): | 488 | def res_2_wb(self, res_dict, img_queue, finish_queue, lock, error_list): |
| 482 | while True: | 489 | while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): |
| 483 | try: | 490 | try: |
| 484 | task_str = finish_queue.get(block=False) | 491 | task_str = finish_queue.get(block=False) |
| 485 | except Exception as e: | 492 | except Exception as e: |
| ... | @@ -605,8 +612,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -605,8 +612,8 @@ class Command(BaseCommand, LoggerMixin): |
| 605 | doc, business_type = self.get_doc_object(task_str) | 612 | doc, business_type = self.get_doc_object(task_str) |
| 606 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) | 613 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) |
| 607 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) | 614 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) |
| 608 | src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 615 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
| 609 | wb.save(src_excel_path) | 616 | # wb.save(src_excel_path) |
| 610 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) | 617 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) |
| 611 | wb.save(excel_path) | 618 | wb.save(excel_path) |
| 612 | except Exception as e: | 619 | except Exception as e: |
| ... | @@ -637,8 +644,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -637,8 +644,8 @@ class Command(BaseCommand, LoggerMixin): |
| 637 | img_save_path = os.path.join(doc_data_path, 'img') | 644 | img_save_path = os.path.join(doc_data_path, 'img') |
| 638 | write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id))) | 645 | write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id))) |
| 639 | shutil.rmtree(img_save_path, ignore_errors=True) | 646 | shutil.rmtree(img_save_path, ignore_errors=True) |
| 640 | # pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) | 647 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) |
| 641 | # os.remove(pdf_path) | 648 | os.remove(pdf_path) |
| 642 | # os.remove(src_excel_path) | 649 | # os.remove(src_excel_path) |
| 643 | except Exception as e: | 650 | except Exception as e: |
| 644 | self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format( | 651 | self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format( |
| ... | @@ -681,7 +688,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -681,7 +688,7 @@ class Command(BaseCommand, LoggerMixin): |
| 681 | setattr(doc, field, count) | 688 | setattr(doc, field, count) |
| 682 | doc.save() | 689 | doc.save() |
| 683 | self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) | 690 | self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) |
| 684 | # os.remove(excel_path) | 691 | os.remove(excel_path) |
| 685 | except Exception as e: | 692 | except Exception as e: |
| 686 | self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format( | 693 | self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format( |
| 687 | self.log_base, task_str, traceback.format_exc())) | 694 | self.log_base, task_str, traceback.format_exc())) |
| ... | @@ -695,21 +702,22 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -695,21 +702,22 @@ class Command(BaseCommand, LoggerMixin): |
| 695 | def handle(self, *args, **kwargs): | 702 | def handle(self, *args, **kwargs): |
| 696 | lock = Lock() | 703 | lock = Lock() |
| 697 | with Manager() as manager: | 704 | with Manager() as manager: |
| 705 | error_list = manager.list() | ||
| 698 | todo_count_dict = manager.dict() | 706 | todo_count_dict = manager.dict() |
| 699 | res_dict = manager.dict() | 707 | res_dict = manager.dict() |
| 700 | img_queue = Queue(self.img_queue_size) | 708 | img_queue = Queue(self.img_queue_size) |
| 701 | finish_queue = Queue() | 709 | finish_queue = Queue() |
| 702 | 710 | ||
| 703 | process_list = [] | 711 | process_list = [] |
| 704 | pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock)) | 712 | pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock, error_list)) |
| 705 | process_list.append(pdf_process) | 713 | process_list.append(pdf_process) |
| 706 | 714 | ||
| 707 | for url in self.ocr_1_urls.values(): | 715 | for url in self.ocr_1_urls.values(): |
| 708 | ocr_1_process = Process(target=self.img_2_ocr_1, args=( | 716 | ocr_1_process = Process(target=self.img_2_ocr_1, args=( |
| 709 | img_queue, todo_count_dict, res_dict, finish_queue, lock, url)) | 717 | img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list)) |
| 710 | process_list.append(ocr_1_process) | 718 | process_list.append(ocr_1_process) |
| 711 | 719 | ||
| 712 | wb_process = Process(target=self.res_2_wb, args=(res_dict, finish_queue, lock)) | 720 | wb_process = Process(target=self.res_2_wb, args=(res_dict, img_queue, finish_queue, lock, error_list)) |
| 713 | process_list.append(wb_process) | 721 | process_list.append(wb_process) |
| 714 | 722 | ||
| 715 | for p in process_list: | 723 | for p in process_list: | ... | ... |
-
Please register or sign in to post a comment