PROD Version
Showing 2 changed files with 24 additions and 16 deletions
... | @@ -8,7 +8,7 @@ PAGE_SIZE_DEFAULT = 10 | ... | @@ -8,7 +8,7 @@ PAGE_SIZE_DEFAULT = 10 |
8 | 8 | ||
9 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | 9 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' |
10 | 10 | ||
11 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT'] | 11 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] |
12 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | 12 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] |
13 | 13 | ||
14 | HIL_PREFIX = 'HIL' | 14 | HIL_PREFIX = 'HIL' |
... | @@ -31,7 +31,7 @@ DOWNLOAD_ACTION_TYPE = 'Downloaded' | ... | @@ -31,7 +31,7 @@ DOWNLOAD_ACTION_TYPE = 'Downloaded' |
31 | DOC_SCHEMA_ID_FILL = { | 31 | DOC_SCHEMA_ID_FILL = { |
32 | 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'), | 32 | 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'), |
33 | 'SETTLEMENT': (20, 'DFE-AutoFilingScript'), | 33 | 'SETTLEMENT': (20, 'DFE-AutoFilingScript'), |
34 | 'CONTRACT MANAGEMENT': (86, 'Schema-Based') | 34 | 'CONTRACTMANAGEMENT': (86, 'Schema-Based') |
35 | } | 35 | } |
36 | BUSINESS_TYPE_DICT = { | 36 | BUSINESS_TYPE_DICT = { |
37 | HIL_PREFIX: 'CO00002', | 37 | HIL_PREFIX: 'CO00002', | ... | ... |
... | @@ -72,7 +72,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -72,7 +72,12 @@ class Command(BaseCommand, LoggerMixin): |
72 | return None, None, None | 72 | return None, None, None |
73 | 73 | ||
74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) | 74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) |
75 | try: | ||
75 | doc, business_type = self.get_doc_object(task_str) | 76 | doc, business_type = self.get_doc_object(task_str) |
77 | except Exception as e: | ||
78 | rh.enqueue([task_str], is_priority) | ||
79 | self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
80 | raise e | ||
76 | 81 | ||
77 | if doc is None: | 82 | if doc is None: |
78 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( | 83 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( |
... | @@ -364,7 +369,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -364,7 +369,7 @@ class Command(BaseCommand, LoggerMixin): |
364 | # summary['confidence'] = max(summary['confidence']) | 369 | # summary['confidence'] = max(summary['confidence']) |
365 | return merged_bs_summary | 370 | return merged_bs_summary |
366 | 371 | ||
367 | def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock): | 372 | def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock, error_list): |
368 | while self.switch: | 373 | while self.switch: |
369 | try: | 374 | try: |
370 | # 1. 从队列获取文件信息 | 375 | # 1. 从队列获取文件信息 |
... | @@ -374,8 +379,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -374,8 +379,10 @@ class Command(BaseCommand, LoggerMixin): |
374 | time.sleep(self.sleep_time_doc_get) | 379 | time.sleep(self.sleep_time_doc_get) |
375 | continue | 380 | continue |
376 | except Exception as e: | 381 | except Exception as e: |
377 | self.cronjob_log.error('{0} [process failed (get doc into)] [error={1}]'.format( | 382 | self.cronjob_log.error('{0} [process error (get doc info out)] [error={1}]'.format( |
378 | self.log_base, traceback.format_exc())) | 383 | self.log_base, traceback.format_exc())) |
384 | error_list.append(1) | ||
385 | return | ||
379 | else: | 386 | else: |
380 | try: | 387 | try: |
381 | # 2. 从EDMS获取PDF文件 | 388 | # 2. 从EDMS获取PDF文件 |
... | @@ -413,8 +420,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -413,8 +420,8 @@ class Command(BaseCommand, LoggerMixin): |
413 | self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( | 420 | self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( |
414 | self.log_base, task_str, traceback.format_exc())) | 421 | self.log_base, task_str, traceback.format_exc())) |
415 | 422 | ||
416 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url): | 423 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list): |
417 | while True: | 424 | while len(error_list) == 0 or not img_queue.empty(): |
418 | try: | 425 | try: |
419 | img_path = img_queue.get(block=False) | 426 | img_path = img_queue.get(block=False) |
420 | except Exception as e: | 427 | except Exception as e: |
... | @@ -478,8 +485,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -478,8 +485,8 @@ class Command(BaseCommand, LoggerMixin): |
478 | self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format( | 485 | self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format( |
479 | self.log_base, img_path, traceback.format_exc())) | 486 | self.log_base, img_path, traceback.format_exc())) |
480 | 487 | ||
481 | def res_2_wb(self, res_dict, finish_queue, lock): | 488 | def res_2_wb(self, res_dict, img_queue, finish_queue, lock, error_list): |
482 | while True: | 489 | while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): |
483 | try: | 490 | try: |
484 | task_str = finish_queue.get(block=False) | 491 | task_str = finish_queue.get(block=False) |
485 | except Exception as e: | 492 | except Exception as e: |
... | @@ -605,8 +612,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -605,8 +612,8 @@ class Command(BaseCommand, LoggerMixin): |
605 | doc, business_type = self.get_doc_object(task_str) | 612 | doc, business_type = self.get_doc_object(task_str) |
606 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) | 613 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) |
607 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) | 614 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) |
608 | src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 615 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
609 | wb.save(src_excel_path) | 616 | # wb.save(src_excel_path) |
610 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) | 617 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) |
611 | wb.save(excel_path) | 618 | wb.save(excel_path) |
612 | except Exception as e: | 619 | except Exception as e: |
... | @@ -637,8 +644,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -637,8 +644,8 @@ class Command(BaseCommand, LoggerMixin): |
637 | img_save_path = os.path.join(doc_data_path, 'img') | 644 | img_save_path = os.path.join(doc_data_path, 'img') |
638 | write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id))) | 645 | write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id))) |
639 | shutil.rmtree(img_save_path, ignore_errors=True) | 646 | shutil.rmtree(img_save_path, ignore_errors=True) |
640 | # pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) | 647 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) |
641 | # os.remove(pdf_path) | 648 | os.remove(pdf_path) |
642 | # os.remove(src_excel_path) | 649 | # os.remove(src_excel_path) |
643 | except Exception as e: | 650 | except Exception as e: |
644 | self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format( | 651 | self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format( |
... | @@ -681,7 +688,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -681,7 +688,7 @@ class Command(BaseCommand, LoggerMixin): |
681 | setattr(doc, field, count) | 688 | setattr(doc, field, count) |
682 | doc.save() | 689 | doc.save() |
683 | self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) | 690 | self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) |
684 | # os.remove(excel_path) | 691 | os.remove(excel_path) |
685 | except Exception as e: | 692 | except Exception as e: |
686 | self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format( | 693 | self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format( |
687 | self.log_base, task_str, traceback.format_exc())) | 694 | self.log_base, task_str, traceback.format_exc())) |
... | @@ -695,21 +702,22 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -695,21 +702,22 @@ class Command(BaseCommand, LoggerMixin): |
695 | def handle(self, *args, **kwargs): | 702 | def handle(self, *args, **kwargs): |
696 | lock = Lock() | 703 | lock = Lock() |
697 | with Manager() as manager: | 704 | with Manager() as manager: |
705 | error_list = manager.list() | ||
698 | todo_count_dict = manager.dict() | 706 | todo_count_dict = manager.dict() |
699 | res_dict = manager.dict() | 707 | res_dict = manager.dict() |
700 | img_queue = Queue(self.img_queue_size) | 708 | img_queue = Queue(self.img_queue_size) |
701 | finish_queue = Queue() | 709 | finish_queue = Queue() |
702 | 710 | ||
703 | process_list = [] | 711 | process_list = [] |
704 | pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock)) | 712 | pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock, error_list)) |
705 | process_list.append(pdf_process) | 713 | process_list.append(pdf_process) |
706 | 714 | ||
707 | for url in self.ocr_1_urls.values(): | 715 | for url in self.ocr_1_urls.values(): |
708 | ocr_1_process = Process(target=self.img_2_ocr_1, args=( | 716 | ocr_1_process = Process(target=self.img_2_ocr_1, args=( |
709 | img_queue, todo_count_dict, res_dict, finish_queue, lock, url)) | 717 | img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list)) |
710 | process_list.append(ocr_1_process) | 718 | process_list.append(ocr_1_process) |
711 | 719 | ||
712 | wb_process = Process(target=self.res_2_wb, args=(res_dict, finish_queue, lock)) | 720 | wb_process = Process(target=self.res_2_wb, args=(res_dict, img_queue, finish_queue, lock, error_list)) |
713 | process_list.append(wb_process) | 721 | process_list.append(wb_process) |
714 | 722 | ||
715 | for p in process_list: | 723 | for p in process_list: | ... | ... |
-
Please register or sign in to post a comment