ec638e4f by 周伟奇

PROD Version

1 parent 364772ed
...@@ -8,7 +8,7 @@ PAGE_SIZE_DEFAULT = 10 ...@@ -8,7 +8,7 @@ PAGE_SIZE_DEFAULT = 10
8 8
9 FIXED_APPLICATION_ID_PREFIX = 'CH-S' 9 FIXED_APPLICATION_ID_PREFIX = 'CH-S'
10 10
11 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT'] 11 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT']
12 DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] 12 DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']
13 13
14 HIL_PREFIX = 'HIL' 14 HIL_PREFIX = 'HIL'
...@@ -31,7 +31,7 @@ DOWNLOAD_ACTION_TYPE = 'Downloaded' ...@@ -31,7 +31,7 @@ DOWNLOAD_ACTION_TYPE = 'Downloaded'
31 DOC_SCHEMA_ID_FILL = { 31 DOC_SCHEMA_ID_FILL = {
32 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'), 32 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'),
33 'SETTLEMENT': (20, 'DFE-AutoFilingScript'), 33 'SETTLEMENT': (20, 'DFE-AutoFilingScript'),
34 'CONTRACT MANAGEMENT': (86, 'Schema-Based') 34 'CONTRACTMANAGEMENT': (86, 'Schema-Based')
35 } 35 }
36 BUSINESS_TYPE_DICT = { 36 BUSINESS_TYPE_DICT = {
37 HIL_PREFIX: 'CO00002', 37 HIL_PREFIX: 'CO00002',
......
...@@ -72,7 +72,12 @@ class Command(BaseCommand, LoggerMixin): ...@@ -72,7 +72,12 @@ class Command(BaseCommand, LoggerMixin):
72 return None, None, None 72 return None, None, None
73 73
74 self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) 74 self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority))
75 try:
75 doc, business_type = self.get_doc_object(task_str) 76 doc, business_type = self.get_doc_object(task_str)
77 except Exception as e:
78 rh.enqueue([task_str], is_priority)
79 self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format(self.log_base, traceback.format_exc()))
80 raise e
76 81
77 if doc is None: 82 if doc is None:
78 self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( 83 self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format(
...@@ -364,7 +369,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -364,7 +369,7 @@ class Command(BaseCommand, LoggerMixin):
364 # summary['confidence'] = max(summary['confidence']) 369 # summary['confidence'] = max(summary['confidence'])
365 return merged_bs_summary 370 return merged_bs_summary
366 371
367 def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock): 372 def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock, error_list):
368 while self.switch: 373 while self.switch:
369 try: 374 try:
370 # 1. 从队列获取文件信息 375 # 1. 从队列获取文件信息
...@@ -374,8 +379,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -374,8 +379,10 @@ class Command(BaseCommand, LoggerMixin):
374 time.sleep(self.sleep_time_doc_get) 379 time.sleep(self.sleep_time_doc_get)
375 continue 380 continue
376 except Exception as e: 381 except Exception as e:
377 self.cronjob_log.error('{0} [process failed (get doc into)] [error={1}]'.format( 382 self.cronjob_log.error('{0} [process error (get doc info out)] [error={1}]'.format(
378 self.log_base, traceback.format_exc())) 383 self.log_base, traceback.format_exc()))
384 error_list.append(1)
385 return
379 else: 386 else:
380 try: 387 try:
381 # 2. 从EDMS获取PDF文件 388 # 2. 从EDMS获取PDF文件
...@@ -413,8 +420,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -413,8 +420,8 @@ class Command(BaseCommand, LoggerMixin):
413 self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( 420 self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format(
414 self.log_base, task_str, traceback.format_exc())) 421 self.log_base, task_str, traceback.format_exc()))
415 422
416 def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url): 423 def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list):
417 while True: 424 while len(error_list) == 0 or not img_queue.empty():
418 try: 425 try:
419 img_path = img_queue.get(block=False) 426 img_path = img_queue.get(block=False)
420 except Exception as e: 427 except Exception as e:
...@@ -478,8 +485,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -478,8 +485,8 @@ class Command(BaseCommand, LoggerMixin):
478 self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format( 485 self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format(
479 self.log_base, img_path, traceback.format_exc())) 486 self.log_base, img_path, traceback.format_exc()))
480 487
481 def res_2_wb(self, res_dict, finish_queue, lock): 488 def res_2_wb(self, res_dict, img_queue, finish_queue, lock, error_list):
482 while True: 489 while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty():
483 try: 490 try:
484 task_str = finish_queue.get(block=False) 491 task_str = finish_queue.get(block=False)
485 except Exception as e: 492 except Exception as e:
...@@ -605,8 +612,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -605,8 +612,8 @@ class Command(BaseCommand, LoggerMixin):
605 doc, business_type = self.get_doc_object(task_str) 612 doc, business_type = self.get_doc_object(task_str)
606 doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) 613 doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id))
607 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) 614 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id))
608 src_excel_path = os.path.join(doc_data_path, 'src.xlsx') 615 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
609 wb.save(src_excel_path) 616 # wb.save(src_excel_path)
610 count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) 617 count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme)
611 wb.save(excel_path) 618 wb.save(excel_path)
612 except Exception as e: 619 except Exception as e:
...@@ -637,8 +644,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -637,8 +644,8 @@ class Command(BaseCommand, LoggerMixin):
637 img_save_path = os.path.join(doc_data_path, 'img') 644 img_save_path = os.path.join(doc_data_path, 'img')
638 write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id))) 645 write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id)))
639 shutil.rmtree(img_save_path, ignore_errors=True) 646 shutil.rmtree(img_save_path, ignore_errors=True)
640 # pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) 647 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
641 # os.remove(pdf_path) 648 os.remove(pdf_path)
642 # os.remove(src_excel_path) 649 # os.remove(src_excel_path)
643 except Exception as e: 650 except Exception as e:
644 self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format( 651 self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format(
...@@ -681,7 +688,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -681,7 +688,7 @@ class Command(BaseCommand, LoggerMixin):
681 setattr(doc, field, count) 688 setattr(doc, field, count)
682 doc.save() 689 doc.save()
683 self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) 690 self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str))
684 # os.remove(excel_path) 691 os.remove(excel_path)
685 except Exception as e: 692 except Exception as e:
686 self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format( 693 self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format(
687 self.log_base, task_str, traceback.format_exc())) 694 self.log_base, task_str, traceback.format_exc()))
...@@ -695,21 +702,22 @@ class Command(BaseCommand, LoggerMixin): ...@@ -695,21 +702,22 @@ class Command(BaseCommand, LoggerMixin):
695 def handle(self, *args, **kwargs): 702 def handle(self, *args, **kwargs):
696 lock = Lock() 703 lock = Lock()
697 with Manager() as manager: 704 with Manager() as manager:
705 error_list = manager.list()
698 todo_count_dict = manager.dict() 706 todo_count_dict = manager.dict()
699 res_dict = manager.dict() 707 res_dict = manager.dict()
700 img_queue = Queue(self.img_queue_size) 708 img_queue = Queue(self.img_queue_size)
701 finish_queue = Queue() 709 finish_queue = Queue()
702 710
703 process_list = [] 711 process_list = []
704 pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock)) 712 pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock, error_list))
705 process_list.append(pdf_process) 713 process_list.append(pdf_process)
706 714
707 for url in self.ocr_1_urls.values(): 715 for url in self.ocr_1_urls.values():
708 ocr_1_process = Process(target=self.img_2_ocr_1, args=( 716 ocr_1_process = Process(target=self.img_2_ocr_1, args=(
709 img_queue, todo_count_dict, res_dict, finish_queue, lock, url)) 717 img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list))
710 process_list.append(ocr_1_process) 718 process_list.append(ocr_1_process)
711 719
712 wb_process = Process(target=self.res_2_wb, args=(res_dict, finish_queue, lock)) 720 wb_process = Process(target=self.res_2_wb, args=(res_dict, img_queue, finish_queue, lock, error_list))
713 process_list.append(wb_process) 721 process_list.append(wb_process)
714 722
715 for p in process_list: 723 for p in process_list:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!