fix flow
Showing 1 changed file with 64 additions and 46 deletions
| ... | @@ -55,8 +55,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -55,8 +55,7 @@ class Command(BaseCommand, LoggerMixin): | 
| 55 | def signal_handler(self, sig, frame): | 55 | def signal_handler(self, sig, frame): | 
| 56 | self.switch = False # 停止处理文件 | 56 | self.switch = False # 停止处理文件 | 
| 57 | 57 | ||
| 58 | @staticmethod | 58 | def get_doc_object(self, task_str): | 
| 59 | def get_doc_object(task_str): | ||
| 60 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 59 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 
| 61 | doc_id = int(doc_id_str) | 60 | doc_id = int(doc_id_str) | 
| 62 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 61 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 
| ... | @@ -71,28 +70,30 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -71,28 +70,30 @@ class Command(BaseCommand, LoggerMixin): | 
| 71 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) | 70 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) | 
| 72 | return None, None, None | 71 | return None, None, None | 
| 73 | 72 | ||
| 74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) | 73 | self.cronjob_log.info('{0} [get_doc_info] [task={1}] [is_priority={2}]'.format( | 
| 74 | self.log_base, task_str, is_priority)) | ||
| 75 | try: | 75 | try: | 
| 76 | doc, business_type = self.get_doc_object(task_str) | 76 | doc, business_type = self.get_doc_object(task_str) | 
| 77 | if doc is None: | ||
| 78 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( | ||
| 79 | self.log_base, task_str, is_priority)) | ||
| 80 | return None, None, None | ||
| 81 | elif doc.status != DocStatus.INIT.value: | ||
| 82 | self.cronjob_log.warn('{0} [get_doc_info] [doc status error] [task_str={1}] [is_priority={2}] ' | ||
| 83 | '[doc_status={3}]'.format(self.log_base, task_str, is_priority, doc.status)) | ||
| 84 | return None, None, None | ||
| 85 | doc.status = DocStatus.PROCESSING.value | ||
| 86 | doc.start_time = timezone.now() | ||
| 87 | doc.save() | ||
| 77 | except Exception as e: | 88 | except Exception as e: | 
| 78 | rh.enqueue([task_str], is_priority) | 89 | rh.enqueue([task_str], is_priority) | 
| 79 | self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format(self.log_base, traceback.format_exc())) | 90 | self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format( | 
| 91 | self.log_base, traceback.format_exc())) | ||
| 80 | raise e | 92 | raise e | 
| 81 | 93 | else: | |
| 82 | if doc is None: | 94 | self.cronjob_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( | 
| 83 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( | ||
| 84 | self.log_base, task_str, is_priority)) | 95 | self.log_base, task_str, is_priority)) | 
| 85 | return None, None, None | 96 | return doc, business_type, task_str | 
| 86 | elif doc.status != DocStatus.INIT.value: | ||
| 87 | self.cronjob_log.warn('{0} [get_doc_info] [doc status error] [task_str={1}] [is_priority={2}] ' | ||
| 88 | '[doc_status={3}]'.format(self.log_base, task_str, is_priority, doc.status)) | ||
| 89 | return None, None, None | ||
| 90 | doc.status = DocStatus.PROCESSING.value | ||
| 91 | doc.start_time = timezone.now() | ||
| 92 | doc.save() | ||
| 93 | self.cronjob_log.info('{0} [get_doc_info] [success] [task_str={1}] [is_priority={2}]'.format( | ||
| 94 | self.log_base, task_str, is_priority)) | ||
| 95 | return doc, business_type, task_str | ||
| 96 | 97 | ||
| 97 | def pdf_download(self, doc, pdf_path): | 98 | def pdf_download(self, doc, pdf_path): | 
| 98 | if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | 99 | if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | 
| ... | @@ -210,21 +211,18 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -210,21 +211,18 @@ class Command(BaseCommand, LoggerMixin): | 
| 210 | else: | 211 | else: | 
| 211 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) | 212 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) | 
| 212 | 213 | ||
| 213 | @staticmethod | 214 | def parse_img_path(self, img_path): | 
| 214 | def parse_img_path(img_path): | ||
| 215 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | 215 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | 
| 216 | part_list = img_name.split('_') | 216 | part_list = img_name.split('_') | 
| 217 | # page_7_img_11_0 | 217 | # page_7_img_11_0 | 
| 218 | return int(part_list[1])+1, int(part_list[3])+1 | 218 | return int(part_list[1])+1, int(part_list[3])+1 | 
| 219 | 219 | ||
| 220 | @staticmethod | 220 | def get_most(self, value_list): | 
| 221 | def get_most(value_list): | ||
| 222 | if value_list: | 221 | if value_list: | 
| 223 | most_common = Counter(value_list).most_common(1) | 222 | most_common = Counter(value_list).most_common(1) | 
| 224 | return most_common[0][0] if most_common else None | 223 | return most_common[0][0] if most_common else None | 
| 225 | 224 | ||
| 226 | @staticmethod | 225 | def date_format(self, date_str, format_str): | 
| 227 | def date_format(date_str, format_str): | ||
| 228 | try: | 226 | try: | 
| 229 | date_res = datetime.strptime(date_str, format_str).date() | 227 | date_res = datetime.strptime(date_str, format_str).date() | 
| 230 | except Exception as e: | 228 | except Exception as e: | 
| ... | @@ -402,23 +400,41 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -402,23 +400,41 @@ class Command(BaseCommand, LoggerMixin): | 
| 402 | self.cronjob_log.info('{0} [pdf to img end] [task={1}] [spend_time={2}]'.format( | 400 | self.cronjob_log.info('{0} [pdf to img end] [task={1}] [spend_time={2}]'.format( | 
| 403 | self.log_base, task_str, speed_time)) | 401 | self.log_base, task_str, speed_time)) | 
| 404 | 402 | ||
| 405 | with lock: | 403 | img_count = len(pdf_handler.img_path_list) | 
| 406 | todo_count_dict[task_str] = len(pdf_handler.img_path_list) | 404 | if img_count == 0: | 
| 407 | for img_path in pdf_handler.img_path_list: | 405 | self.cronjob_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | 
| 408 | while img_queue.full(): | 406 | self.log_base, task_str)) | 
| 409 | self.cronjob_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) | 407 | raise Exception('pdf img empty') | 
| 410 | time.sleep(self.sleep_time_img_put) | 408 | else: | 
| 411 | img_queue.put(img_path) | 409 | with lock: | 
| 410 | todo_count_dict[task_str] = img_count | ||
| 411 | for img_path in pdf_handler.img_path_list: | ||
| 412 | while img_queue.full(): | ||
| 413 | self.cronjob_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) | ||
| 414 | time.sleep(self.sleep_time_img_put) | ||
| 415 | img_queue.put(img_path) | ||
| 412 | except EDMSException as e: | 416 | except EDMSException as e: | 
| 413 | doc.status = DocStatus.PROCESS_FAILED.value | 417 | try: | 
| 414 | doc.save() | 418 | doc.status = DocStatus.PROCESS_FAILED.value | 
| 415 | self.cronjob_log.error('{0} [process failed (edms download)] [task={1}] [error={2}]'.format( | 419 | doc.save() | 
| 416 | self.log_base, task_str, traceback.format_exc())) | 420 | self.cronjob_log.warn('{0} [process failed (edms download)] [task={1}] [error={2}]'.format( | 
| 421 | self.log_base, task_str, traceback.format_exc())) | ||
| 422 | except Exception as e: | ||
| 423 | self.cronjob_log.error('{0} [process error (db save 1)] [error={1}]'.format( | ||
| 424 | self.log_base, traceback.format_exc())) | ||
| 425 | error_list.append(1) | ||
| 426 | return | ||
| 417 | except Exception as e: | 427 | except Exception as e: | 
| 418 | doc.status = DocStatus.PROCESS_FAILED.value | 428 | try: | 
| 419 | doc.save() | 429 | doc.status = DocStatus.PROCESS_FAILED.value | 
| 420 | self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( | 430 | doc.save() | 
| 421 | self.log_base, task_str, traceback.format_exc())) | 431 | self.cronjob_log.warn('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( | 
| 432 | self.log_base, task_str, traceback.format_exc())) | ||
| 433 | except Exception as e: | ||
| 434 | self.cronjob_log.error('{0} [process error (db save 2)] [error={1}]'.format( | ||
| 435 | self.log_base, traceback.format_exc())) | ||
| 436 | error_list.append(1) | ||
| 437 | return | ||
| 422 | 438 | ||
| 423 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list): | 439 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list): | 
| 424 | while len(error_list) == 0 or not img_queue.empty(): | 440 | while len(error_list) == 0 or not img_queue.empty(): | 
| ... | @@ -447,8 +463,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -447,8 +463,9 @@ class Command(BaseCommand, LoggerMixin): | 
| 447 | if ocr_1_response.status_code != 200: | 463 | if ocr_1_response.status_code != 200: | 
| 448 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) | 464 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) | 
| 449 | except Exception as e: | 465 | except Exception as e: | 
| 450 | self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [img_path={2}] [error={3}]'.format( | 466 | self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [url={2}] [img_path={3}] ' | 
| 451 | self.log_base, times, img_path, traceback.format_exc())) | 467 | '[error={4}]'.format(self.log_base, times, url, img_path, | 
| 468 | traceback.format_exc())) | ||
| 452 | else: | 469 | else: | 
| 453 | ocr_1_res = ocr_1_response.json() | 470 | ocr_1_res = ocr_1_response.json() | 
| 454 | end_time = time.time() | 471 | end_time = time.time() | 
| ... | @@ -458,7 +475,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -458,7 +475,8 @@ class Command(BaseCommand, LoggerMixin): | 
| 458 | break | 475 | break | 
| 459 | else: | 476 | else: | 
| 460 | ocr_1_res = {} | 477 | ocr_1_res = {} | 
| 461 | self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}]'.format(self.log_base, img_path)) | 478 | self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}] [url={2}]'.format( | 
| 479 | self.log_base, img_path, url)) | ||
| 462 | # continue | 480 | # continue | 
| 463 | except Exception as e: | 481 | except Exception as e: | 
| 464 | self.cronjob_log.error('{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'.format( | 482 | self.cronjob_log.error('{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'.format( | 
| ... | @@ -521,14 +539,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -521,14 +539,14 @@ class Command(BaseCommand, LoggerMixin): | 
| 521 | ocr_data_list = res.get('data', []) | 539 | ocr_data_list = res.get('data', []) | 
| 522 | if not isinstance(ocr_data_list, list): | 540 | if not isinstance(ocr_data_list, list): | 
| 523 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 541 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 
| 524 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) | 542 | self.cronjob_log.warn('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) | 
| 525 | else: | 543 | else: | 
| 526 | for part_idx, ocr_data in enumerate(ocr_data_list): | 544 | for part_idx, ocr_data in enumerate(ocr_data_list): | 
| 527 | part_idx = part_idx + 1 | 545 | part_idx = part_idx + 1 | 
| 528 | classify = ocr_data.get('classify') | 546 | classify = ocr_data.get('classify') | 
| 529 | if classify is None: | 547 | if classify is None: | 
| 530 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 548 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 
| 531 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format( | 549 | self.cronjob_log.warn('{0} [ocr_1 res error] [img={1}]'.format( | 
| 532 | self.log_base, img_path)) | 550 | self.log_base, img_path)) | 
| 533 | continue | 551 | continue | 
| 534 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 552 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 
| ... | @@ -624,7 +642,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -624,7 +642,7 @@ class Command(BaseCommand, LoggerMixin): | 
| 624 | doc, business_type = self.get_doc_object(task_str) | 642 | doc, business_type = self.get_doc_object(task_str) | 
| 625 | doc.status = DocStatus.PROCESS_FAILED.value | 643 | doc.status = DocStatus.PROCESS_FAILED.value | 
| 626 | doc.save() | 644 | doc.save() | 
| 627 | self.cronjob_log.error('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format( | 645 | self.cronjob_log.warn('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format( | 
| 628 | self.log_base, task_str, traceback.format_exc())) | 646 | self.log_base, task_str, traceback.format_exc())) | 
| 629 | except Exception as e: | 647 | except Exception as e: | 
| 630 | self.cronjob_log.error('{0} [process error (wb end)] [task={1}] [error={2}]'.format( | 648 | self.cronjob_log.error('{0} [process error (wb end)] [task={1}] [error={2}]'.format( | 
| ... | @@ -673,7 +691,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -673,7 +691,7 @@ class Command(BaseCommand, LoggerMixin): | 
| 673 | if hasattr(doc, field): | 691 | if hasattr(doc, field): | 
| 674 | setattr(doc, field, count) | 692 | setattr(doc, field, count) | 
| 675 | doc.save() | 693 | doc.save() | 
| 676 | self.cronjob_log.error('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( | 694 | self.cronjob_log.warn('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( | 
| 677 | self.log_base, task_str, traceback.format_exc())) | 695 | self.log_base, task_str, traceback.format_exc())) | 
| 678 | except Exception as e: | 696 | except Exception as e: | 
| 679 | self.cronjob_log.error('{0} [process error (edms upload)] [task={1}] [error={2}]'.format( | 697 | self.cronjob_log.error('{0} [process error (edms upload)] [task={1}] [error={2}]'.format( | ... | ... | 
- 
Please register or sign in to post a comment