fix flow
Showing 1 changed file with 43 additions and 25 deletions
... | @@ -55,8 +55,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -55,8 +55,7 @@ class Command(BaseCommand, LoggerMixin): |
55 | def signal_handler(self, sig, frame): | 55 | def signal_handler(self, sig, frame): |
56 | self.switch = False # 停止处理文件 | 56 | self.switch = False # 停止处理文件 |
57 | 57 | ||
58 | @staticmethod | 58 | def get_doc_object(self, task_str): |
59 | def get_doc_object(task_str): | ||
60 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 59 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) |
61 | doc_id = int(doc_id_str) | 60 | doc_id = int(doc_id_str) |
62 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc | 61 | doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc |
... | @@ -71,14 +70,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -71,14 +70,10 @@ class Command(BaseCommand, LoggerMixin): |
71 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) | 70 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) |
72 | return None, None, None | 71 | return None, None, None |
73 | 72 | ||
74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) | 73 | self.cronjob_log.info('{0} [get_doc_info] [task={1}] [is_priority={2}]'.format( |
74 | self.log_base, task_str, is_priority)) | ||
75 | try: | 75 | try: |
76 | doc, business_type = self.get_doc_object(task_str) | 76 | doc, business_type = self.get_doc_object(task_str) |
77 | except Exception as e: | ||
78 | rh.enqueue([task_str], is_priority) | ||
79 | self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
80 | raise e | ||
81 | |||
82 | if doc is None: | 77 | if doc is None: |
83 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( | 78 | self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format( |
84 | self.log_base, task_str, is_priority)) | 79 | self.log_base, task_str, is_priority)) |
... | @@ -90,7 +85,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -90,7 +85,13 @@ class Command(BaseCommand, LoggerMixin): |
90 | doc.status = DocStatus.PROCESSING.value | 85 | doc.status = DocStatus.PROCESSING.value |
91 | doc.start_time = timezone.now() | 86 | doc.start_time = timezone.now() |
92 | doc.save() | 87 | doc.save() |
93 | self.cronjob_log.info('{0} [get_doc_info] [success] [task_str={1}] [is_priority={2}]'.format( | 88 | except Exception as e: |
89 | rh.enqueue([task_str], is_priority) | ||
90 | self.cronjob_log.error('{0} [process error (get doc info in)] [error={1}]'.format( | ||
91 | self.log_base, traceback.format_exc())) | ||
92 | raise e | ||
93 | else: | ||
94 | self.cronjob_log.info('{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'.format( | ||
94 | self.log_base, task_str, is_priority)) | 95 | self.log_base, task_str, is_priority)) |
95 | return doc, business_type, task_str | 96 | return doc, business_type, task_str |
96 | 97 | ||
... | @@ -210,21 +211,18 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -210,21 +211,18 @@ class Command(BaseCommand, LoggerMixin): |
210 | else: | 211 | else: |
211 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) | 212 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) |
212 | 213 | ||
213 | @staticmethod | 214 | def parse_img_path(self, img_path): |
214 | def parse_img_path(img_path): | ||
215 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | 215 | img_name, _ = os.path.splitext(os.path.basename(img_path)) |
216 | part_list = img_name.split('_') | 216 | part_list = img_name.split('_') |
217 | # page_7_img_11_0 | 217 | # page_7_img_11_0 |
218 | return int(part_list[1])+1, int(part_list[3])+1 | 218 | return int(part_list[1])+1, int(part_list[3])+1 |
219 | 219 | ||
220 | @staticmethod | 220 | def get_most(self, value_list): |
221 | def get_most(value_list): | ||
222 | if value_list: | 221 | if value_list: |
223 | most_common = Counter(value_list).most_common(1) | 222 | most_common = Counter(value_list).most_common(1) |
224 | return most_common[0][0] if most_common else None | 223 | return most_common[0][0] if most_common else None |
225 | 224 | ||
226 | @staticmethod | 225 | def date_format(self, date_str, format_str): |
227 | def date_format(date_str, format_str): | ||
228 | try: | 226 | try: |
229 | date_res = datetime.strptime(date_str, format_str).date() | 227 | date_res = datetime.strptime(date_str, format_str).date() |
230 | except Exception as e: | 228 | except Exception as e: |
... | @@ -402,23 +400,41 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -402,23 +400,41 @@ class Command(BaseCommand, LoggerMixin): |
402 | self.cronjob_log.info('{0} [pdf to img end] [task={1}] [spend_time={2}]'.format( | 400 | self.cronjob_log.info('{0} [pdf to img end] [task={1}] [spend_time={2}]'.format( |
403 | self.log_base, task_str, speed_time)) | 401 | self.log_base, task_str, speed_time)) |
404 | 402 | ||
403 | img_count = len(pdf_handler.img_path_list) | ||
404 | if img_count == 0: | ||
405 | self.cronjob_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | ||
406 | self.log_base, task_str)) | ||
407 | raise Exception('pdf img empty') | ||
408 | else: | ||
405 | with lock: | 409 | with lock: |
406 | todo_count_dict[task_str] = len(pdf_handler.img_path_list) | 410 | todo_count_dict[task_str] = img_count |
407 | for img_path in pdf_handler.img_path_list: | 411 | for img_path in pdf_handler.img_path_list: |
408 | while img_queue.full(): | 412 | while img_queue.full(): |
409 | self.cronjob_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) | 413 | self.cronjob_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) |
410 | time.sleep(self.sleep_time_img_put) | 414 | time.sleep(self.sleep_time_img_put) |
411 | img_queue.put(img_path) | 415 | img_queue.put(img_path) |
412 | except EDMSException as e: | 416 | except EDMSException as e: |
417 | try: | ||
413 | doc.status = DocStatus.PROCESS_FAILED.value | 418 | doc.status = DocStatus.PROCESS_FAILED.value |
414 | doc.save() | 419 | doc.save() |
415 | self.cronjob_log.error('{0} [process failed (edms download)] [task={1}] [error={2}]'.format( | 420 | self.cronjob_log.warn('{0} [process failed (edms download)] [task={1}] [error={2}]'.format( |
416 | self.log_base, task_str, traceback.format_exc())) | 421 | self.log_base, task_str, traceback.format_exc())) |
417 | except Exception as e: | 422 | except Exception as e: |
423 | self.cronjob_log.error('{0} [process error (db save 1)] [error={1}]'.format( | ||
424 | self.log_base, traceback.format_exc())) | ||
425 | error_list.append(1) | ||
426 | return | ||
427 | except Exception as e: | ||
428 | try: | ||
418 | doc.status = DocStatus.PROCESS_FAILED.value | 429 | doc.status = DocStatus.PROCESS_FAILED.value |
419 | doc.save() | 430 | doc.save() |
420 | self.cronjob_log.error('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( | 431 | self.cronjob_log.warn('{0} [process failed (pdf to img)] [task={1}] [error={2}]'.format( |
421 | self.log_base, task_str, traceback.format_exc())) | 432 | self.log_base, task_str, traceback.format_exc())) |
433 | except Exception as e: | ||
434 | self.cronjob_log.error('{0} [process error (db save 2)] [error={1}]'.format( | ||
435 | self.log_base, traceback.format_exc())) | ||
436 | error_list.append(1) | ||
437 | return | ||
422 | 438 | ||
423 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list): | 439 | def img_2_ocr_1(self, img_queue, todo_count_dict, res_dict, finish_queue, lock, url, error_list): |
424 | while len(error_list) == 0 or not img_queue.empty(): | 440 | while len(error_list) == 0 or not img_queue.empty(): |
... | @@ -447,8 +463,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -447,8 +463,9 @@ class Command(BaseCommand, LoggerMixin): |
447 | if ocr_1_response.status_code != 200: | 463 | if ocr_1_response.status_code != 200: |
448 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) | 464 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) |
449 | except Exception as e: | 465 | except Exception as e: |
450 | self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [img_path={2}] [error={3}]'.format( | 466 | self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [url={2}] [img_path={3}] ' |
451 | self.log_base, times, img_path, traceback.format_exc())) | 467 | '[error={4}]'.format(self.log_base, times, url, img_path, |
468 | traceback.format_exc())) | ||
452 | else: | 469 | else: |
453 | ocr_1_res = ocr_1_response.json() | 470 | ocr_1_res = ocr_1_response.json() |
454 | end_time = time.time() | 471 | end_time = time.time() |
... | @@ -458,7 +475,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -458,7 +475,8 @@ class Command(BaseCommand, LoggerMixin): |
458 | break | 475 | break |
459 | else: | 476 | else: |
460 | ocr_1_res = {} | 477 | ocr_1_res = {} |
461 | self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}]'.format(self.log_base, img_path)) | 478 | self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}] [url={2}]'.format( |
479 | self.log_base, img_path, url)) | ||
462 | # continue | 480 | # continue |
463 | except Exception as e: | 481 | except Exception as e: |
464 | self.cronjob_log.error('{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'.format( | 482 | self.cronjob_log.error('{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'.format( |
... | @@ -521,14 +539,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -521,14 +539,14 @@ class Command(BaseCommand, LoggerMixin): |
521 | ocr_data_list = res.get('data', []) | 539 | ocr_data_list = res.get('data', []) |
522 | if not isinstance(ocr_data_list, list): | 540 | if not isinstance(ocr_data_list, list): |
523 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 541 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) |
524 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) | 542 | self.cronjob_log.warn('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
525 | else: | 543 | else: |
526 | for part_idx, ocr_data in enumerate(ocr_data_list): | 544 | for part_idx, ocr_data in enumerate(ocr_data_list): |
527 | part_idx = part_idx + 1 | 545 | part_idx = part_idx + 1 |
528 | classify = ocr_data.get('classify') | 546 | classify = ocr_data.get('classify') |
529 | if classify is None: | 547 | if classify is None: |
530 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 548 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) |
531 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format( | 549 | self.cronjob_log.warn('{0} [ocr_1 res error] [img={1}]'.format( |
532 | self.log_base, img_path)) | 550 | self.log_base, img_path)) |
533 | continue | 551 | continue |
534 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 552 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 |
... | @@ -624,7 +642,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -624,7 +642,7 @@ class Command(BaseCommand, LoggerMixin): |
624 | doc, business_type = self.get_doc_object(task_str) | 642 | doc, business_type = self.get_doc_object(task_str) |
625 | doc.status = DocStatus.PROCESS_FAILED.value | 643 | doc.status = DocStatus.PROCESS_FAILED.value |
626 | doc.save() | 644 | doc.save() |
627 | self.cronjob_log.error('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format( | 645 | self.cronjob_log.warn('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format( |
628 | self.log_base, task_str, traceback.format_exc())) | 646 | self.log_base, task_str, traceback.format_exc())) |
629 | except Exception as e: | 647 | except Exception as e: |
630 | self.cronjob_log.error('{0} [process error (wb end)] [task={1}] [error={2}]'.format( | 648 | self.cronjob_log.error('{0} [process error (wb end)] [task={1}] [error={2}]'.format( |
... | @@ -673,7 +691,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -673,7 +691,7 @@ class Command(BaseCommand, LoggerMixin): |
673 | if hasattr(doc, field): | 691 | if hasattr(doc, field): |
674 | setattr(doc, field, count) | 692 | setattr(doc, field, count) |
675 | doc.save() | 693 | doc.save() |
676 | self.cronjob_log.error('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( | 694 | self.cronjob_log.warn('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( |
677 | self.log_base, task_str, traceback.format_exc())) | 695 | self.log_base, task_str, traceback.format_exc())) |
678 | except Exception as e: | 696 | except Exception as e: |
679 | self.cronjob_log.error('{0} [process error (edms upload)] [task={1}] [error={2}]'.format( | 697 | self.cronjob_log.error('{0} [process error (edms upload)] [task={1}] [error={2}]'.format( | ... | ... |
-
Please register or sign in to post a comment