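Fix bug: wrap the OCR fetch step and the workbook/EDMS status-update handlers in try/except so unexpected errors are logged as "process error" instead of escaping; also log successful task retrieval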
Showing 1 changed file with 81 additions and 64 deletions
... | @@ -71,6 +71,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -71,6 +71,7 @@ class Command(BaseCommand, LoggerMixin): |
71 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) | 71 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) |
72 | return None, None, None | 72 | return None, None, None |
73 | 73 | ||
74 | self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority)) | ||
74 | doc, business_type = self.get_doc_object(task_str) | 75 | doc, business_type = self.get_doc_object(task_str) |
75 | 76 | ||
76 | if doc is None: | 77 | if doc is None: |
... | @@ -421,36 +422,40 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -421,36 +422,40 @@ class Command(BaseCommand, LoggerMixin): |
421 | time.sleep(self.sleep_time_img_get) | 422 | time.sleep(self.sleep_time_img_get) |
422 | continue | 423 | continue |
423 | else: | 424 | else: |
424 | self.cronjob_log.info('{0} [img_2_ocr_1] [get img] [img_path={1}]'.format(self.log_base, img_path)) | 425 | try: |
425 | 426 | self.cronjob_log.info('{0} [img_2_ocr_1] [get img] [img_path={1}]'.format(self.log_base, img_path)) | |
426 | for times in range(consts.RETRY_TIMES): | 427 | |
427 | try: | 428 | for times in range(consts.RETRY_TIMES): |
428 | with open(img_path, 'rb') as f: | 429 | try: |
429 | base64_data = base64.b64encode(f.read()) | 430 | with open(img_path, 'rb') as f: |
430 | # 获取解码后的base64值 | 431 | base64_data = base64.b64encode(f.read()) |
431 | file_data = base64_data.decode() | 432 | # 获取解码后的base64值 |
432 | json_data_1 = { | 433 | file_data = base64_data.decode() |
433 | "file": file_data | 434 | json_data_1 = { |
434 | } | 435 | "file": file_data |
435 | 436 | } | |
436 | start_time = time.time() | 437 | |
437 | ocr_1_response = requests.post(url, json=json_data_1) | 438 | start_time = time.time() |
438 | if ocr_1_response.status_code != 200: | 439 | ocr_1_response = requests.post(url, json=json_data_1) |
439 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) | 440 | if ocr_1_response.status_code != 200: |
440 | except Exception as e: | 441 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) |
441 | self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [img_path={2}] [error={3}]'.format( | 442 | except Exception as e: |
442 | self.log_base, times, img_path, traceback.format_exc())) | 443 | self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [img_path={2}] [error={3}]'.format( |
444 | self.log_base, times, img_path, traceback.format_exc())) | ||
445 | else: | ||
446 | ocr_1_res = ocr_1_response.json() | ||
447 | end_time = time.time() | ||
448 | speed_time = int(end_time - start_time) | ||
449 | self.cronjob_log.info('{0} [ocr_1 success] [img={1}] [res={2}] [speed_time={3}]'.format( | ||
450 | self.log_base, img_path, ocr_1_res, speed_time)) | ||
451 | break | ||
443 | else: | 452 | else: |
444 | ocr_1_res = ocr_1_response.json() | 453 | ocr_1_res = {} |
445 | end_time = time.time() | 454 | self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}]'.format(self.log_base, img_path)) |
446 | speed_time = int(end_time - start_time) | 455 | # continue |
447 | self.cronjob_log.info('{0} [ocr_1 success] [img={1}] [res={2}] [speed_time={3}]'.format( | 456 | except Exception as e: |
448 | self.log_base, img_path, ocr_1_res, speed_time)) | 457 | self.cronjob_log.error('{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'.format( |
449 | break | 458 | self.log_base, img_path, traceback.format_exc())) |
450 | else: | ||
451 | ocr_1_res = {} | ||
452 | self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}]'.format(self.log_base, img_path)) | ||
453 | # continue | ||
454 | 459 | ||
455 | try: | 460 | try: |
456 | del json_data_1 | 461 | del json_data_1 |
... | @@ -470,7 +475,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -470,7 +475,7 @@ class Command(BaseCommand, LoggerMixin): |
470 | else: | 475 | else: |
471 | todo_count_dict[task_str] = todo_count - 1 | 476 | todo_count_dict[task_str] = todo_count - 1 |
472 | except Exception as e: | 477 | except Exception as e: |
473 | self.cronjob_log.error('{0} [process failed (store ocr res)] [img_path={1}] [error={2}]'.format( | 478 | self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format( |
474 | self.log_base, img_path, traceback.format_exc())) | 479 | self.log_base, img_path, traceback.format_exc())) |
475 | 480 | ||
476 | def res_2_wb(self, res_dict, finish_queue, lock): | 481 | def res_2_wb(self, res_dict, finish_queue, lock): |
... | @@ -482,12 +487,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -482,12 +487,12 @@ class Command(BaseCommand, LoggerMixin): |
482 | time.sleep(self.sleep_time_task_get) | 487 | time.sleep(self.sleep_time_task_get) |
483 | continue | 488 | continue |
484 | else: | 489 | else: |
485 | self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) | ||
486 | ocr_1_res = res_dict.get(task_str, {}) | ||
487 | # self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format( | ||
488 | # self.log_base, task_str)) | ||
489 | |||
490 | try: | 490 | try: |
491 | self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) | ||
492 | ocr_1_res = res_dict.get(task_str, {}) | ||
493 | # self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format( | ||
494 | # self.log_base, task_str)) | ||
495 | |||
491 | # 4.OCR结果并且构建excel文件 | 496 | # 4.OCR结果并且构建excel文件 |
492 | bs_summary = {} | 497 | bs_summary = {} |
493 | license_summary = {} | 498 | license_summary = {} |
... | @@ -605,14 +610,19 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -605,14 +610,19 @@ class Command(BaseCommand, LoggerMixin): |
605 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) | 610 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme) |
606 | wb.save(excel_path) | 611 | wb.save(excel_path) |
607 | except Exception as e: | 612 | except Exception as e: |
608 | with lock: | 613 | try: |
609 | if task_str in res_dict: | 614 | with lock: |
610 | del res_dict[task_str] | 615 | if task_str in res_dict: |
611 | doc, business_type = self.get_doc_object(task_str) | 616 | del res_dict[task_str] |
612 | doc.status = DocStatus.PROCESS_FAILED.value | 617 | doc, business_type = self.get_doc_object(task_str) |
613 | doc.save() | 618 | doc.status = DocStatus.PROCESS_FAILED.value |
614 | self.cronjob_log.error('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format( | 619 | doc.save() |
615 | self.log_base, task_str, traceback.format_exc())) | 620 | self.cronjob_log.error('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format( |
621 | self.log_base, task_str, traceback.format_exc())) | ||
622 | except Exception as e: | ||
623 | self.cronjob_log.error('{0} [process error (wb end)] [task={1}] [error={2}]'.format( | ||
624 | self.log_base, task_str, traceback.format_exc())) | ||
625 | |||
616 | try: | 626 | try: |
617 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) | 627 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) |
618 | img_save_path = os.path.join(doc_data_path, 'img') | 628 | img_save_path = os.path.join(doc_data_path, 'img') |
... | @@ -620,7 +630,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -620,7 +630,7 @@ class Command(BaseCommand, LoggerMixin): |
620 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) | 630 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) |
621 | os.remove(pdf_path) | 631 | os.remove(pdf_path) |
622 | except Exception as e: | 632 | except Exception as e: |
623 | self.cronjob_log.error('{0} [file remove failed] [task={1}] [error={2}]'.format( | 633 | self.cronjob_log.error('{0} [process error (file remove 1)] [task={1}] [error={2}]'.format( |
624 | self.log_base, task_str, traceback.format_exc())) | 634 | self.log_base, task_str, traceback.format_exc())) |
625 | else: | 635 | else: |
626 | try: | 636 | try: |
... | @@ -631,7 +641,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -631,7 +641,7 @@ class Command(BaseCommand, LoggerMixin): |
631 | # os.remove(pdf_path) | 641 | # os.remove(pdf_path) |
632 | # os.remove(src_excel_path) | 642 | # os.remove(src_excel_path) |
633 | except Exception as e: | 643 | except Exception as e: |
634 | self.cronjob_log.error('{0} [file remove failed] [task={1}] [error={2}]'.format( | 644 | self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format( |
635 | self.log_base, task_str, traceback.format_exc())) | 645 | self.log_base, task_str, traceback.format_exc())) |
636 | try: | 646 | try: |
637 | # 5.上传至EDMS | 647 | # 5.上传至EDMS |
... | @@ -648,26 +658,33 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -648,26 +658,33 @@ class Command(BaseCommand, LoggerMixin): |
648 | else: | 658 | else: |
649 | raise EDMSException(edms_exc) | 659 | raise EDMSException(edms_exc) |
650 | except Exception as e: | 660 | except Exception as e: |
651 | doc.status = DocStatus.UPLOAD_FAILED.value | 661 | try: |
652 | doc.end_time = timezone.now() | 662 | doc.status = DocStatus.UPLOAD_FAILED.value |
653 | doc.duration = min((doc.end_time - doc.start_time).seconds, 32760) | 663 | doc.end_time = timezone.now() |
654 | for field, count in count_list: | 664 | doc.duration = min((doc.end_time - doc.start_time).seconds, 32760) |
655 | if hasattr(doc, field): | 665 | for field, count in count_list: |
656 | setattr(doc, field, count) | 666 | if hasattr(doc, field): |
657 | doc.save() | 667 | setattr(doc, field, count) |
658 | self.cronjob_log.error('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( | 668 | doc.save() |
659 | self.log_base, task_str, traceback.format_exc())) | 669 | self.cronjob_log.error('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( |
670 | self.log_base, task_str, traceback.format_exc())) | ||
671 | except Exception as e: | ||
672 | self.cronjob_log.error('{0} [process error (edms upload)] [task={1}] [error={2}]'.format( | ||
673 | self.log_base, task_str, traceback.format_exc())) | ||
660 | else: | 674 | else: |
661 | doc.status = DocStatus.COMPLETE.value | 675 | try: |
662 | doc.end_time = timezone.now() | 676 | doc.status = DocStatus.COMPLETE.value |
663 | doc.duration = min((doc.end_time - doc.start_time).seconds, 32760) | 677 | doc.end_time = timezone.now() |
664 | for field, count in count_list: | 678 | doc.duration = min((doc.end_time - doc.start_time).seconds, 32760) |
665 | if hasattr(doc, field): | 679 | for field, count in count_list: |
666 | setattr(doc, field, count) | 680 | if hasattr(doc, field): |
667 | doc.save() | 681 | setattr(doc, field, count) |
668 | self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) | 682 | doc.save() |
669 | # os.remove(excel_path) | 683 | self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str)) |
670 | 684 | # os.remove(excel_path) | |
685 | except Exception as e: | ||
686 | self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format( | ||
687 | self.log_base, task_str, traceback.format_exc())) | ||
671 | 688 | ||
672 | 689 | ||
673 | # TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理 | 690 | # TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理 | ... | ... |
-
Please register or sign in to post a comment