log prune
Showing 2 changed files with 10 additions and 23 deletions
| 1 | import os | 1 | import os |
| 2 | import re | 2 | import re |
| 3 | import time | 3 | import time |
| 4 | import json | ||
| 5 | import shutil | 4 | import shutil |
| 6 | import base64 | 5 | import base64 |
| 7 | import signal | 6 | import signal |
| 8 | import asyncio | ||
| 9 | import aiohttp | ||
| 10 | import difflib | ||
| 11 | import requests | 7 | import requests |
| 12 | import traceback | 8 | import traceback |
| 13 | from collections import Counter | ||
| 14 | from datetime import datetime, date | ||
| 15 | from django.utils import timezone | ||
| 16 | from django.core.management import BaseCommand | 9 | from django.core.management import BaseCommand |
| 17 | from multiprocessing import Process, Queue, Manager, Lock | 10 | from multiprocessing import Process |
| 18 | 11 | ||
| 19 | from settings import conf | 12 | from settings import conf |
| 20 | from common.mixins import LoggerMixin | 13 | from common.mixins import LoggerMixin |
| 21 | from common.tools.file_tools import write_zip_file | ||
| 22 | from common.tools.pdf_to_img import PDFHandler | 14 | from common.tools.pdf_to_img import PDFHandler |
| 23 | from apps.doc import consts | 15 | from apps.doc import consts |
| 24 | from apps.doc.ocr.edms import EDMS, rh | 16 | from apps.doc.exceptions import OCR1Exception |
| 25 | from apps.doc.named_enum import KeywordsType | 17 | from apps.doc.ocr.wb import BSWorkbook |
| 26 | from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception | ||
| 27 | from apps.doc.ocr.wb import BSWorkbook, Workbook | ||
| 28 | from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords | ||
| 29 | 18 | ||
| 30 | 19 | ||
| 31 | class Command(BaseCommand, LoggerMixin): | 20 | class Command(BaseCommand, LoggerMixin): |
| ... | @@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin): |
| 37 | self.switch = True | 26 | self.switch = True |
| 38 | # 睡眠时间 | 27 | # 睡眠时间 |
| 39 | self.sleep_time = float(conf.SLEEP_SECOND_FOLDER) | 28 | self.sleep_time = float(conf.SLEEP_SECOND_FOLDER) |
| 40 | # input foler | 29 | # input folder |
| 41 | self.input_dirs = conf.get_namespace('INPUT_DIR_') | 30 | self.input_dirs = conf.get_namespace('INPUT_DIR_') |
| 42 | # ocr相关 | 31 | # ocr相关 |
| 43 | self.ocr_url = conf.OCR_URL_FOLDER | 32 | self.ocr_url = conf.OCR_URL_FOLDER | ... | ... |
| ... | @@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin): |
| 475 | else: | 475 | else: |
| 476 | self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) | 476 | self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) |
| 477 | ocr_1_res = res_dict.get(task_str, {}) | 477 | ocr_1_res = res_dict.get(task_str, {}) |
| 478 | self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}] [res={2}]'.format( | 478 | # self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format( |
| 479 | self.log_base, task_str, ocr_1_res)) | 479 | # self.log_base, task_str)) |
| 480 | 480 | ||
| 481 | try: | 481 | try: |
| 482 | # 4.OCR结果并且构建excel文件 | 482 | # 4.OCR结果并且构建excel文件 |
| ... | @@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin): |
| 499 | ocr_data_list = res.get('data', []) | 499 | ocr_data_list = res.get('data', []) |
| 500 | if not isinstance(ocr_data_list, list): | 500 | if not isinstance(ocr_data_list, list): |
| 501 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 501 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) |
| 502 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( | 502 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
| 503 | self.log_base, img_path, res)) | ||
| 504 | else: | 503 | else: |
| 505 | for part_idx, ocr_data in enumerate(ocr_data_list): | 504 | for part_idx, ocr_data in enumerate(ocr_data_list): |
| 506 | part_idx = part_idx + 1 | 505 | part_idx = part_idx + 1 |
| 507 | classify = ocr_data.get('classify') | 506 | classify = ocr_data.get('classify') |
| 508 | if classify is None: | 507 | if classify is None: |
| 509 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 508 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) |
| 510 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( | 509 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format( |
| 511 | self.log_base, img_path, res)) | 510 | self.log_base, img_path)) |
| 512 | continue | 511 | continue |
| 513 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 512 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 |
| 514 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) | 513 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) |
| ... | @@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin): |
| 569 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) | 568 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) |
| 570 | else: | 569 | else: |
| 571 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) | 570 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) |
| 572 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( | 571 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
| 573 | self.log_base, img_path, res)) | ||
| 574 | 572 | ||
| 575 | with lock: | 573 | with lock: |
| 576 | del res_dict[task_str] | 574 | del res_dict[task_str] | ... | ... |
-
Please register or sign in to post a comment