log prune
Showing 2 changed files with 10 additions and 23 deletions
1 | import os | 1 | import os |
2 | import re | 2 | import re |
3 | import time | 3 | import time |
4 | import json | ||
5 | import shutil | 4 | import shutil |
6 | import base64 | 5 | import base64 |
7 | import signal | 6 | import signal |
8 | import asyncio | ||
9 | import aiohttp | ||
10 | import difflib | ||
11 | import requests | 7 | import requests |
12 | import traceback | 8 | import traceback |
13 | from collections import Counter | ||
14 | from datetime import datetime, date | ||
15 | from django.utils import timezone | ||
16 | from django.core.management import BaseCommand | 9 | from django.core.management import BaseCommand |
17 | from multiprocessing import Process, Queue, Manager, Lock | 10 | from multiprocessing import Process |
18 | 11 | ||
19 | from settings import conf | 12 | from settings import conf |
20 | from common.mixins import LoggerMixin | 13 | from common.mixins import LoggerMixin |
21 | from common.tools.file_tools import write_zip_file | ||
22 | from common.tools.pdf_to_img import PDFHandler | 14 | from common.tools.pdf_to_img import PDFHandler |
23 | from apps.doc import consts | 15 | from apps.doc import consts |
24 | from apps.doc.ocr.edms import EDMS, rh | 16 | from apps.doc.exceptions import OCR1Exception |
25 | from apps.doc.named_enum import KeywordsType | 17 | from apps.doc.ocr.wb import BSWorkbook |
26 | from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception | ||
27 | from apps.doc.ocr.wb import BSWorkbook, Workbook | ||
28 | from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords | ||
29 | 18 | ||
30 | 19 | ||
31 | class Command(BaseCommand, LoggerMixin): | 20 | class Command(BaseCommand, LoggerMixin): |
... | @@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin): |
37 | self.switch = True | 26 | self.switch = True |
38 | # 睡眠时间 | 27 | # 睡眠时间 |
39 | self.sleep_time = float(conf.SLEEP_SECOND_FOLDER) | 28 | self.sleep_time = float(conf.SLEEP_SECOND_FOLDER) |
40 | # input foler | 29 | # input folder |
41 | self.input_dirs = conf.get_namespace('INPUT_DIR_') | 30 | self.input_dirs = conf.get_namespace('INPUT_DIR_') |
42 | # ocr相关 | 31 | # ocr相关 |
43 | self.ocr_url = conf.OCR_URL_FOLDER | 32 | self.ocr_url = conf.OCR_URL_FOLDER | ... | ... |
... | @@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin): |
475 | else: | 475 | else: |
476 | self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) | 476 | self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) |
477 | ocr_1_res = res_dict.get(task_str, {}) | 477 | ocr_1_res = res_dict.get(task_str, {}) |
478 | self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}] [res={2}]'.format( | 478 | # self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format( |
479 | self.log_base, task_str, ocr_1_res)) | 479 | # self.log_base, task_str)) |
480 | 480 | ||
481 | try: | 481 | try: |
482 | # 4.OCR结果并且构建excel文件 | 482 | # 4.OCR结果并且构建excel文件 |
... | @@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin): |
499 | ocr_data_list = res.get('data', []) | 499 | ocr_data_list = res.get('data', []) |
500 | if not isinstance(ocr_data_list, list): | 500 | if not isinstance(ocr_data_list, list): |
501 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 501 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) |
502 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( | 502 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
503 | self.log_base, img_path, res)) | ||
504 | else: | 503 | else: |
505 | for part_idx, ocr_data in enumerate(ocr_data_list): | 504 | for part_idx, ocr_data in enumerate(ocr_data_list): |
506 | part_idx = part_idx + 1 | 505 | part_idx = part_idx + 1 |
507 | classify = ocr_data.get('classify') | 506 | classify = ocr_data.get('classify') |
508 | if classify is None: | 507 | if classify is None: |
509 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) | 508 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) |
510 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( | 509 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format( |
511 | self.log_base, img_path, res)) | 510 | self.log_base, img_path)) |
512 | continue | 511 | continue |
513 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 512 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 |
514 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) | 513 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) |
... | @@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin): |
569 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) | 568 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) |
570 | else: | 569 | else: |
571 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) | 570 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) |
572 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( | 571 | self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
573 | self.log_base, img_path, res)) | ||
574 | 572 | ||
575 | with lock: | 573 | with lock: |
576 | del res_dict[task_str] | 574 | del res_dict[task_str] | ... | ... |
-
Please register or sign in to post a comment