97b0b2ed by 周伟奇

log prune

1 parent d9b0ae8c
1 import os 1 import os
2 import re 2 import re
3 import time 3 import time
4 import json
5 import shutil 4 import shutil
6 import base64 5 import base64
7 import signal 6 import signal
8 import asyncio
9 import aiohttp
10 import difflib
11 import requests 7 import requests
12 import traceback 8 import traceback
13 from collections import Counter
14 from datetime import datetime, date
15 from django.utils import timezone
16 from django.core.management import BaseCommand 9 from django.core.management import BaseCommand
17 from multiprocessing import Process, Queue, Manager, Lock 10 from multiprocessing import Process
18 11
19 from settings import conf 12 from settings import conf
20 from common.mixins import LoggerMixin 13 from common.mixins import LoggerMixin
21 from common.tools.file_tools import write_zip_file
22 from common.tools.pdf_to_img import PDFHandler 14 from common.tools.pdf_to_img import PDFHandler
23 from apps.doc import consts 15 from apps.doc import consts
24 from apps.doc.ocr.edms import EDMS, rh 16 from apps.doc.exceptions import OCR1Exception
25 from apps.doc.named_enum import KeywordsType 17 from apps.doc.ocr.wb import BSWorkbook
26 from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception
27 from apps.doc.ocr.wb import BSWorkbook, Workbook
28 from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords
29 18
30 19
31 class Command(BaseCommand, LoggerMixin): 20 class Command(BaseCommand, LoggerMixin):
...@@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin):
37 self.switch = True 26 self.switch = True
38 # 睡眠时间 27 # 睡眠时间
39 self.sleep_time = float(conf.SLEEP_SECOND_FOLDER) 28 self.sleep_time = float(conf.SLEEP_SECOND_FOLDER)
40 # input foler 29 # input folder
41 self.input_dirs = conf.get_namespace('INPUT_DIR_') 30 self.input_dirs = conf.get_namespace('INPUT_DIR_')
42 # ocr相关 31 # ocr相关
43 self.ocr_url = conf.OCR_URL_FOLDER 32 self.ocr_url = conf.OCR_URL_FOLDER
......
...@@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin):
475 else: 475 else:
476 self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str)) 476 self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str))
477 ocr_1_res = res_dict.get(task_str, {}) 477 ocr_1_res = res_dict.get(task_str, {})
478 self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}] [res={2}]'.format( 478 # self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format(
479 self.log_base, task_str, ocr_1_res)) 479 # self.log_base, task_str))
480 480
481 try: 481 try:
482 # 4.OCR结果并且构建excel文件 482 # 4.OCR结果并且构建excel文件
...@@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin): ...@@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin):
499 ocr_data_list = res.get('data', []) 499 ocr_data_list = res.get('data', [])
500 if not isinstance(ocr_data_list, list): 500 if not isinstance(ocr_data_list, list):
501 res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) 501 res_list.append((pno, ino, part_idx, consts.RES_FAILED_3))
502 self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( 502 self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
503 self.log_base, img_path, res))
504 else: 503 else:
505 for part_idx, ocr_data in enumerate(ocr_data_list): 504 for part_idx, ocr_data in enumerate(ocr_data_list):
506 part_idx = part_idx + 1 505 part_idx = part_idx + 1
507 classify = ocr_data.get('classify') 506 classify = ocr_data.get('classify')
508 if classify is None: 507 if classify is None:
509 res_list.append((pno, ino, part_idx, consts.RES_FAILED_3)) 508 res_list.append((pno, ino, part_idx, consts.RES_FAILED_3))
510 self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( 509 self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(
511 self.log_base, img_path, res)) 510 self.log_base, img_path))
512 continue 511 continue
513 elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 512 elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
514 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) 513 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
...@@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin):
569 self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) 568 self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx)
570 else: 569 else:
571 res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) 570 res_list.append((pno, ino, part_idx, consts.RES_FAILED_1))
572 self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format( 571 self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
573 self.log_base, img_path, res))
574 572
575 with lock: 573 with lock:
576 del res_dict[task_str] 574 del res_dict[task_str]
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!