97b0b2ed by 周伟奇

log prune

1 parent d9b0ae8c
import os
import re
import time
import json
import shutil
import base64
import signal
import asyncio
import aiohttp
import difflib
import requests
import traceback
from collections import Counter
from datetime import datetime, date
from django.utils import timezone
from django.core.management import BaseCommand
from multiprocessing import Process, Queue, Manager, Lock
from multiprocessing import Process
from settings import conf
from common.mixins import LoggerMixin
from common.tools.file_tools import write_zip_file
from common.tools.pdf_to_img import PDFHandler
from apps.doc import consts
from apps.doc.ocr.edms import EDMS, rh
from apps.doc.named_enum import KeywordsType
from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception
from apps.doc.ocr.wb import BSWorkbook, Workbook
from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords
from apps.doc.exceptions import OCR1Exception
from apps.doc.ocr.wb import BSWorkbook
class Command(BaseCommand, LoggerMixin):
......@@ -37,7 +26,7 @@ class Command(BaseCommand, LoggerMixin):
self.switch = True
# sleep time
self.sleep_time = float(conf.SLEEP_SECOND_FOLDER)
# input foler
# input folder
self.input_dirs = conf.get_namespace('INPUT_DIR_')
# OCR-related settings
self.ocr_url = conf.OCR_URL_FOLDER
......
......@@ -475,8 +475,8 @@ class Command(BaseCommand, LoggerMixin):
else:
self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str))
ocr_1_res = res_dict.get(task_str, {})
self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}] [res={2}]'.format(
self.log_base, task_str, ocr_1_res))
# self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format(
# self.log_base, task_str))
try:
# 4. Take the OCR results and build the Excel file
......@@ -499,16 +499,15 @@ class Command(BaseCommand, LoggerMixin):
ocr_data_list = res.get('data', [])
if not isinstance(ocr_data_list, list):
res_list.append((pno, ino, part_idx, consts.RES_FAILED_3))
self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format(
self.log_base, img_path, res))
self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
else:
for part_idx, ocr_data in enumerate(ocr_data_list):
part_idx = part_idx + 1
classify = ocr_data.get('classify')
if classify is None:
res_list.append((pno, ino, part_idx, consts.RES_FAILED_3))
self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format(
self.log_base, img_path, res))
self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(
self.log_base, img_path))
continue
elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
......@@ -569,8 +568,7 @@ class Command(BaseCommand, LoggerMixin):
self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx)
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED_1))
self.cronjob_log.info('{0} [ocr_1 res error] [img={1}] [res={2}]'.format(
self.log_base, img_path, res))
self.cronjob_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
with lock:
del res_dict[task_str]
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!