d2b24497 by 周伟奇

add offline report

1 parent c7852512
......@@ -2331,3 +2331,5 @@ MPOS_MAP = {
BC_CLASSIFY,
BL_CLASSIFY,
}
# Document-type code passed as `doc_type` when an OfflineReport row is created
# with consts.FOLDER_WSC_CLASSIFY.
# NOTE(review): the offline_report DDL declares doc_type as tinyint; 199 presumably
# fits that column's range — confirm against the target database.
FOLDER_WSC_CLASSIFY = 199
......
......@@ -7,6 +7,7 @@ import base64
import signal
import requests
import traceback
from django import db
from PIL import Image
from datetime import datetime
from django.core.management import BaseCommand
......@@ -19,6 +20,8 @@ from common.tools.pdf_to_img import PDFHandler
from apps.doc import consts
from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException
from apps.doc.ocr.wb import BSWorkbook
from apps.doc.models import OfflineReport
from apps.doc.named_enum import OfflineFailureReason
class TIFFHandler:
......@@ -384,6 +387,9 @@ class Command(BaseCommand, LoggerMixin):
if len(true_file_set) == 0 and len(os_error_filename_set) > 0:
true_file_set.add(os_error_filename_set.pop())
for name in true_file_set:
is_success = True
failure_reason = OfflineFailureReason.OS_ERROR.value
start_time = time.time()
path = os.path.join(input_dir, name)
try:
if not os.path.exists(path):
......@@ -408,16 +414,21 @@ class Command(BaseCommand, LoggerMixin):
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except OSError:
is_success = False
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [os error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
except Exception as e:
is_success = False
failure_reason = OfflineFailureReason.PROCESS_ERROR.value
try:
self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path,
traceback.format_exc()))
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except Exception as e:
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [file move error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
......@@ -433,8 +444,23 @@ class Command(BaseCommand, LoggerMixin):
self.DATE_KEY: date_str
}
)
finally:
end_time = time.time()
try:
OfflineReport.objects.create(
input_folder=input_dir,
doc_type=consts.DDA_CLASSIFY,
file_name=name,
status=is_success,
failure_reason=failure_reason,
duration=int(end_time - start_time)
)
except Exception as e:
self.folder_log.error('{0} [db save failed] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
def handle(self, *args, **kwargs):
db.close_old_connections()
if len(self.input_dirs) == 0:
return
result_queue = Queue()
......
......@@ -7,6 +7,7 @@ import base64
import signal
import requests
import traceback
from django import db
from PIL import Image
from datetime import datetime
from django.core.management import BaseCommand
......@@ -20,6 +21,8 @@ from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predi
from apps.doc import consts
from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException
from apps.doc.ocr.wb import BSWorkbook
from apps.doc.models import OfflineReport
from apps.doc.named_enum import OfflineFailureReason
class TIFFHandler:
......@@ -688,6 +691,9 @@ class Command(BaseCommand, LoggerMixin):
for name in true_file_set:
time.sleep(5)
path = os.path.join(input_dir, name)
is_success = True
failure_reason = OfflineFailureReason.OS_ERROR.value
start_time = time.time()
try:
if not os.path.exists(path):
......@@ -716,16 +722,21 @@ class Command(BaseCommand, LoggerMixin):
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except OSError:
is_success = False
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [os error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
except Exception as e:
is_success = False
failure_reason = OfflineFailureReason.PROCESS_ERROR.value
try:
self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path,
traceback.format_exc()))
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except Exception as e:
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [file move error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
......@@ -755,8 +766,23 @@ class Command(BaseCommand, LoggerMixin):
self.FILE_KEY: name,
}
)
finally:
end_time = time.time()
try:
OfflineReport.objects.create(
input_folder=input_dir,
doc_type=classify,
file_name=name,
status=is_success,
failure_reason=failure_reason,
duration=int(end_time - start_time)
)
except Exception as e:
self.folder_log.error('{0} [db save failed] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
def handle(self, *args, **kwargs):
db.close_old_connections()
if len(self.input_dirs) == 0:
return
result_queue = Queue()
......
......@@ -6,6 +6,7 @@ import base64
import signal
import requests
import traceback
from django import db
from PIL import Image
from datetime import datetime
from django.core.management import BaseCommand
......@@ -17,6 +18,8 @@ from common.tools.pdf_to_img import PDFHandler
from apps.doc import consts
from apps.doc.exceptions import OCR1Exception, OCR4Exception
from apps.doc.ocr.wb import BSWorkbook
from apps.doc.models import OfflineReport
from apps.doc.named_enum import OfflineFailureReason
class TIFFHandler:
......@@ -337,6 +340,9 @@ class Command(BaseCommand, LoggerMixin):
if len(true_file_set) == 0 and len(os_error_filename_set) > 0:
true_file_set.add(os_error_filename_set.pop())
for name in true_file_set:
is_success = True
failure_reason = OfflineFailureReason.OS_ERROR.value
start_time = time.time()
path = os.path.join(input_dir, name)
try:
......@@ -358,21 +364,41 @@ class Command(BaseCommand, LoggerMixin):
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except OSError:
is_success = False
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [os error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
except Exception as e:
is_success = False
failure_reason = OfflineFailureReason.PROCESS_ERROR.value
try:
self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path,
traceback.format_exc()))
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except Exception as e:
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [file move error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
finally:
end_time = time.time()
try:
OfflineReport.objects.create(
input_folder=input_dir,
doc_type=classify,
file_name=name,
status=is_success,
failure_reason=failure_reason,
duration=int(end_time-start_time)
)
except Exception as e:
self.folder_log.error('{0} [db save failed] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
def handle(self, *args, **kwargs):
db.close_old_connections()
process_list = []
for classify_idx, input_dir in self.input_dirs.items():
classify = int(classify_idx.split('_')[0])
......
......@@ -6,6 +6,7 @@ import base64
import signal
import requests
import traceback
from django import db
from PIL import Image
from datetime import datetime
from django.core.management import BaseCommand
......@@ -20,6 +21,8 @@ from common.tools.pdf_to_img import PDFHandler
from apps.doc import consts
from apps.doc.exceptions import OCR1Exception, OCR4Exception
from apps.doc.ocr.wb import BSWorkbook, PatternFill
from apps.doc.models import OfflineReport
from apps.doc.named_enum import OfflineFailureReason
class Finder:
......@@ -582,6 +585,9 @@ class Command(BaseCommand, LoggerMixin):
for name in true_file_set:
time.sleep(10) # 防止文件较大时,读取到不完整文件
path = os.path.join(input_dir, name)
is_success = True
failure_reason = OfflineFailureReason.OS_ERROR.value
start_time = time.time()
try:
if not os.path.exists(path):
......@@ -605,20 +611,40 @@ class Command(BaseCommand, LoggerMixin):
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except OSError:
is_success = False
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [os error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
except Exception as e:
is_success = False
failure_reason = OfflineFailureReason.PROCESS_ERROR.value
try:
self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path,
traceback.format_exc()))
failed_path = os.path.join(failed_output_dir, '{0}_{1}'.format(time.time(), name))
shutil.move(path, failed_path)
except Exception as e:
failure_reason = OfflineFailureReason.OS_ERROR.value
os_error_filename_set.add(name)
self.folder_log.error('{0} [file move error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
finally:
end_time = time.time()
try:
OfflineReport.objects.create(
input_folder=input_dir,
doc_type=consts.FOLDER_WSC_CLASSIFY,
file_name=name,
status=is_success,
failure_reason=failure_reason,
duration=int(end_time - start_time)
)
except Exception as e:
self.folder_log.error('{0} [db save failed] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
def handle(self, *args, **kwargs):
db.close_old_connections()
self.folder_process(self.input_dir)
self.folder_log.info('{0} [stop safely]'.format(self.log_base))
......
......@@ -933,23 +933,37 @@ class MposReport(models.Model):
db_table = 'mpos_report'
# class HILOfflineReport(models.Model):
class OfflineReport(models.Model):
    """Per-file processing record written by the offline folder commands.

    A row is created with ``OfflineReport.objects.create(...)`` once a file
    has been handled, carrying the input folder, document type, file name,
    success flag, failure reason and elapsed time. The table is not managed
    by Django migrations (``managed = False``).
    """

    # Primary key.
    id = models.AutoField(verbose_name="id", primary_key=True)
    # Directory the processed file was read from.
    input_folder = models.CharField(verbose_name="文件夹路径", max_length=512)
    # Document classification code (callers pass a classify constant).
    doc_type = models.SmallIntegerField(verbose_name="文件类型", default=0)
    # Name of the processed file, without the folder part.
    file_name = models.CharField(verbose_name="文件名", max_length=1024)
    # True when processing succeeded.
    status = models.BooleanField(verbose_name="是否成功", default=True)
    # OfflineFailureReason value. The default 0 equals OS_ERROR, so read it
    # together with `status` rather than on its own.
    failure_reason = models.SmallIntegerField(verbose_name="失败原因", default=0)
    # Elapsed processing time; callers pass int(end_time - start_time).
    # NOTE(review): the DDL added in this change declares the column as
    # smallint — confirm very long runs cannot overflow it.
    duration = models.IntegerField(verbose_name='处理时长')
    # Set automatically when the row is first saved.
    create_time = models.DateTimeField(verbose_name='创建时间', auto_now_add=True)

    class Meta:
        # Django must not create or alter this table.
        managed = False
        db_table = 'offline_report'
# class AFCOfflineReport(models.Model):
# id = models.AutoField(primary_key=True, verbose_name="id") # 主键
#
# update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
# input_folder = models.CharField(max_length=512, verbose_name="文件夹路径")
# doc_type = models.SmallIntegerField(default=0, verbose_name="文件类型")
# file_name = models.CharField(max_length=1024, verbose_name="文件名")
# status = models.BooleanField(default=True, verbose_name="是否成功")
# failure_reason = models.SmallIntegerField(default=0, verbose_name="失败原因")
# duration = models.IntegerField(verbose_name='处理时长')
#
# create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
#
# class Meta:
# managed = False
# db_table = 'hil_offline_report'
# new table: hil/afc_offline_ocr_report
# 1. file_name string eg. 'CH-B2432.pdf'
# 2. doc_type int eg. 2(VAT Invoice)
# 3. successful_at_this_level boolean eg. 0
# 4. failure_reason int eg. 2(PDF)
# 5. duration int eg. 100
#
# id/input_folder/start_time/end_time/create_time
# db_table = 'afc_offline_report'
# situ_db_label = 'afc'
......
......@@ -90,3 +90,8 @@ class BSCheckResult(NamedEnum):
CHECK_TRUE = (1, 'CHECK_TRUE')
CHECK_FALSE = (2, 'CHECK_FALSE')
CHECK_FAILED = (3, 'CHECK_FAILED')
class OfflineFailureReason(NamedEnum):
    """Failure categories stored in ``OfflineReport.failure_reason``."""

    # Recorded when an OSError is caught, or when moving a failed file to the
    # failed-output directory itself raises. Also the initial value the folder
    # commands assign before processing starts.
    OS_ERROR = (0, 'OS_ERROR')

    # Recorded when any other exception is raised while processing a file.
    PROCESS_ERROR = (1, 'PROCESS_ERROR')
......
......@@ -14,6 +14,18 @@ hil_sql = """
ALTER TABLE hil_ocr_report ADD bank_name nvarchar(2048);
ALTER TABLE hil_ocr_report ADD is_ebank bit default 0 not null;
ALTER TABLE hil_ocr_report ADD bs_check_result tinyint default 0 not null;
create table offline_report
(
id bigint identity primary key,
input_folder nvarchar(512) not null,
doc_type tinyint default 0 not null,
file_name nvarchar(1024) not null,
status bit default 1 not null,
failure_reason tinyint default 0 not null,
duration smallint not null,
create_time datetime not null
);
"""
afc_sql = """
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!