15dccc97 by 周伟奇

Merge branch 'feature/main' into feature/mssql

2 parents 236b64e0 5e463cbd
......@@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果')
# '借贷': ('贷', '借'), # 竖版-无表格-广发银行
# '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行
# '收/支': ('收入', '支出'), # 横版-表格-北京银行
BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'}
BORROW_HEADERS_SET = {'借贷', '借\n贷', '借贷状态', '收/支', '收支标志'}
BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'}
BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'}
INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'}
......@@ -165,6 +165,7 @@ HEADERS_MAPPING = {}
HEADERS_MAPPING.update(
{
'借贷': BORROW_KEY,
'借\n贷': BORROW_KEY,
'借贷状态': BORROW_KEY,
'收支标志': BORROW_KEY,
'收/支': BORROW_KEY,
......
......@@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin):
print('excel dir not exists')
return
excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str))
log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str))
# log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str))
log_path = os.path.join(conf.LOG_DIR, 'bs_statistics.log.{0}'.format(date_str))
if not os.path.exists(log_path):
print('log_path not exists')
return
......@@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin):
summary_dict = {}
with open(log_path, 'r', encoding='utf-8') as fp:
for line in fp:
search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line)
# search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line)
search_obj = re.search(r'\[task=(.*)] \[bs_summary=(.*)]', line)
task_str = search_obj.group(1)
business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
doc_id = int(doc_id_str)
......
import re
import os
import ast
import datetime
from openpyxl import Workbook
from django.core.management import BaseCommand
from settings import conf
from common.mixins import LoggerMixin
from apps.doc.models import HILDoc, AFCDoc
from apps.doc import consts
class Command(BaseCommand, LoggerMixin):
    """Export one day's ID-card entries from the idcard log into Excel files.

    Reads ``idcard.log.<date>`` under ``conf.LOG_DIR``, where each line is
    expected to look like ``[<time>] [task=<task>] [idcard=<list literal>]``,
    and writes one workbook per business type (AFC / HIL) to
    ``<conf.DATA_DIR>/<AFC|HIL>/IdCard/idcard_<date>.xlsx``.
    """

    # Compiled once; applied to every line of the log file.
    _LINE_PATTERN = re.compile(r'\[(.*)] \[task=(.*)] \[idcard=(.*)]')

    def __init__(self):
        super().__init__()
        self.sheet_name = '身份证'
        self.header = ('申请号', '身份证号', '民族', '时间戳')

    def add_arguments(self, parser):
        """Register the optional ``--date`` argument (defaults to yesterday)."""
        parser.add_argument(
            '--date',
            default=datetime.date.today() - datetime.timedelta(days=1),
            dest='date',
            help='将要计算的日期,格式: 2018-01-01'
        )

    def _new_workbook(self):
        """Return ``(workbook, worksheet)`` holding only the header row."""
        workbook = Workbook()
        worksheet = workbook.create_sheet(self.sheet_name)
        worksheet.append(self.header)
        # Drop the default sheet that openpyxl creates for a new workbook.
        workbook.remove(workbook['Sheet'])
        return workbook, worksheet

    def handle(self, *args, **kwargs):
        """Build the AFC and HIL ID-card workbooks for the requested date.

        Prints a message and returns without writing anything when the date
        string is malformed, an output directory is missing, or the log file
        for that date does not exist.
        """
        date = kwargs.get('date')
        if date is None:
            # Called programmatically without a date: same default as the CLI.
            date = datetime.date.today() - datetime.timedelta(days=1)
        if isinstance(date, str):
            # fullmatch rejects trailing junk that would otherwise leak into
            # the log and Excel file names.
            if not re.fullmatch(r'\d{4}-\d{2}-\d{2}', date):
                print('date format error')
                return
            date_str = date
        else:
            date_str = date.strftime('%Y-%m-%d')

        afc_excel_dir = os.path.join(conf.DATA_DIR, 'AFC', 'IdCard')
        hil_excel_dir = os.path.join(conf.DATA_DIR, 'HIL', 'IdCard')
        if not (os.path.exists(afc_excel_dir) and os.path.exists(hil_excel_dir)):
            print('excel_dir not exist')
            return

        log_path = os.path.join(conf.LOG_DIR, 'idcard.log.{0}'.format(date_str))
        if not os.path.exists(log_path):
            print('log_path not exists')
            return

        wb_afc, ws_afc = self._new_workbook()
        wb_hil, ws_hil = self._new_workbook()

        with open(log_path, 'r', encoding='utf-8') as fp:
            for line in fp:
                search_obj = self._LINE_PATTERN.match(line)
                if search_obj is None:
                    # Blank or foreign line: skip it instead of crashing.
                    continue
                time_str, task_str, idcard_str = search_obj.groups()

                idcard_list = ast.literal_eval(idcard_str)
                # Keep entries that carry a '民族' value and whose '类别' is
                # not '1'.
                # NOTE(review): the meaning of '类别' == '1' is not visible
                # here -- confirm against the producer of this log.
                content_list = [
                    (idcard_dict.get('公民身份号码'), idcard_dict.get('民族'))
                    for idcard_dict in idcard_list
                    if idcard_dict.get('民族') is not None
                    and idcard_dict.get('类别') != '1'
                ]
                if not content_list:
                    continue

                business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
                is_hil = business_type == consts.HIL_PREFIX
                doc_class = HILDoc if is_hil else AFCDoc
                # Fetch the application id once per log line; the document
                # may no longer exist, in which case the line is skipped.
                application_ids = list(
                    doc_class.objects.filter(id=int(doc_id_str))
                    .values_list('application_id', flat=True)[:1]
                )
                if not application_ids:
                    print('doc not exists: {0}'.format(task_str))
                    continue
                application_id = application_ids[0]

                target_ws = ws_hil if is_hil else ws_afc
                for id_num, nation in content_list:
                    target_ws.append((application_id, id_num, nation, time_str))

        afc_excel_path = os.path.join(afc_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        hil_excel_path = os.path.join(hil_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        wb_afc.save(afc_excel_path)
        wb_hil.save(hil_excel_path)
import os
import datetime
from calendar import monthrange
from openpyxl import Workbook, load_workbook
from django.core.management import BaseCommand
from settings import conf
from common.mixins import LoggerMixin
class Command(BaseCommand, LoggerMixin):
    """Merge the previous month's daily ID-card workbooks into a monthly one.

    For each directory in ``self.dirs`` the daily files
    ``<conf.DATA_DIR>/<dir>/IdCard/idcard_<YYYY-MM-DD>.xlsx`` are copied, one
    worksheet per day, into ``idcard_<YYYY-MM>.xlsx`` in the same directory.
    """

    def __init__(self):
        super().__init__()
        self.dirs = ('AFC', 'HIL')

    def handle(self, *args, **kwargs):
        """Build the monthly workbook for every configured directory.

        Days without a daily workbook are reported and skipped. When a month
        has no daily workbook at all, no monthly file is written for that
        directory.
        """
        # The day before the first of the current month lies in the previous
        # month, whatever that month's length.
        first_of_this_month = datetime.datetime.now().replace(day=1)
        prev_month = first_of_this_month - datetime.timedelta(days=1)
        days_in_month = monthrange(prev_month.year, prev_month.month)[1]
        month_str = prev_month.strftime('%Y-%m')

        for target_dir in self.dirs:
            excel_dir = os.path.join(conf.DATA_DIR, target_dir, 'IdCard')
            if not os.path.exists(excel_dir):
                print('excel dir not exists: {0}'.format(excel_dir))
                # NOTE(review): this aborts the whole command, so later
                # entries in self.dirs are not processed -- confirm intended.
                return

            monthly_wb = Workbook()
            copied_days = 0
            for day in range(1, days_in_month + 1):
                date_str = '{:04d}-{:02d}-{:02d}'.format(prev_month.year, prev_month.month, day)
                daily_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(date_str))
                if not os.path.exists(daily_excel_path):
                    print('daily excel path not exists: {0}'.format(daily_excel_path))
                    continue

                monthly_ws = monthly_wb.create_sheet(date_str)
                daily_ws = load_workbook(daily_excel_path)['身份证']
                for row in daily_ws.iter_rows(min_row=1, values_only=True):
                    monthly_ws.append(row)
                copied_days += 1

            if not copied_days:
                # Removing the default sheet would leave a workbook with no
                # sheets, which openpyxl refuses to save.
                print('no daily excel found, skip monthly excel: {0}'.format(excel_dir))
                continue

            monthly_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(month_str))
            # Drop the default sheet that openpyxl creates for a new workbook.
            monthly_wb.remove(monthly_wb['Sheet'])
            monthly_wb.save(monthly_excel_path)
......@@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin):
def __init__(self):
super().__init__()
self.log_base = '[license statistics]'
self.header_map = {
consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称',
'购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额',
......@@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin):
print('excel dir not exists')
return
excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str))
log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str))
# log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str))
log_path = os.path.join(conf.LOG_DIR, 'license_statistics.log.{0}'.format(date_str))
if not os.path.exists(log_path):
print('log_path not exists')
return
......@@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin):
with open(log_path, 'r', encoding='utf-8') as fp:
for line in fp:
search_obj = re.search(r'task=(.*) license_summary=(.*)', line)
# search_obj = re.search(r'task=(.*) license_summary=(.*)', line)
search_obj = re.search(r'\[task=(.*)] \[license_summary=(.*)]', line)
task_str = search_obj.group(1)
license_summary = ast.literal_eval(search_obj.group(2))
business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
......
......@@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin):
'[license_summary={4}]'.format(self.log_base, task_str, bs_summary,
unknown_summary, license_summary))
self.license_log.info('[task={0}] [license_summary={1}]'.format(task_str, license_summary))
idcard_list = license_summary.get(consts.IC_CLASSIFY)
if idcard_list:
self.idcard_log.info('[task={0}] [idcard={1}]'.format(task_str, idcard_list))
merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary)
self.bs_log.info('[task={0}] [bs_summary={1}]'.format(task_str, merged_bs_summary))
self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] '
'[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary,
unknown_summary, res_list))
......
......@@ -40,6 +40,9 @@ class LoggerMixin:
exception_log = logging.getLogger('exception')
cronjob_log = logging.getLogger('cronjob')
folder_log = logging.getLogger('folder')
bs_log = logging.getLogger('bs')
license_log = logging.getLogger('license')
idcard_log = logging.getLogger('idcard')
class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView):
......
......@@ -84,9 +84,15 @@ class PDFHandler:
def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0):
pix = self.recover_pix(pdf, xref, smask, colorspace)
ext, img_data = self.get_img_data(pix)
img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext)
with open(img_save_path, "wb") as f:
f.write(img_data)
if ext == 'jpx':
img_save_path = self.get_img_save_path(pno, img_index=img_index, ext='jpeg')
jpx_pix = fitz.Pixmap(img_data)
jpx_pix.writeImage(img_save_path)
jpx_pix = None
else:
img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext)
with open(img_save_path, "wb") as f:
f.write(img_data)
self.xref_set.add(xref)
self.img_path_list.append(img_save_path)
......
[loggers]
keys=root, running, exception, cronjob, folder, django.db.backends
keys=root, running, exception, cronjob, folder, bs, license, idcard, django.db.backends
[handlers]
keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, djangodbFileHandler
keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, bsFileHandler, licenseFileHandler, idcardFileHandler, djangodbFileHandler
[formatters]
keys=SituFormatter, dataLogFormatter
keys=SituFormatter, dataLogFormatter, SimpleFormatter
[formatter_SituFormatter]
format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s
......@@ -15,6 +15,10 @@ datefmt=
class=situlogger.JsonFormatter
format=%(asctime)s %(levelname)s %(funcName)s
[formatter_SimpleFormatter]
format=[%(asctime)s] %(message)s
datefmt=
[handler_consoleHandler]
class=StreamHandler
level=ERROR
......@@ -45,6 +49,24 @@ level=DEBUG
formatter=SituFormatter
args=('../logs/folder_ocr.log',)
[handler_bsFileHandler]
class=situlogger.SituRotatingFileHandler
level=DEBUG
formatter=SimpleFormatter
args=('../logs/bs_statistics.log',)
[handler_licenseFileHandler]
class=situlogger.SituRotatingFileHandler
level=DEBUG
formatter=SimpleFormatter
args=('../logs/license_statistics.log',)
[handler_idcardFileHandler]
class=situlogger.SituRotatingFileHandler
level=DEBUG
formatter=SimpleFormatter
args=('../logs/idcard.log',)
[handler_djangodbFileHandler]
class=situlogger.SituRotatingFileHandler
level=DEBUG
......@@ -79,6 +101,24 @@ handlers=folderFileHandler
qualname=folder
propagate=0
[logger_bs]
level=INFO
handlers=bsFileHandler
qualname=bs
propagate=0
[logger_license]
level=INFO
handlers=licenseFileHandler
qualname=license
propagate=0
[logger_idcard]
level=INFO
handlers=idcardFileHandler
qualname=idcard
propagate=0
[logger_django.db.backends]
level=DEBUG
handlers=djangodbFileHandler
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!