Merge branch 'feature/main' into feature/mssql
Showing 10 changed files with 207 additions and 12 deletions
| ... | @@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') | ... | @@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') |
| 152 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 | 152 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 |
| 153 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 | 153 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 |
| 154 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 | 154 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 |
| 155 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'} | 155 | BORROW_HEADERS_SET = {'借贷', '借\n贷', '借贷状态', '收/支', '收支标志'} |
| 156 | BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'} | 156 | BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'} |
| 157 | BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'} | 157 | BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'} |
| 158 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} | 158 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} |
| ... | @@ -165,6 +165,7 @@ HEADERS_MAPPING = {} | ... | @@ -165,6 +165,7 @@ HEADERS_MAPPING = {} |
| 165 | HEADERS_MAPPING.update( | 165 | HEADERS_MAPPING.update( |
| 166 | { | 166 | { |
| 167 | '借贷': BORROW_KEY, | 167 | '借贷': BORROW_KEY, |
| 168 | '借\n贷': BORROW_KEY, | ||
| 168 | '借贷状态': BORROW_KEY, | 169 | '借贷状态': BORROW_KEY, |
| 169 | '收支标志': BORROW_KEY, | 170 | '收支标志': BORROW_KEY, |
| 170 | '收/支': BORROW_KEY, | 171 | '收/支': BORROW_KEY, | ... | ... |
| ... | @@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin): |
| 40 | print('excel dir not exists') | 40 | print('excel dir not exists') |
| 41 | return | 41 | return |
| 42 | excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str)) | 42 | excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str)) |
| 43 | log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str)) | 43 | # log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str)) |
| 44 | log_path = os.path.join(conf.LOG_DIR, 'bs_statistics.log.{0}'.format(date_str)) | ||
| 44 | if not os.path.exists(log_path): | 45 | if not os.path.exists(log_path): |
| 45 | print('log_path not exists') | 46 | print('log_path not exists') |
| 46 | return | 47 | return |
| ... | @@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin): |
| 48 | summary_dict = {} | 49 | summary_dict = {} |
| 49 | with open(log_path, 'r', encoding='utf-8') as fp: | 50 | with open(log_path, 'r', encoding='utf-8') as fp: |
| 50 | for line in fp: | 51 | for line in fp: |
| 51 | search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line) | 52 | # search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line) |
| 53 | search_obj = re.search(r'\[task=(.*)] \[bs_summary=(.*)]', line) | ||
| 52 | task_str = search_obj.group(1) | 54 | task_str = search_obj.group(1) |
| 53 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 55 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) |
| 54 | doc_id = int(doc_id_str) | 56 | doc_id = int(doc_id_str) | ... | ... |
This diff is collapsed.
Click to expand it.
| 1 | import re | ||
| 2 | import os | ||
| 3 | import ast | ||
| 4 | import datetime | ||
| 5 | from openpyxl import Workbook | ||
| 6 | from django.core.management import BaseCommand | ||
| 7 | from settings import conf | ||
| 8 | from common.mixins import LoggerMixin | ||
| 9 | from apps.doc.models import HILDoc, AFCDoc | ||
| 10 | from apps.doc import consts | ||
| 11 | |||
| 12 | |||
class Command(BaseCommand, LoggerMixin):
    """Export one day's logged ID-card records to the AFC and HIL Excel files.

    The command reads ``idcard.log.<date>`` under ``conf.LOG_DIR``, keeps the
    ID-card entries that carry a '民族' value and whose '类别' is not ``'1'``,
    and writes one row per kept entry to
    ``<conf.DATA_DIR>/<AFC|HIL>/IdCard/idcard_<date>.xlsx``.
    """

    # Used with ``fullmatch`` so that values with trailing characters
    # (e.g. '2018-01-01x') are rejected before they end up in a file name.
    _DATE_PATTERN = re.compile(r'\d{4}-\d{2}-\d{2}')
    # Expected line shape: '[<time>] [task=<task string>] [idcard=<list literal>]'
    _LINE_PATTERN = re.compile(r'\[(.*)] \[task=(.*)] \[idcard=(.*)]')

    def __init__(self):
        super().__init__()
        self.sheet_name = '身份证'
        self.header = ('申请号', '身份证号', '民族', '时间戳')

    def add_arguments(self, parser):
        """Register ``--date``; it defaults to yesterday as a ``date`` object."""
        parser.add_argument(
            '--date',
            default=datetime.date.today() - datetime.timedelta(days=1),
            dest='date',
            help='将要计算的日期,格式: 2018-01-01'
        )

    def _new_workbook(self):
        """Return ``(workbook, worksheet)`` with a single, header-only sheet."""
        workbook = Workbook()
        # Reuse the default sheet instead of creating a second one and
        # deleting the default through the deprecated get_sheet_by_name().
        worksheet = workbook.active
        worksheet.title = self.sheet_name
        worksheet.append(self.header)
        return workbook, worksheet

    @classmethod
    def _parse_line(cls, line):
        """Parse one log line into ``(time_str, task_str, rows)``.

        ``rows`` is a list of ``(id number, nation)`` tuples and may be empty.
        Returns ``None`` when the line does not have the expected shape or its
        idcard payload is not a valid Python literal.
        """
        matched = cls._LINE_PATTERN.match(line)
        if matched is None:
            return None
        time_str, task_str, idcard_str = matched.groups()
        try:
            idcard_list = ast.literal_eval(idcard_str)
        except (ValueError, TypeError, SyntaxError):
            return None
        rows = [
            (idcard.get('公民身份号码'), idcard.get('民族'))
            for idcard in idcard_list
            # Entries without a nation or with category '1' are not exported.
            if idcard.get('民族') is not None and idcard.get('类别') != '1'
        ]
        return time_str, task_str, rows

    def handle(self, *args, **kwargs):
        """Build and save the AFC and HIL workbooks for the requested date.

        Returns early (after printing a message) when the date string is
        malformed, or when an output directory or the log file is missing.
        Unusable log lines and tasks whose document cannot be found are
        reported and skipped instead of aborting the export.
        """
        date = kwargs.get('date')
        if isinstance(date, str):
            if not self._DATE_PATTERN.fullmatch(date):
                print('date format error')
                return
            date_str = date
        else:
            date_str = date.strftime('%Y-%m-%d')

        afc_excel_dir = os.path.join(conf.DATA_DIR, 'AFC', 'IdCard')
        hil_excel_dir = os.path.join(conf.DATA_DIR, 'HIL', 'IdCard')
        if not os.path.exists(afc_excel_dir) or not os.path.exists(hil_excel_dir):
            print('excel_dir not exist')
            return

        log_path = os.path.join(conf.LOG_DIR, 'idcard.log.{0}'.format(date_str))
        if not os.path.exists(log_path):
            print('log_path not exists')
            return

        wb_afc, ws_afc = self._new_workbook()
        wb_hil, ws_hil = self._new_workbook()

        with open(log_path, 'r', encoding='utf-8') as fp:
            for line_no, line in enumerate(fp, start=1):
                parsed = self._parse_line(line)
                if parsed is None:
                    print('skip malformed log line: {0}'.format(line_no))
                    continue
                time_str, task_str, rows = parsed
                if not rows:
                    continue

                try:
                    business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
                    doc_id = int(doc_id_str)
                except ValueError:
                    print('skip malformed task: {0}'.format(task_str))
                    continue

                is_hil = business_type == consts.HIL_PREFIX
                doc_class = HILDoc if is_hil else AFCDoc
                # Fetch the value once per task; indexing the lazy queryset for
                # every row would issue one query per row and raise IndexError
                # when the document does not exist.
                application_ids = list(
                    doc_class.objects.filter(id=doc_id).values_list('application_id', flat=True)[:1]
                )
                if not application_ids:
                    print('doc not found: {0}'.format(task_str))
                    continue
                application_id = application_ids[0]

                target_ws = ws_hil if is_hil else ws_afc
                for id_num, nation in rows:
                    target_ws.append((application_id, id_num, nation, time_str))

        afc_excel_path = os.path.join(afc_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        hil_excel_path = os.path.join(hil_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        wb_afc.save(afc_excel_path)
        wb_hil.save(hil_excel_path)
| 1 | import os | ||
| 2 | import datetime | ||
| 3 | from calendar import monthrange | ||
| 4 | from openpyxl import Workbook, load_workbook | ||
| 5 | from django.core.management import BaseCommand | ||
| 6 | from settings import conf | ||
| 7 | from common.mixins import LoggerMixin | ||
| 8 | |||
| 9 | |||
class Command(BaseCommand, LoggerMixin):
    """Merge last month's daily ID-card workbooks into one monthly workbook.

    For each directory in ``self.dirs`` the command looks under
    ``<conf.DATA_DIR>/<dir>/IdCard`` for ``idcard_<YYYY-MM-DD>.xlsx`` files of
    the previous calendar month and copies the '身份证' sheet of each into a
    sheet named after that date in ``idcard_<YYYY-MM>.xlsx``.
    """

    def __init__(self):
        super().__init__()
        self.dirs = ('AFC', 'HIL')

    def handle(self, *args, **kwargs):
        """Build the monthly workbook for every configured directory.

        A missing directory or a month without any daily workbook is reported
        and skipped; the remaining directories are still processed.
        """
        # Stepping back one day from the first of this month lands in the
        # previous month, whatever its length.
        first_of_this_month = datetime.datetime.now().replace(day=1)
        prev_month = first_of_this_month - datetime.timedelta(days=1)
        days_in_prev_month = monthrange(prev_month.year, prev_month.month)[1]

        for target_dir in self.dirs:
            excel_dir = os.path.join(conf.DATA_DIR, target_dir, 'IdCard')
            if not os.path.exists(excel_dir):
                print('excel dir not exists: {0}'.format(excel_dir))
                # Keep going: one missing directory must not block the others.
                continue

            monthly_wb = Workbook()

            for day in range(1, days_in_prev_month + 1):
                date_str = '{:04d}-{:02d}-{:02d}'.format(prev_month.year, prev_month.month, day)
                daily_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(date_str))
                if not os.path.exists(daily_excel_path):
                    print('daily excel path not exists: {0}'.format(daily_excel_path))
                    continue

                monthly_ws = monthly_wb.create_sheet(date_str)
                daily_ws = load_workbook(daily_excel_path)['身份证']
                for row in daily_ws.iter_rows(min_row=1, values_only=True):
                    monthly_ws.append(row)

            # Only the default 'Sheet' is left when no daily file was found;
            # removing it would leave a workbook without sheets, which cannot
            # be saved.
            if len(monthly_wb.sheetnames) == 1:
                print('no daily excel found: {0}'.format(excel_dir))
                continue

            monthly_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(prev_month.strftime('%Y-%m')))
            monthly_wb.remove(monthly_wb['Sheet'])
            monthly_wb.save(monthly_excel_path)
| ... | @@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin): |
| 14 | 14 | ||
| 15 | def __init__(self): | 15 | def __init__(self): |
| 16 | super().__init__() | 16 | super().__init__() |
| 17 | self.log_base = '[license statistics]' | ||
| 18 | self.header_map = { | 17 | self.header_map = { |
| 19 | consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称', | 18 | consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称', |
| 20 | '购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额', | 19 | '购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额', |
| ... | @@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin): |
| 75 | print('excel dir not exists') | 74 | print('excel dir not exists') |
| 76 | return | 75 | return |
| 77 | excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str)) | 76 | excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str)) |
| 78 | log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str)) | 77 | # log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str)) |
| 78 | log_path = os.path.join(conf.LOG_DIR, 'license_statistics.log.{0}'.format(date_str)) | ||
| 79 | if not os.path.exists(log_path): | 79 | if not os.path.exists(log_path): |
| 80 | print('log_path not exists') | 80 | print('log_path not exists') |
| 81 | return | 81 | return |
| ... | @@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin): |
| 92 | 92 | ||
| 93 | with open(log_path, 'r', encoding='utf-8') as fp: | 93 | with open(log_path, 'r', encoding='utf-8') as fp: |
| 94 | for line in fp: | 94 | for line in fp: |
| 95 | search_obj = re.search(r'task=(.*) license_summary=(.*)', line) | 95 | # search_obj = re.search(r'task=(.*) license_summary=(.*)', line) |
| 96 | search_obj = re.search(r'\[task=(.*)] \[license_summary=(.*)]', line) | ||
| 96 | task_str = search_obj.group(1) | 97 | task_str = search_obj.group(1) |
| 97 | license_summary = ast.literal_eval(search_obj.group(2)) | 98 | license_summary = ast.literal_eval(search_obj.group(2)) |
| 98 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 99 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | ... | ... |
| ... | @@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin): |
| 689 | '[license_summary={4}]'.format(self.log_base, task_str, bs_summary, | 689 | '[license_summary={4}]'.format(self.log_base, task_str, bs_summary, |
| 690 | unknown_summary, license_summary)) | 690 | unknown_summary, license_summary)) |
| 691 | 691 | ||
| 692 | self.license_log.info('[task={0}] [license_summary={1}]'.format(task_str, license_summary)) | ||
| 693 | idcard_list = license_summary.get(consts.IC_CLASSIFY) | ||
| 694 | if idcard_list: | ||
| 695 | self.idcard_log.info('[task={0}] [idcard={1}]'.format(task_str, idcard_list)) | ||
| 696 | |||
| 692 | merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) | 697 | merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) |
| 693 | 698 | ||
| 699 | self.bs_log.info('[task={0}] [bs_summary={1}]'.format(task_str, merged_bs_summary)) | ||
| 700 | |||
| 694 | self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] ' | 701 | self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] ' |
| 695 | '[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary, | 702 | '[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary, |
| 696 | unknown_summary, res_list)) | 703 | unknown_summary, res_list)) | ... | ... |
| ... | @@ -40,6 +40,9 @@ class LoggerMixin: | ... | @@ -40,6 +40,9 @@ class LoggerMixin: |
| 40 | exception_log = logging.getLogger('exception') | 40 | exception_log = logging.getLogger('exception') |
| 41 | cronjob_log = logging.getLogger('cronjob') | 41 | cronjob_log = logging.getLogger('cronjob') |
| 42 | folder_log = logging.getLogger('folder') | 42 | folder_log = logging.getLogger('folder') |
| 43 | bs_log = logging.getLogger('bs') | ||
| 44 | license_log = logging.getLogger('license') | ||
| 45 | idcard_log = logging.getLogger('idcard') | ||
| 43 | 46 | ||
| 44 | 47 | ||
| 45 | class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView): | 48 | class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView): | ... | ... |
| ... | @@ -84,9 +84,15 @@ class PDFHandler: | ... | @@ -84,9 +84,15 @@ class PDFHandler: |
| 84 | def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0): | 84 | def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0): |
| 85 | pix = self.recover_pix(pdf, xref, smask, colorspace) | 85 | pix = self.recover_pix(pdf, xref, smask, colorspace) |
| 86 | ext, img_data = self.get_img_data(pix) | 86 | ext, img_data = self.get_img_data(pix) |
| 87 | img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext) | 87 | if ext == 'jpx': |
| 88 | with open(img_save_path, "wb") as f: | 88 | img_save_path = self.get_img_save_path(pno, img_index=img_index, ext='jpeg') |
| 89 | f.write(img_data) | 89 | jpx_pix = fitz.Pixmap(img_data) |
| 90 | jpx_pix.writeImage(img_save_path) | ||
| 91 | jpx_pix = None | ||
| 92 | else: | ||
| 93 | img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext) | ||
| 94 | with open(img_save_path, "wb") as f: | ||
| 95 | f.write(img_data) | ||
| 90 | self.xref_set.add(xref) | 96 | self.xref_set.add(xref) |
| 91 | self.img_path_list.append(img_save_path) | 97 | self.img_path_list.append(img_save_path) |
| 92 | 98 | ... | ... |
| 1 | [loggers] | 1 | [loggers] |
| 2 | keys=root, running, exception, cronjob, folder, django.db.backends | 2 | keys=root, running, exception, cronjob, folder, bs, license, idcard, django.db.backends |
| 3 | 3 | ||
| 4 | [handlers] | 4 | [handlers] |
| 5 | keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, djangodbFileHandler | 5 | keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, bsFileHandler, licenseFileHandler, idcardFileHandler, djangodbFileHandler |
| 6 | 6 | ||
| 7 | [formatters] | 7 | [formatters] |
| 8 | keys=SituFormatter, dataLogFormatter | 8 | keys=SituFormatter, dataLogFormatter, SimpleFormatter |
| 9 | 9 | ||
| 10 | [formatter_SituFormatter] | 10 | [formatter_SituFormatter] |
| 11 | format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s | 11 | format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s |
| ... | @@ -15,6 +15,10 @@ datefmt= | ... | @@ -15,6 +15,10 @@ datefmt= |
| 15 | class=situlogger.JsonFormatter | 15 | class=situlogger.JsonFormatter |
| 16 | format=%(asctime)s %(levelname)s %(funcName)s | 16 | format=%(asctime)s %(levelname)s %(funcName)s |
| 17 | 17 | ||
| 18 | [formatter_SimpleFormatter] | ||
| 19 | format=[%(asctime)s] %(message)s | ||
| 20 | datefmt= | ||
| 21 | |||
| 18 | [handler_consoleHandler] | 22 | [handler_consoleHandler] |
| 19 | class=StreamHandler | 23 | class=StreamHandler |
| 20 | level=ERROR | 24 | level=ERROR |
| ... | @@ -45,6 +49,24 @@ level=DEBUG | ... | @@ -45,6 +49,24 @@ level=DEBUG |
| 45 | formatter=SituFormatter | 49 | formatter=SituFormatter |
| 46 | args=('../logs/folder_ocr.log',) | 50 | args=('../logs/folder_ocr.log',) |
| 47 | 51 | ||
| 52 | [handler_bsFileHandler] | ||
| 53 | class=situlogger.SituRotatingFileHandler | ||
| 54 | level=DEBUG | ||
| 55 | formatter=SimpleFormatter | ||
| 56 | args=('../logs/bs_statistics.log',) | ||
| 57 | |||
| 58 | [handler_licenseFileHandler] | ||
| 59 | class=situlogger.SituRotatingFileHandler | ||
| 60 | level=DEBUG | ||
| 61 | formatter=SimpleFormatter | ||
| 62 | args=('../logs/license_statistics.log',) | ||
| 63 | |||
| 64 | [handler_idcardFileHandler] | ||
| 65 | class=situlogger.SituRotatingFileHandler | ||
| 66 | level=DEBUG | ||
| 67 | formatter=SimpleFormatter | ||
| 68 | args=('../logs/idcard.log',) | ||
| 69 | |||
| 48 | [handler_djangodbFileHandler] | 70 | [handler_djangodbFileHandler] |
| 49 | class=situlogger.SituRotatingFileHandler | 71 | class=situlogger.SituRotatingFileHandler |
| 50 | level=DEBUG | 72 | level=DEBUG |
| ... | @@ -79,6 +101,24 @@ handlers=folderFileHandler | ... | @@ -79,6 +101,24 @@ handlers=folderFileHandler |
| 79 | qualname=folder | 101 | qualname=folder |
| 80 | propagate=0 | 102 | propagate=0 |
| 81 | 103 | ||
| 104 | [logger_bs] | ||
| 105 | level=INFO | ||
| 106 | handlers=bsFileHandler | ||
| 107 | qualname=bs | ||
| 108 | propagate=0 | ||
| 109 | |||
| 110 | [logger_license] | ||
| 111 | level=INFO | ||
| 112 | handlers=licenseFileHandler | ||
| 113 | qualname=license | ||
| 114 | propagate=0 | ||
| 115 | |||
| 116 | [logger_idcard] | ||
| 117 | level=INFO | ||
| 118 | handlers=idcardFileHandler | ||
| 119 | qualname=idcard | ||
| 120 | propagate=0 | ||
| 121 | |||
| 82 | [logger_django.db.backends] | 122 | [logger_django.db.backends] |
| 83 | level=DEBUG | 123 | level=DEBUG |
| 84 | handlers=djangodbFileHandler | 124 | handlers=djangodbFileHandler | ... | ... |
-
Please register or sign in to post a comment