Merge branch 'feature/main' into feature/mssql
Showing
10 changed files
with
204 additions
and
9 deletions
... | @@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') | ... | @@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') |
152 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 | 152 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 |
153 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 | 153 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 |
154 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 | 154 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 |
155 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'} | 155 | BORROW_HEADERS_SET = {'借贷', '借\n贷', '借贷状态', '收/支', '收支标志'} |
156 | BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'} | 156 | BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'} |
157 | BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'} | 157 | BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'} |
158 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} | 158 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} |
... | @@ -165,6 +165,7 @@ HEADERS_MAPPING = {} | ... | @@ -165,6 +165,7 @@ HEADERS_MAPPING = {} |
165 | HEADERS_MAPPING.update( | 165 | HEADERS_MAPPING.update( |
166 | { | 166 | { |
167 | '借贷': BORROW_KEY, | 167 | '借贷': BORROW_KEY, |
168 | '借\n贷': BORROW_KEY, | ||
168 | '借贷状态': BORROW_KEY, | 169 | '借贷状态': BORROW_KEY, |
169 | '收支标志': BORROW_KEY, | 170 | '收支标志': BORROW_KEY, |
170 | '收/支': BORROW_KEY, | 171 | '收/支': BORROW_KEY, | ... | ... |
... | @@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin): |
40 | print('excel dir not exists') | 40 | print('excel dir not exists') |
41 | return | 41 | return |
42 | excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str)) | 42 | excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str)) |
43 | log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str)) | 43 | # log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str)) |
44 | log_path = os.path.join(conf.LOG_DIR, 'bs_statistics.log.{0}'.format(date_str)) | ||
44 | if not os.path.exists(log_path): | 45 | if not os.path.exists(log_path): |
45 | print('log_path not exists') | 46 | print('log_path not exists') |
46 | return | 47 | return |
... | @@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin): |
48 | summary_dict = {} | 49 | summary_dict = {} |
49 | with open(log_path, 'r', encoding='utf-8') as fp: | 50 | with open(log_path, 'r', encoding='utf-8') as fp: |
50 | for line in fp: | 51 | for line in fp: |
51 | search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line) | 52 | # search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line) |
53 | search_obj = re.search(r'\[task=(.*)] \[bs_summary=(.*)]', line) | ||
52 | task_str = search_obj.group(1) | 54 | task_str = search_obj.group(1) |
53 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 55 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) |
54 | doc_id = int(doc_id_str) | 56 | doc_id = int(doc_id_str) | ... | ... |
This diff is collapsed.
Click to expand it.
1 | import re | ||
2 | import os | ||
3 | import ast | ||
4 | import datetime | ||
5 | from openpyxl import Workbook | ||
6 | from django.core.management import BaseCommand | ||
7 | from settings import conf | ||
8 | from common.mixins import LoggerMixin | ||
9 | from apps.doc.models import HILDoc, AFCDoc | ||
10 | from apps.doc import consts | ||
11 | |||
12 | |||
class Command(BaseCommand, LoggerMixin):
    """Export ID-card records from one day's ``idcard`` log into Excel files.

    The log file ``idcard.log.<date>`` under ``conf.LOG_DIR`` is read line by
    line.  Every usable record is appended to either the AFC or the HIL
    workbook, depending on the business type encoded in the task string, and
    both workbooks are saved as ``idcard_<date>.xlsx`` in their respective
    ``IdCard`` directories under ``conf.DATA_DIR``.
    """

    # One log line looks like: "[<timestamp>] [task=<task>] [idcard=<list literal>]".
    # Compiled once so the per-line loop does not look the pattern up again.
    LINE_PATTERN = re.compile(r'\[(.*)] \[task=(.*)] \[idcard=(.*)]')

    def __init__(self):
        super().__init__()
        self.sheet_name = '身份证'
        self.header = ('申请号', '身份证号', '民族', '时间戳')

    def add_arguments(self, parser):
        """Register ``--date``; it defaults to yesterday's date."""
        parser.add_argument(
            '--date',
            default=datetime.date.today() - datetime.timedelta(days=1),
            dest='date',
            help='将要计算的日期,格式: 2018-01-01'
        )

    def _new_workbook(self):
        """Return ``(workbook, worksheet)`` with a single titled sheet holding the header row."""
        wb = Workbook()
        # Rename the default sheet instead of creating a second sheet and
        # deleting the first one through the deprecated get_sheet_by_name().
        ws = wb.active
        ws.title = self.sheet_name
        ws.append(self.header)
        return wb, ws

    @staticmethod
    def _extract_records(idcard_str):
        """Return ``(id number, nation)`` tuples parsed from the logged list literal.

        Entries without a '民族' value and entries whose '类别' equals '1' are
        skipped.  An unparsable literal yields an empty list.
        """
        try:
            idcard_list = ast.literal_eval(idcard_str)
        except (ValueError, SyntaxError):
            print('idcard content parse error: {0}'.format(idcard_str))
            return []

        records = []
        for idcard_dict in idcard_list:
            nation = idcard_dict.get('民族')
            if nation is None:
                continue
            if idcard_dict.get('类别') == '1':
                continue
            records.append((idcard_dict.get('公民身份号码'), nation))
        return records

    def handle(self, *args, **kwargs):
        """Build and save the AFC and HIL workbooks for the requested date.

        Prints a message and returns early when the date is malformed or when
        a required directory or the log file is missing.  Log lines that do
        not match the expected layout, carry a malformed task, or refer to a
        document that cannot be found are reported and skipped.
        """
        date = kwargs.get('date')
        if isinstance(date, str):
            # fullmatch: trailing garbage such as '2020-01-011' must be rejected.
            if not re.fullmatch(r'\d{4}-\d{2}-\d{2}', date):
                print('date format error')
                return
            date_str = date
        else:
            date_str = date.strftime('%Y-%m-%d')

        afc_excel_dir = os.path.join(conf.DATA_DIR, 'AFC', 'IdCard')
        hil_excel_dir = os.path.join(conf.DATA_DIR, 'HIL', 'IdCard')
        if not os.path.exists(afc_excel_dir) or not os.path.exists(hil_excel_dir):
            print('excel_dir not exist')
            return

        log_path = os.path.join(conf.LOG_DIR, 'idcard.log.{0}'.format(date_str))
        if not os.path.exists(log_path):
            print('log_path not exists')
            return

        wb_afc, ws_afc = self._new_workbook()
        wb_hil, ws_hil = self._new_workbook()

        with open(log_path, 'r', encoding='utf-8') as fp:
            for line in fp:
                match_obj = self.LINE_PATTERN.match(line)
                if match_obj is None:
                    # Blank or foreign lines must not abort the whole export.
                    continue
                time_str, task_str, idcard_str = match_obj.groups()

                content_list = self._extract_records(idcard_str)
                if not content_list:
                    continue

                try:
                    business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
                    doc_id = int(doc_id_str)
                except ValueError:
                    print('task format error: {0}'.format(task_str))
                    continue

                is_hil = business_type == consts.HIL_PREFIX
                doc_class = HILDoc if is_hil else AFCDoc
                # Evaluate the queryset once; indexing a lazy queryset inside
                # the row loop would hit the database for every row and raise
                # IndexError when the document does not exist.
                application_ids = list(
                    doc_class.objects.filter(id=doc_id).values_list('application_id', flat=True)[:1]
                )
                if not application_ids:
                    print('doc not exists: {0}'.format(task_str))
                    continue
                application_id = application_ids[0]

                target_ws = ws_hil if is_hil else ws_afc
                for id_num, nation in content_list:
                    target_ws.append((application_id, id_num, nation, time_str))

        afc_excel_path = os.path.join(afc_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        hil_excel_path = os.path.join(hil_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        wb_afc.save(afc_excel_path)
        wb_hil.save(hil_excel_path)
1 | import os | ||
2 | import datetime | ||
3 | from calendar import monthrange | ||
4 | from openpyxl import Workbook, load_workbook | ||
5 | from django.core.management import BaseCommand | ||
6 | from settings import conf | ||
7 | from common.mixins import LoggerMixin | ||
8 | |||
9 | |||
class Command(BaseCommand, LoggerMixin):
    """Merge last month's daily ID-card workbooks into one monthly workbook.

    For each business directory in ``self.dirs`` the daily files
    ``idcard_<YYYY-MM-DD>.xlsx`` of the previous calendar month are copied,
    one worksheet per day, into ``idcard_<YYYY-MM>.xlsx`` in the same
    ``IdCard`` directory under ``conf.DATA_DIR``.
    """

    def __init__(self):
        super().__init__()
        self.dirs = ('AFC', 'HIL')

    def handle(self, *args, **kwargs):
        """Create the monthly workbook for every configured directory.

        A missing directory or a month without any daily file is reported and
        skipped, so the remaining directories are still processed.
        """
        # The day before the first day of the current month always falls in
        # the previous month.
        first_day_of_month = datetime.datetime.now().replace(day=1)
        prev_month = first_day_of_month - datetime.timedelta(days=1)
        days_in_month = monthrange(prev_month.year, prev_month.month)[1]
        month_str = prev_month.strftime('%Y-%m')

        for target_dir in self.dirs:
            excel_dir = os.path.join(conf.DATA_DIR, target_dir, 'IdCard')
            if not os.path.exists(excel_dir):
                print('excel dir not exists: {0}'.format(excel_dir))
                # One missing directory must not cancel the other ones.
                continue

            monthly_wb = Workbook()
            default_ws = monthly_wb.active
            has_daily_sheet = False

            for day in range(1, days_in_month + 1):
                date_str = '{:04d}-{:02d}-{:02d}'.format(prev_month.year, prev_month.month, day)
                daily_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(date_str))
                if not os.path.exists(daily_excel_path):
                    print('daily excel path not exists: {0}'.format(daily_excel_path))
                    continue

                monthly_ws = monthly_wb.create_sheet(date_str)
                daily_wb = load_workbook(daily_excel_path)
                # Item access replaces the deprecated get_sheet_by_name().
                daily_ws = daily_wb['身份证']
                for row in daily_ws.iter_rows(min_row=1, values_only=True):
                    monthly_ws.append(row)
                has_daily_sheet = True

            if not has_daily_sheet:
                # Removing the default sheet here would leave the workbook
                # without any sheet, and openpyxl refuses to save that.
                print('no daily excel found for {0}: {1}'.format(month_str, excel_dir))
                continue

            monthly_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(month_str))
            monthly_wb.remove(default_ws)
            monthly_wb.save(monthly_excel_path)
... | @@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin): |
14 | 14 | ||
15 | def __init__(self): | 15 | def __init__(self): |
16 | super().__init__() | 16 | super().__init__() |
17 | self.log_base = '[license statistics]' | ||
18 | self.header_map = { | 17 | self.header_map = { |
19 | consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称', | 18 | consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称', |
20 | '购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额', | 19 | '购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额', |
... | @@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin): |
75 | print('excel dir not exists') | 74 | print('excel dir not exists') |
76 | return | 75 | return |
77 | excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str)) | 76 | excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str)) |
78 | log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str)) | 77 | # log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str)) |
78 | log_path = os.path.join(conf.LOG_DIR, 'license_statistics.log.{0}'.format(date_str)) | ||
79 | if not os.path.exists(log_path): | 79 | if not os.path.exists(log_path): |
80 | print('log_path not exists') | 80 | print('log_path not exists') |
81 | return | 81 | return |
... | @@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin): |
92 | 92 | ||
93 | with open(log_path, 'r', encoding='utf-8') as fp: | 93 | with open(log_path, 'r', encoding='utf-8') as fp: |
94 | for line in fp: | 94 | for line in fp: |
95 | search_obj = re.search(r'task=(.*) license_summary=(.*)', line) | 95 | # search_obj = re.search(r'task=(.*) license_summary=(.*)', line) |
96 | search_obj = re.search(r'\[task=(.*)] \[license_summary=(.*)]', line) | ||
96 | task_str = search_obj.group(1) | 97 | task_str = search_obj.group(1) |
97 | license_summary = ast.literal_eval(search_obj.group(2)) | 98 | license_summary = ast.literal_eval(search_obj.group(2)) |
98 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | 99 | business_type, doc_id_str = task_str.split(consts.SPLIT_STR) | ... | ... |
... | @@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin): |
689 | '[license_summary={4}]'.format(self.log_base, task_str, bs_summary, | 689 | '[license_summary={4}]'.format(self.log_base, task_str, bs_summary, |
690 | unknown_summary, license_summary)) | 690 | unknown_summary, license_summary)) |
691 | 691 | ||
692 | self.license_log.info('[task={0}] [license_summary={1}]'.format(task_str, license_summary)) | ||
693 | idcard_list = license_summary.get(consts.IC_CLASSIFY) | ||
694 | if idcard_list: | ||
695 | self.idcard_log.info('[task={0}] [idcard={1}]'.format(task_str, idcard_list)) | ||
696 | |||
692 | merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) | 697 | merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) |
693 | 698 | ||
699 | self.bs_log.info('[task={0}] [bs_summary={1}]'.format(task_str, merged_bs_summary)) | ||
700 | |||
694 | self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] ' | 701 | self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] ' |
695 | '[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary, | 702 | '[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary, |
696 | unknown_summary, res_list)) | 703 | unknown_summary, res_list)) | ... | ... |
... | @@ -40,6 +40,9 @@ class LoggerMixin: | ... | @@ -40,6 +40,9 @@ class LoggerMixin: |
40 | exception_log = logging.getLogger('exception') | 40 | exception_log = logging.getLogger('exception') |
41 | cronjob_log = logging.getLogger('cronjob') | 41 | cronjob_log = logging.getLogger('cronjob') |
42 | folder_log = logging.getLogger('folder') | 42 | folder_log = logging.getLogger('folder') |
43 | bs_log = logging.getLogger('bs') | ||
44 | license_log = logging.getLogger('license') | ||
45 | idcard_log = logging.getLogger('idcard') | ||
43 | 46 | ||
44 | 47 | ||
45 | class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView): | 48 | class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView): | ... | ... |
... | @@ -84,6 +84,12 @@ class PDFHandler: | ... | @@ -84,6 +84,12 @@ class PDFHandler: |
84 | def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0): | 84 | def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0): |
85 | pix = self.recover_pix(pdf, xref, smask, colorspace) | 85 | pix = self.recover_pix(pdf, xref, smask, colorspace) |
86 | ext, img_data = self.get_img_data(pix) | 86 | ext, img_data = self.get_img_data(pix) |
87 | if ext == 'jpx': | ||
88 | img_save_path = self.get_img_save_path(pno, img_index=img_index, ext='jpeg') | ||
89 | jpx_pix = fitz.Pixmap(img_data) | ||
90 | jpx_pix.writeImage(img_save_path) | ||
91 | jpx_pix = None | ||
92 | else: | ||
87 | img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext) | 93 | img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext) |
88 | with open(img_save_path, "wb") as f: | 94 | with open(img_save_path, "wb") as f: |
89 | f.write(img_data) | 95 | f.write(img_data) | ... | ... |
1 | [loggers] | 1 | [loggers] |
2 | keys=root, running, exception, cronjob, folder, django.db.backends | 2 | keys=root, running, exception, cronjob, folder, bs, license, idcard, django.db.backends |
3 | 3 | ||
4 | [handlers] | 4 | [handlers] |
5 | keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, djangodbFileHandler | 5 | keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, bsFileHandler, licenseFileHandler, idcardFileHandler, djangodbFileHandler |
6 | 6 | ||
7 | [formatters] | 7 | [formatters] |
8 | keys=SituFormatter, dataLogFormatter | 8 | keys=SituFormatter, dataLogFormatter, SimpleFormatter |
9 | 9 | ||
10 | [formatter_SituFormatter] | 10 | [formatter_SituFormatter] |
11 | format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s | 11 | format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s |
... | @@ -15,6 +15,10 @@ datefmt= | ... | @@ -15,6 +15,10 @@ datefmt= |
15 | class=situlogger.JsonFormatter | 15 | class=situlogger.JsonFormatter |
16 | format=%(asctime)s %(levelname)s %(funcName)s | 16 | format=%(asctime)s %(levelname)s %(funcName)s |
17 | 17 | ||
18 | [formatter_SimpleFormatter] | ||
19 | format=[%(asctime)s] %(message)s | ||
20 | datefmt= | ||
21 | |||
18 | [handler_consoleHandler] | 22 | [handler_consoleHandler] |
19 | class=StreamHandler | 23 | class=StreamHandler |
20 | level=ERROR | 24 | level=ERROR |
... | @@ -45,6 +49,24 @@ level=DEBUG | ... | @@ -45,6 +49,24 @@ level=DEBUG |
45 | formatter=SituFormatter | 49 | formatter=SituFormatter |
46 | args=('../logs/folder_ocr.log',) | 50 | args=('../logs/folder_ocr.log',) |
47 | 51 | ||
52 | [handler_bsFileHandler] | ||
53 | class=situlogger.SituRotatingFileHandler | ||
54 | level=DEBUG | ||
55 | formatter=SimpleFormatter | ||
56 | args=('../logs/bs_statistics.log',) | ||
57 | |||
58 | [handler_licenseFileHandler] | ||
59 | class=situlogger.SituRotatingFileHandler | ||
60 | level=DEBUG | ||
61 | formatter=SimpleFormatter | ||
62 | args=('../logs/license_statistics.log',) | ||
63 | |||
64 | [handler_idcardFileHandler] | ||
65 | class=situlogger.SituRotatingFileHandler | ||
66 | level=DEBUG | ||
67 | formatter=SimpleFormatter | ||
68 | args=('../logs/idcard.log',) | ||
69 | |||
48 | [handler_djangodbFileHandler] | 70 | [handler_djangodbFileHandler] |
49 | class=situlogger.SituRotatingFileHandler | 71 | class=situlogger.SituRotatingFileHandler |
50 | level=DEBUG | 72 | level=DEBUG |
... | @@ -79,6 +101,24 @@ handlers=folderFileHandler | ... | @@ -79,6 +101,24 @@ handlers=folderFileHandler |
79 | qualname=folder | 101 | qualname=folder |
80 | propagate=0 | 102 | propagate=0 |
81 | 103 | ||
104 | [logger_bs] | ||
105 | level=INFO | ||
106 | handlers=bsFileHandler | ||
107 | qualname=bs | ||
108 | propagate=0 | ||
109 | |||
110 | [logger_license] | ||
111 | level=INFO | ||
112 | handlers=licenseFileHandler | ||
113 | qualname=license | ||
114 | propagate=0 | ||
115 | |||
116 | [logger_idcard] | ||
117 | level=INFO | ||
118 | handlers=idcardFileHandler | ||
119 | qualname=idcard | ||
120 | propagate=0 | ||
121 | |||
82 | [logger_django.db.backends] | 122 | [logger_django.db.backends] |
83 | level=DEBUG | 123 | level=DEBUG |
84 | handlers=djangodbFileHandler | 124 | handlers=djangodbFileHandler | ... | ... |
-
Please register or sign in to post a comment