15dccc97 by 周伟奇

Merge branch 'feature/main' into feature/mssql

2 parents 236b64e0 5e463cbd
...@@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') ...@@ -152,7 +152,7 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果')
152 # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 152 # '借贷': ('贷', '借'), # 竖版-无表格-广发银行
153 # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 153 # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行
154 # '收/支': ('收入', '支出'), # 横版-表格-北京银行 154 # '收/支': ('收入', '支出'), # 横版-表格-北京银行
155 BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'} 155 BORROW_HEADERS_SET = {'借贷', '借\n贷', '借贷状态', '收/支', '收支标志'}
156 BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'} 156 BORROW_INCOME_SET = {'贷', '收入', '收', '收(Cr)'}
157 BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'} 157 BORROW_OUTLAY_SET = {'借', '支出', '支', '付(Dr)'}
158 INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} 158 INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'}
...@@ -165,6 +165,7 @@ HEADERS_MAPPING = {} ...@@ -165,6 +165,7 @@ HEADERS_MAPPING = {}
165 HEADERS_MAPPING.update( 165 HEADERS_MAPPING.update(
166 { 166 {
167 '借贷': BORROW_KEY, 167 '借贷': BORROW_KEY,
168 '借\n贷': BORROW_KEY,
168 '借贷状态': BORROW_KEY, 169 '借贷状态': BORROW_KEY,
169 '收支标志': BORROW_KEY, 170 '收支标志': BORROW_KEY,
170 '收/支': BORROW_KEY, 171 '收/支': BORROW_KEY,
......
...@@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -40,7 +40,8 @@ class Command(BaseCommand, LoggerMixin):
40 print('excel dir not exists') 40 print('excel dir not exists')
41 return 41 return
42 excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str)) 42 excel_path = os.path.join(excel_dir, 'bs_{0}.xlsx'.format(date_str))
43 log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str)) 43 # log_path = os.path.join(conf.LOG_DIR, 'bs.log.{0}'.format(date_str))
44 log_path = os.path.join(conf.LOG_DIR, 'bs_statistics.log.{0}'.format(date_str))
44 if not os.path.exists(log_path): 45 if not os.path.exists(log_path):
45 print('log_path not exists') 46 print('log_path not exists')
46 return 47 return
...@@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -48,7 +49,8 @@ class Command(BaseCommand, LoggerMixin):
48 summary_dict = {} 49 summary_dict = {}
49 with open(log_path, 'r', encoding='utf-8') as fp: 50 with open(log_path, 'r', encoding='utf-8') as fp:
50 for line in fp: 51 for line in fp:
51 search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line) 52 # search_obj = re.search(r'task=(.*) merged_bs_summary=(.*)', line)
53 search_obj = re.search(r'\[task=(.*)] \[bs_summary=(.*)]', line)
52 task_str = search_obj.group(1) 54 task_str = search_obj.group(1)
53 business_type, doc_id_str = task_str.split(consts.SPLIT_STR) 55 business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
54 doc_id = int(doc_id_str) 56 doc_id = int(doc_id_str)
......
1 import re
2 import os
3 import ast
4 import datetime
5 from openpyxl import Workbook
6 from django.core.management import BaseCommand
7 from settings import conf
8 from common.mixins import LoggerMixin
9 from apps.doc.models import HILDoc, AFCDoc
10 from apps.doc import consts
11
12
class Command(BaseCommand, LoggerMixin):
    """Export one day's ID-card entries from the idcard log into Excel workbooks.

    Reads ``idcard.log.<date>`` under ``conf.LOG_DIR`` and writes two workbooks,
    ``<conf.DATA_DIR>/AFC/IdCard/idcard_<date>.xlsx`` and
    ``<conf.DATA_DIR>/HIL/IdCard/idcard_<date>.xlsx``, each holding one row per
    exported ID-card entry.
    """

    # Line layout parsed from the log: "[<time>] [task=<task>] [idcard=<list literal>]".
    _line_pattern = re.compile(r'\[(.*)] \[task=(.*)] \[idcard=(.*)]')

    def __init__(self):
        super().__init__()
        self.sheet_name = '身份证'
        self.header = ('申请号', '身份证号', '民族', '时间戳')

    def add_arguments(self, parser):
        """Register ``--date``; it defaults to yesterday as a ``datetime.date``."""
        parser.add_argument(
            '--date',
            default=datetime.date.today() - datetime.timedelta(days=1),
            dest='date',
            help='将要计算的日期,格式: 2018-01-01'
        )

    def _new_workbook(self):
        """Return ``(workbook, worksheet)`` with a single sheet holding the header row."""
        workbook = Workbook()
        worksheet = workbook.create_sheet(self.sheet_name)
        worksheet.append(self.header)
        # Drop the default sheet that Workbook() creates so only ours remains.
        workbook.remove(workbook['Sheet'])
        return workbook, worksheet

    def _parse_line(self, line):
        """Parse one log line.

        Returns ``(time_str, task_str, content_list)`` where ``content_list`` is a
        list of ``(id_number, nation)`` tuples, or ``None`` when the line does not
        match the expected layout, cannot be evaluated, or yields no usable entry.
        """
        match_obj = self._line_pattern.match(line)
        if match_obj is None:
            # Unrelated or truncated line: skip it instead of crashing the run.
            return None
        time_str, task_str, idcard_str = match_obj.groups()
        try:
            idcard_list = ast.literal_eval(idcard_str)
        except (ValueError, SyntaxError):
            return None

        content_list = []
        for idcard_dict in idcard_list:
            nation = idcard_dict.get('民族')
            # Entries without a nation field, or with category '1', are not exported.
            # NOTE(review): category '1' presumably marks the card side that carries
            # no holder data - confirm against the OCR result schema.
            if nation is None or idcard_dict.get('类别') == '1':
                continue
            content_list.append((idcard_dict.get('公民身份号码'), nation))
        if not content_list:
            return None
        return time_str, task_str, content_list

    def handle(self, *args, **kwargs):
        """Build and save the AFC and HIL ID-card workbooks for the requested date.

        Prints a message and returns early when the date string is malformed, or
        when an output directory or the log file does not exist.
        """
        date = kwargs.get('date')
        if isinstance(date, str):
            # fullmatch (not match): the value ends up in file names, so trailing
            # characters after the date must be rejected as well.
            if not re.fullmatch(r'\d{4}-\d{2}-\d{2}', date):
                print('date format error')
                return
            date_str = date
        else:
            date_str = date.strftime('%Y-%m-%d')

        afc_excel_dir = os.path.join(conf.DATA_DIR, 'AFC', 'IdCard')
        hil_excel_dir = os.path.join(conf.DATA_DIR, 'HIL', 'IdCard')
        if not os.path.exists(afc_excel_dir) or not os.path.exists(hil_excel_dir):
            print('excel_dir not exist')
            return

        log_path = os.path.join(conf.LOG_DIR, 'idcard.log.{0}'.format(date_str))
        if not os.path.exists(log_path):
            print('log_path not exists')
            return

        wb_afc, ws_afc = self._new_workbook()
        wb_hil, ws_hil = self._new_workbook()

        with open(log_path, 'r', encoding='utf-8') as fp:
            for line in fp:
                parsed = self._parse_line(line)
                if parsed is None:
                    continue
                time_str, task_str, content_list = parsed

                try:
                    business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
                    doc_id = int(doc_id_str)
                except ValueError:
                    print('task format error: {0}'.format(task_str))
                    continue

                is_hil = business_type == consts.HIL_PREFIX
                doc_class = HILDoc if is_hil else AFCDoc
                # Resolve the application id once per line; indexing the lazy
                # queryset inside the row loop would hit the database per row.
                application_id = doc_class.objects.filter(id=doc_id).values_list(
                    'application_id', flat=True).first()
                if application_id is None:
                    print('doc not exists: {0}'.format(task_str))
                    continue

                worksheet = ws_hil if is_hil else ws_afc
                for id_num, nation in content_list:
                    worksheet.append((application_id, id_num, nation, time_str))

        afc_excel_path = os.path.join(afc_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        hil_excel_path = os.path.join(hil_excel_dir, 'idcard_{0}.xlsx'.format(date_str))
        wb_afc.save(afc_excel_path)
        wb_hil.save(hil_excel_path)
1 import os
2 import datetime
3 from calendar import monthrange
4 from openpyxl import Workbook, load_workbook
5 from django.core.management import BaseCommand
6 from settings import conf
7 from common.mixins import LoggerMixin
8
9
class Command(BaseCommand, LoggerMixin):
    """Merge the previous month's daily ID-card workbooks into one monthly workbook.

    For each directory in ``self.dirs`` the command reads
    ``<conf.DATA_DIR>/<dir>/IdCard/idcard_<YYYY-MM-DD>.xlsx`` for every day of the
    previous month and copies its '身份证' sheet into a sheet named after that
    date in ``idcard_<YYYY-MM>.xlsx`` in the same directory.
    """

    def __init__(self):
        super().__init__()
        self.dirs = ('AFC', 'HIL')

    def handle(self, *args, **kwargs):
        """Build and save one monthly workbook per configured directory."""
        # First day of the current month minus one day is a date in the previous month.
        first_day_this_month = datetime.datetime.now().replace(day=1)
        pre_month = first_day_this_month - datetime.timedelta(days=1)
        days_in_month = monthrange(pre_month.year, pre_month.month)[1]

        for target_dir in self.dirs:
            excel_dir = os.path.join(conf.DATA_DIR, target_dir, 'IdCard')
            if not os.path.exists(excel_dir):
                print('excel dir not exists: {0}'.format(excel_dir))
                # One missing directory must not prevent the others from being merged.
                continue

            monthly_wb = Workbook()
            merged_any = False

            for d in range(1, days_in_month + 1):
                date_str = '{:04d}-{:02d}-{:02d}'.format(pre_month.year, pre_month.month, d)
                daily_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(date_str))
                if not os.path.exists(daily_excel_path):
                    print('daily excel path not exists: {0}'.format(daily_excel_path))
                    continue

                daily_wb = load_workbook(daily_excel_path)
                if '身份证' not in daily_wb.sheetnames:
                    print('daily sheet not exists: {0}'.format(daily_excel_path))
                    continue

                monthly_ws = monthly_wb.create_sheet(date_str)
                for row in daily_wb['身份证'].iter_rows(min_row=1, values_only=True):
                    monthly_ws.append(row)
                merged_any = True

            if not merged_any:
                # Removing the default sheet would leave an empty workbook,
                # which openpyxl refuses to save.
                print('no daily excel merged: {0}'.format(excel_dir))
                continue

            monthly_excel_path = os.path.join(excel_dir, 'idcard_{0}.xlsx'.format(pre_month.strftime('%Y-%m')))
            monthly_wb.remove(monthly_wb['Sheet'])
            monthly_wb.save(monthly_excel_path)
...@@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin): ...@@ -14,7 +14,6 @@ class Command(BaseCommand, LoggerMixin):
14 14
15 def __init__(self): 15 def __init__(self):
16 super().__init__() 16 super().__init__()
17 self.log_base = '[license statistics]'
18 self.header_map = { 17 self.header_map = {
19 consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称', 18 consts.MVI_CLASSIFY: [('申请ID', '发票代码', '发票号码', '开票日期', '不含税价', '发票联', '购买方名称',
20 '购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额', 19 '购买方证件号码', '纳税人识别号', '车架号', '价税合计小写', '销货单位名称', '增值税税额',
...@@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -75,7 +74,8 @@ class Command(BaseCommand, LoggerMixin):
75 print('excel dir not exists') 74 print('excel dir not exists')
76 return 75 return
77 excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str)) 76 excel_path = os.path.join(excel_dir, 'license_{0}.xlsx'.format(date_str))
78 log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str)) 77 # log_path = os.path.join(conf.LOG_DIR, 'license.log.{0}'.format(date_str))
78 log_path = os.path.join(conf.LOG_DIR, 'license_statistics.log.{0}'.format(date_str))
79 if not os.path.exists(log_path): 79 if not os.path.exists(log_path):
80 print('log_path not exists') 80 print('log_path not exists')
81 return 81 return
...@@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -92,7 +92,8 @@ class Command(BaseCommand, LoggerMixin):
92 92
93 with open(log_path, 'r', encoding='utf-8') as fp: 93 with open(log_path, 'r', encoding='utf-8') as fp:
94 for line in fp: 94 for line in fp:
95 search_obj = re.search(r'task=(.*) license_summary=(.*)', line) 95 # search_obj = re.search(r'task=(.*) license_summary=(.*)', line)
96 search_obj = re.search(r'\[task=(.*)] \[license_summary=(.*)]', line)
96 task_str = search_obj.group(1) 97 task_str = search_obj.group(1)
97 license_summary = ast.literal_eval(search_obj.group(2)) 98 license_summary = ast.literal_eval(search_obj.group(2))
98 business_type, doc_id_str = task_str.split(consts.SPLIT_STR) 99 business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
......
...@@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin): ...@@ -689,8 +689,15 @@ class Command(BaseCommand, LoggerMixin):
689 '[license_summary={4}]'.format(self.log_base, task_str, bs_summary, 689 '[license_summary={4}]'.format(self.log_base, task_str, bs_summary,
690 unknown_summary, license_summary)) 690 unknown_summary, license_summary))
691 691
692 self.license_log.info('[task={0}] [license_summary={1}]'.format(task_str, license_summary))
693 idcard_list = license_summary.get(consts.IC_CLASSIFY)
694 if idcard_list:
695 self.idcard_log.info('[task={0}] [idcard={1}]'.format(task_str, idcard_list))
696
692 merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) 697 merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary)
693 698
699 self.bs_log.info('[task={0}] [bs_summary={1}]'.format(task_str, merged_bs_summary))
700
694 self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] ' 701 self.cronjob_log.info('{0} [task={1}] [merged_bs_summary={2}] [unknown_summary={3}] '
695 '[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary, 702 '[res_list={4}]'.format(self.log_base, task_str, merged_bs_summary,
696 unknown_summary, res_list)) 703 unknown_summary, res_list))
......
...@@ -40,6 +40,9 @@ class LoggerMixin: ...@@ -40,6 +40,9 @@ class LoggerMixin:
40 exception_log = logging.getLogger('exception') 40 exception_log = logging.getLogger('exception')
41 cronjob_log = logging.getLogger('cronjob') 41 cronjob_log = logging.getLogger('cronjob')
42 folder_log = logging.getLogger('folder') 42 folder_log = logging.getLogger('folder')
43 bs_log = logging.getLogger('bs')
44 license_log = logging.getLogger('license')
45 idcard_log = logging.getLogger('idcard')
43 46
44 47
45 class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView): 48 class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView):
......
...@@ -84,9 +84,15 @@ class PDFHandler: ...@@ -84,9 +84,15 @@ class PDFHandler:
84 def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0): 84 def extract_single_image(self, pdf, xref, smask, colorspace, pno, img_index=0):
85 pix = self.recover_pix(pdf, xref, smask, colorspace) 85 pix = self.recover_pix(pdf, xref, smask, colorspace)
86 ext, img_data = self.get_img_data(pix) 86 ext, img_data = self.get_img_data(pix)
87 img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext) 87 if ext == 'jpx':
88 with open(img_save_path, "wb") as f: 88 img_save_path = self.get_img_save_path(pno, img_index=img_index, ext='jpeg')
89 f.write(img_data) 89 jpx_pix = fitz.Pixmap(img_data)
90 jpx_pix.writeImage(img_save_path)
91 jpx_pix = None
92 else:
93 img_save_path = self.get_img_save_path(pno, img_index=img_index, ext=ext)
94 with open(img_save_path, "wb") as f:
95 f.write(img_data)
90 self.xref_set.add(xref) 96 self.xref_set.add(xref)
91 self.img_path_list.append(img_save_path) 97 self.img_path_list.append(img_save_path)
92 98
......
1 [loggers] 1 [loggers]
2 keys=root, running, exception, cronjob, folder, django.db.backends 2 keys=root, running, exception, cronjob, folder, bs, license, idcard, django.db.backends
3 3
4 [handlers] 4 [handlers]
5 keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, djangodbFileHandler 5 keys=consoleHandler, django_rotateFileHandler, exceptionFileHandler, cronjobFileHandler, folderFileHandler, bsFileHandler, licenseFileHandler, idcardFileHandler, djangodbFileHandler
6 6
7 [formatters] 7 [formatters]
8 keys=SituFormatter, dataLogFormatter 8 keys=SituFormatter, dataLogFormatter, SimpleFormatter
9 9
10 [formatter_SituFormatter] 10 [formatter_SituFormatter]
11 format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s 11 format=[%(asctime)s] [%(process)d] [%(thread)d] [%(threadName)s] [%(filename)s:%(lineno)d] %(levelname)s %(message)s
...@@ -15,6 +15,10 @@ datefmt= ...@@ -15,6 +15,10 @@ datefmt=
15 class=situlogger.JsonFormatter 15 class=situlogger.JsonFormatter
16 format=%(asctime)s %(levelname)s %(funcName)s 16 format=%(asctime)s %(levelname)s %(funcName)s
17 17
18 [formatter_SimpleFormatter]
19 format=[%(asctime)s] %(message)s
20 datefmt=
21
18 [handler_consoleHandler] 22 [handler_consoleHandler]
19 class=StreamHandler 23 class=StreamHandler
20 level=ERROR 24 level=ERROR
...@@ -45,6 +49,24 @@ level=DEBUG ...@@ -45,6 +49,24 @@ level=DEBUG
45 formatter=SituFormatter 49 formatter=SituFormatter
46 args=('../logs/folder_ocr.log',) 50 args=('../logs/folder_ocr.log',)
47 51
52 [handler_bsFileHandler]
53 class=situlogger.SituRotatingFileHandler
54 level=DEBUG
55 formatter=SimpleFormatter
56 args=('../logs/bs_statistics.log',)
57
58 [handler_licenseFileHandler]
59 class=situlogger.SituRotatingFileHandler
60 level=DEBUG
61 formatter=SimpleFormatter
62 args=('../logs/license_statistics.log',)
63
64 [handler_idcardFileHandler]
65 class=situlogger.SituRotatingFileHandler
66 level=DEBUG
67 formatter=SimpleFormatter
68 args=('../logs/idcard.log',)
69
48 [handler_djangodbFileHandler] 70 [handler_djangodbFileHandler]
49 class=situlogger.SituRotatingFileHandler 71 class=situlogger.SituRotatingFileHandler
50 level=DEBUG 72 level=DEBUG
...@@ -79,6 +101,24 @@ handlers=folderFileHandler ...@@ -79,6 +101,24 @@ handlers=folderFileHandler
79 qualname=folder 101 qualname=folder
80 propagate=0 102 propagate=0
81 103
104 [logger_bs]
105 level=INFO
106 handlers=bsFileHandler
107 qualname=bs
108 propagate=0
109
110 [logger_license]
111 level=INFO
112 handlers=licenseFileHandler
113 qualname=license
114 propagate=0
115
116 [logger_idcard]
117 level=INFO
118 handlers=idcardFileHandler
119 qualname=idcard
120 propagate=0
121
82 [logger_django.db.backends] 122 [logger_django.db.backends]
83 level=DEBUG 123 level=DEBUG
84 handlers=djangodbFileHandler 124 handlers=djangodbFileHandler
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!