7a3d093e by 周伟奇

issue list 1117

1 parent ec638e4f
...@@ -29,9 +29,6 @@ sftp-config.json ...@@ -29,9 +29,6 @@ sftp-config.json
29 *.sqlite3 29 *.sqlite3
30 conf/* 30 conf/*
31 data/* 31 data/*
32 ocr/*
33
34 # 脚本
35 src/*.sh
36 32
37 test* 33 test*
34 flow_test.py
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -140,9 +140,9 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') ...@@ -140,9 +140,9 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果')
140 # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 140 # '借贷': ('贷', '借'), # 竖版-无表格-广发银行
141 # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 141 # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行
142 # '收/支': ('收入', '支出'), # 横版-表格-北京银行 142 # '收/支': ('收入', '支出'), # 横版-表格-北京银行
143 BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支'} 143 BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'}
144 BORROW_INCOME_SET = {'贷', '收入'} 144 BORROW_INCOME_SET = {'贷', '收入', '收'}
145 BORROW_OUTLAY_SET = {'借', '支出'} 145 BORROW_OUTLAY_SET = {'借', '支出', '支'}
146 INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} 146 INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'}
147 OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'} 147 OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'}
148 148
...@@ -154,6 +154,7 @@ HEADERS_MAPPING.update( ...@@ -154,6 +154,7 @@ HEADERS_MAPPING.update(
154 { 154 {
155 '借贷': BORROW_KEY, 155 '借贷': BORROW_KEY,
156 '借贷状态': BORROW_KEY, 156 '借贷状态': BORROW_KEY,
157 '收支标志': BORROW_KEY,
157 '收/支': BORROW_KEY, 158 '收/支': BORROW_KEY,
158 } 159 }
159 ) 160 )
...@@ -911,11 +912,11 @@ WECHART_HEADERS_MAPPING.update( ...@@ -911,11 +912,11 @@ WECHART_HEADERS_MAPPING.update(
911 } 912 }
912 ) 913 )
913 914
914 PATTERN_LIST = ['收入/支出金额', '收入', '存入', '支出', '支取', '金额', '余额', '发生额', '借贷', '借贷状态', '收/支', '收入金额', 915 PATTERN_LIST = ['收入/支出金额', '收入', '存入', '支出', '支取', '金额', '余额', '发生额', '借贷', '借贷状态', '收支标志', '收/支',
915 '存入金额(贷)', '存入金额(贷)', '支出金额', '支取金额(借)', '支取金额(借)', '记账日期', '附言', '交易日期', '摘要', 916 '收入金额', '存入金额(贷)', '存入金额(贷)', '支出金额', '支取金额(借)', '支取金额(借)', '记账日期', '附言',
916 '业务摘要', '工作日期', '交易金额', '账户余额', '交易类型', '金额(元)', '金额(元)', '时间', '名称/备注', 917 '交易日期', '摘要', '业务摘要', '工作日期', '交易金额', '账户余额', '交易类型', '金额(元)', '金额(元)', '时间',
917 '摘要/附言', '交易发生额', '交易摘要', '借贷发生额(借:-贷:+)', '借贷发生额(借:-贷:+)', '联机余额', '交易金额(元)', 918 '名称/备注', '摘要/附言', '交易发生额', '交易摘要', '借贷发生额(借:-贷:+)', '借贷发生额(借:-贷:+)', '联机余额',
918 '交易金额(元)', '账户余额(元)', '账户余额(元)', '会计日期', '摘要代码', '摘要信息', '日期', '短摘要', '本次余额', 919 '交易金额(元)', '交易金额(元)', '账户余额(元)', '账户余额(元)', '会计日期', '摘要代码', '摘要信息', '日期',
919 '交易后余额', '交易说明', '帐户余额', '交易日期 记账日期'] 920 '短摘要', '本次余额', '交易后余额', '交易说明', '帐户余额', '交易日期 记账日期']
920 921
921 CN_RE = re.compile(u'[\u4e00-\u9fa5]') 922 CN_RE = re.compile(u'[\u4e00-\u9fa5]')
......
...@@ -163,14 +163,19 @@ class Command(BaseCommand, LoggerMixin): ...@@ -163,14 +163,19 @@ class Command(BaseCommand, LoggerMixin):
163 shutil.move(path, img_save_path) 163 shutil.move(path, img_save_path)
164 164
165 def folder_process(self, input_dir, classify): 165 def folder_process(self, input_dir, classify):
166 while not os.path.isdir(input_dir):
167 self.folder_log.info('{0} [input dir is not dir] [input_dir={1}]'.format(self.log_base, input_dir))
168 time.sleep(self.sleep_time)
166 output_dir = os.path.join(os.path.dirname(input_dir), 'Output') 169 output_dir = os.path.join(os.path.dirname(input_dir), 'Output')
167 img_output_dir = os.path.join(output_dir, 'image') 170 img_output_dir = os.path.join(output_dir, 'image')
168 wb_output_dir = os.path.join(output_dir, 'excel') 171 wb_output_dir = os.path.join(output_dir, 'excel')
169 pdf_output_dir = os.path.join(output_dir, 'pdf') 172 pdf_output_dir = os.path.join(output_dir, 'pdf')
173 failed_output_dir = os.path.join(output_dir, 'failed')
170 os.makedirs(output_dir, exist_ok=True) 174 os.makedirs(output_dir, exist_ok=True)
171 os.makedirs(img_output_dir, exist_ok=True) 175 os.makedirs(img_output_dir, exist_ok=True)
172 os.makedirs(wb_output_dir, exist_ok=True) 176 os.makedirs(wb_output_dir, exist_ok=True)
173 os.makedirs(pdf_output_dir, exist_ok=True) 177 os.makedirs(pdf_output_dir, exist_ok=True)
178 os.makedirs(failed_output_dir, exist_ok=True)
174 while self.switch: 179 while self.switch:
175 # 1. 从input dir获取pdf or image 180 # 1. 从input dir获取pdf or image
176 list_dir = os.listdir(input_dir) 181 list_dir = os.listdir(input_dir)
...@@ -178,6 +183,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -178,6 +183,7 @@ class Command(BaseCommand, LoggerMixin):
178 self.folder_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) 183 self.folder_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
179 time.sleep(self.sleep_time) 184 time.sleep(self.sleep_time)
180 for name in list_dir: 185 for name in list_dir:
186 try:
181 path = os.path.join(input_dir, name) 187 path = os.path.join(input_dir, name)
182 if os.path.isfile(path): 188 if os.path.isfile(path):
183 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) 189 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path))
...@@ -186,6 +192,17 @@ class Command(BaseCommand, LoggerMixin): ...@@ -186,6 +192,17 @@ class Command(BaseCommand, LoggerMixin):
186 else: 192 else:
187 self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) 193 self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
188 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) 194 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
195 except Exception as e:
196 try:
197 path = os.path.join(input_dir, name)
198 self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path,
199 traceback.format_exc()))
200 shutil.move(path, failed_output_dir)
201 continue
202 except Exception as e:
203 self.folder_log.error('{0} [file error] [error={1}]'.format(
204 self.log_base, traceback.format_exc()))
205 continue
189 206
190 def handle(self, *args, **kwargs): 207 def handle(self, *args, **kwargs):
191 process_list = [] 208 process_list = []
......
...@@ -20,6 +20,7 @@ class EDMS: ...@@ -20,6 +20,7 @@ class EDMS:
20 self.user_name = conf.EDMS_USER 20 self.user_name = conf.EDMS_USER
21 self.pwd = conf.EDMS_PWD 21 self.pwd = conf.EDMS_PWD
22 self.session_id = None 22 self.session_id = None
23 self.prefix = 'OCR'
23 24
24 def set_session_id(self): 25 def set_session_id(self):
25 self.session_id = self.sm_client.service.StartSession(login=self.user_name, 26 self.session_id = self.sm_client.service.StartSession(login=self.user_name,
...@@ -83,12 +84,15 @@ class EDMS: ...@@ -83,12 +84,15 @@ class EDMS:
83 else: 84 else:
84 raise Exception 85 raise Exception
85 86
86 @staticmethod 87 def get_doc_file_name(self, doc_name):
87 def get_doc_file_name(doc_name): 88 if not isinstance(doc_name, str):
88 if doc_name.endswith('pdf'): 89 return self.prefix
90 if doc_name.endswith('.pdf') or doc_name.endswith('.PDF') or \
91 doc_name.endswith('.pdF') or doc_name.endswith('.pDF') or doc_name.endswith('.pDf') or \
92 doc_name.endswith('.Pdf') or doc_name.endswith('.PdF') or doc_name.endswith('.PDf'):
89 name, _ = os.path.splitext(doc_name) 93 name, _ = os.path.splitext(doc_name)
90 return name 94 return '{0}{1}'.format(self.prefix, name)
91 return doc_name 95 return '{0}{1}'.format(self.prefix, doc_name)
92 96
93 def get_doc_info(self, token, doc, business_type, file_path): 97 def get_doc_info(self, token, doc, business_type, file_path):
94 business_type = consts.BUSINESS_TYPE_DICT.get(business_type) 98 business_type = consts.BUSINESS_TYPE_DICT.get(business_type)
...@@ -140,5 +144,3 @@ class EDMS: ...@@ -140,5 +144,3 @@ class EDMS:
140 headers.pop('Content-Type') 144 headers.pop('Content-Type')
141 metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path) 145 metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path)
142 return metadata_version_id 146 return metadata_version_id
143
144
......
...@@ -574,12 +574,25 @@ class BSWorkbook(Workbook): ...@@ -574,12 +574,25 @@ class BSWorkbook(Workbook):
574 license_list = license_summary.get(classify) 574 license_list = license_summary.get(classify)
575 if not license_list: 575 if not license_list:
576 continue 576 continue
577 if classify == consts.IC_CLASSIFY: # 身份证、居住证先正面,后反面
578 key, _, _ = consts.FIELD_ORDER_MAP.get(classify)
579 side1_list = []
580 side2_list = []
581 for license_dict in license_list:
582 if key in license_dict:
583 side2_list.append(license_dict)
584 else:
585 side1_list.append(license_dict)
586 side1_list.extend(side2_list)
587 license_list = side1_list
588 side2_list = None
589 side1_list = None
577 count = 0 590 count = 0
578 ws = self.create_sheet(name) 591 ws = self.create_sheet(name)
579 if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]: 592 if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]:
580 classify = consts.MVC_CLASSIFY_SE 593 classify = consts.MVC_CLASSIFY_SE
581 for license_dict in license_list: 594 for license_dict in license_list:
582 if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1': 595 if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1': # 居住证处理
583 license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict) 596 license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict)
584 continue 597 continue
585 if side_diff: 598 if side_diff:
...@@ -632,6 +645,10 @@ class BSWorkbook(Workbook): ...@@ -632,6 +645,10 @@ class BSWorkbook(Workbook):
632 645
633 def rebuild(self, bs_summary, license_summary, res_list, document_scheme): 646 def rebuild(self, bs_summary, license_summary, res_list, document_scheme):
634 count_list = [(consts.MODEL_FIELD_BS, len(self.sheetnames) - 1)] 647 count_list = [(consts.MODEL_FIELD_BS, len(self.sheetnames) - 1)]
648 if document_scheme == consts.DOC_SCHEME_LIST[1]:
649 self.license_rebuild(license_summary, document_scheme, count_list)
650 self.bs_rebuild(bs_summary)
651 else:
635 self.bs_rebuild(bs_summary) 652 self.bs_rebuild(bs_summary)
636 self.license_rebuild(license_summary, document_scheme, count_list) 653 self.license_rebuild(license_summary, document_scheme, count_list)
637 self.res_sheet(res_list) 654 self.res_sheet(res_list)
......
...@@ -293,7 +293,8 @@ class DocView(GenericView, DocHandler): ...@@ -293,7 +293,8 @@ class DocView(GenericView, DocHandler):
293 metadata_version_id = str(int(time.time()) - random_int) 293 metadata_version_id = str(int(time.time()) - random_int)
294 294
295 pdf_file = args.get('pdf_file') 295 pdf_file = args.get('pdf_file')
296 if not pdf_file.name.endswith('pdf'): 296 if isinstance(pdf_file.name, str):
297 if not pdf_file.name.endswith('pdf') or not pdf_file.name.endswith('PDF'):
297 self.invalid_params(msg='invalid params: not a PDF file') 298 self.invalid_params(msg='invalid params: not a PDF file')
298 299
299 business_type = random.choice(consts.BUSINESS_TYPE_LIST) 300 business_type = random.choice(consts.BUSINESS_TYPE_LIST)
......
...@@ -8,7 +8,7 @@ SLEEP_SECOND_FOLDER = 2 ...@@ -8,7 +8,7 @@ SLEEP_SECOND_FOLDER = 2
8 8
9 IMG_QUEUE_SIZE = 500 9 IMG_QUEUE_SIZE = 500
10 10
11 EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx 11 EDMS_DOWNLOAD_URL = http://sccn0639.bmwgroup.net/FH/FileHold/DocumentRepository/DownloadHandler.ashx
12 EDMS_UPLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx 12 EDMS_UPLOAD_URL = http://sccn0639.bmwgroup.net/FH/FileHold/DocumentRepository/UploadHandler.ashx
13 DEALER_CODE = ocr_situ_group 13 DEALER_CODE = ocr_group
14 14
......
...@@ -8,6 +8,6 @@ SLEEP_SECOND_FOLDER = 2 ...@@ -8,6 +8,6 @@ SLEEP_SECOND_FOLDER = 2
8 8
9 IMG_QUEUE_SIZE = 500 9 IMG_QUEUE_SIZE = 500
10 10
11 EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx 11 EDMS_DOWNLOAD_URL = http://sccn0637.bmwgroup.net/FH/FileHold/DocumentRepository/DownloadHandler.ashx
12 EDMS_UPLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx 12 EDMS_UPLOAD_URL = http://sccn0637.bmwgroup.net/FH/FileHold/DocumentRepository/UploadHandler.ashx
13 DEALER_CODE = ocr_situ_group 13 DEALER_CODE = ocr_situ_group
...\ No newline at end of file ...\ No newline at end of file
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!