issue list 1117
Showing 8 changed files with 62 additions and 27 deletions
... | @@ -140,9 +140,9 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') | ... | @@ -140,9 +140,9 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') |
140 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 | 140 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 |
141 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 | 141 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 |
142 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 | 142 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 |
143 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支'} | 143 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'} |
144 | BORROW_INCOME_SET = {'贷', '收入'} | 144 | BORROW_INCOME_SET = {'贷', '收入', '收'} |
145 | BORROW_OUTLAY_SET = {'借', '支出'} | 145 | BORROW_OUTLAY_SET = {'借', '支出', '支'} |
146 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} | 146 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} |
147 | OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'} | 147 | OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'} |
148 | 148 | ||
... | @@ -154,6 +154,7 @@ HEADERS_MAPPING.update( | ... | @@ -154,6 +154,7 @@ HEADERS_MAPPING.update( |
154 | { | 154 | { |
155 | '借贷': BORROW_KEY, | 155 | '借贷': BORROW_KEY, |
156 | '借贷状态': BORROW_KEY, | 156 | '借贷状态': BORROW_KEY, |
157 | '收支标志': BORROW_KEY, | ||
157 | '收/支': BORROW_KEY, | 158 | '收/支': BORROW_KEY, |
158 | } | 159 | } |
159 | ) | 160 | ) |
... | @@ -911,11 +912,11 @@ WECHART_HEADERS_MAPPING.update( | ... | @@ -911,11 +912,11 @@ WECHART_HEADERS_MAPPING.update( |
911 | } | 912 | } |
912 | ) | 913 | ) |
913 | 914 | ||
914 | PATTERN_LIST = ['收入/支出金额', '收入', '存入', '支出', '支取', '金额', '余额', '发生额', '借贷', '借贷状态', '收/支', '收入金额', | 915 | PATTERN_LIST = ['收入/支出金额', '收入', '存入', '支出', '支取', '金额', '余额', '发生额', '借贷', '借贷状态', '收支标志', '收/支', |
915 | '存入金额(贷)', '存入金额(贷)', '支出金额', '支取金额(借)', '支取金额(借)', '记账日期', '附言', '交易日期', '摘要', | 916 | '收入金额', '存入金额(贷)', '存入金额(贷)', '支出金额', '支取金额(借)', '支取金额(借)', '记账日期', '附言', |
916 | '业务摘要', '工作日期', '交易金额', '账户余额', '交易类型', '金额(元)', '金额(元)', '时间', '名称/备注', | 917 | '交易日期', '摘要', '业务摘要', '工作日期', '交易金额', '账户余额', '交易类型', '金额(元)', '金额(元)', '时间', |
917 | '摘要/附言', '交易发生额', '交易摘要', '借贷发生额(借:-贷:+)', '借贷发生额(借:-贷:+)', '联机余额', '交易金额(元)', | 918 | '名称/备注', '摘要/附言', '交易发生额', '交易摘要', '借贷发生额(借:-贷:+)', '借贷发生额(借:-贷:+)', '联机余额', |
918 | '交易金额(元)', '账户余额(元)', '账户余额(元)', '会计日期', '摘要代码', '摘要信息', '日期', '短摘要', '本次余额', | 919 | '交易金额(元)', '交易金额(元)', '账户余额(元)', '账户余额(元)', '会计日期', '摘要代码', '摘要信息', '日期', |
919 | '交易后余额', '交易说明', '帐户余额', '交易日期 记账日期'] | 920 | '短摘要', '本次余额', '交易后余额', '交易说明', '帐户余额', '交易日期 记账日期'] |
920 | 921 | ||
921 | CN_RE = re.compile(u'[\u4e00-\u9fa5]') | 922 | CN_RE = re.compile(u'[\u4e00-\u9fa5]') | ... | ... |
... | @@ -163,14 +163,19 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -163,14 +163,19 @@ class Command(BaseCommand, LoggerMixin): |
163 | shutil.move(path, img_save_path) | 163 | shutil.move(path, img_save_path) |
164 | 164 | ||
165 | def folder_process(self, input_dir, classify): | 165 | def folder_process(self, input_dir, classify): |
166 | while not os.path.isdir(input_dir): | ||
167 | self.folder_log.info('{0} [input dir is not dir] [input_dir={1}]'.format(self.log_base, input_dir)) | ||
168 | time.sleep(self.sleep_time) | ||
166 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') | 169 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') |
167 | img_output_dir = os.path.join(output_dir, 'image') | 170 | img_output_dir = os.path.join(output_dir, 'image') |
168 | wb_output_dir = os.path.join(output_dir, 'excel') | 171 | wb_output_dir = os.path.join(output_dir, 'excel') |
169 | pdf_output_dir = os.path.join(output_dir, 'pdf') | 172 | pdf_output_dir = os.path.join(output_dir, 'pdf') |
173 | failed_output_dir = os.path.join(output_dir, 'failed') | ||
170 | os.makedirs(output_dir, exist_ok=True) | 174 | os.makedirs(output_dir, exist_ok=True) |
171 | os.makedirs(img_output_dir, exist_ok=True) | 175 | os.makedirs(img_output_dir, exist_ok=True) |
172 | os.makedirs(wb_output_dir, exist_ok=True) | 176 | os.makedirs(wb_output_dir, exist_ok=True) |
173 | os.makedirs(pdf_output_dir, exist_ok=True) | 177 | os.makedirs(pdf_output_dir, exist_ok=True) |
178 | os.makedirs(failed_output_dir, exist_ok=True) | ||
174 | while self.switch: | 179 | while self.switch: |
175 | # 1. 从input dir获取pdf or image | 180 | # 1. 从input dir获取pdf or image |
176 | list_dir = os.listdir(input_dir) | 181 | list_dir = os.listdir(input_dir) |
... | @@ -178,6 +183,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -178,6 +183,7 @@ class Command(BaseCommand, LoggerMixin): |
178 | self.folder_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) | 183 | self.folder_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) |
179 | time.sleep(self.sleep_time) | 184 | time.sleep(self.sleep_time) |
180 | for name in list_dir: | 185 | for name in list_dir: |
186 | try: | ||
181 | path = os.path.join(input_dir, name) | 187 | path = os.path.join(input_dir, name) |
182 | if os.path.isfile(path): | 188 | if os.path.isfile(path): |
183 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) | 189 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) |
... | @@ -186,6 +192,17 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -186,6 +192,17 @@ class Command(BaseCommand, LoggerMixin): |
186 | else: | 192 | else: |
187 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) | 193 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) |
188 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 194 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) |
195 | except Exception as e: | ||
196 | try: | ||
197 | path = os.path.join(input_dir, name) | ||
198 | self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path, | ||
199 | traceback.format_exc())) | ||
200 | shutil.move(path, failed_output_dir) | ||
201 | continue | ||
202 | except Exception as e: | ||
203 | self.folder_log.error('{0} [file error] [error={1}]'.format( | ||
204 | self.log_base, traceback.format_exc())) | ||
205 | continue | ||
189 | 206 | ||
190 | def handle(self, *args, **kwargs): | 207 | def handle(self, *args, **kwargs): |
191 | process_list = [] | 208 | process_list = [] | ... | ... |
... | @@ -20,6 +20,7 @@ class EDMS: | ... | @@ -20,6 +20,7 @@ class EDMS: |
20 | self.user_name = conf.EDMS_USER | 20 | self.user_name = conf.EDMS_USER |
21 | self.pwd = conf.EDMS_PWD | 21 | self.pwd = conf.EDMS_PWD |
22 | self.session_id = None | 22 | self.session_id = None |
23 | self.prefix = 'OCR' | ||
23 | 24 | ||
24 | def set_session_id(self): | 25 | def set_session_id(self): |
25 | self.session_id = self.sm_client.service.StartSession(login=self.user_name, | 26 | self.session_id = self.sm_client.service.StartSession(login=self.user_name, |
... | @@ -83,12 +84,15 @@ class EDMS: | ... | @@ -83,12 +84,15 @@ class EDMS: |
83 | else: | 84 | else: |
84 | raise Exception | 85 | raise Exception |
85 | 86 | ||
86 | @staticmethod | 87 | def get_doc_file_name(self, doc_name): |
87 | def get_doc_file_name(doc_name): | 88 | if not isinstance(doc_name, str): |
88 | if doc_name.endswith('pdf'): | 89 | return self.prefix |
90 | if doc_name.endswith('.pdf') or doc_name.endswith('.PDF') or \ | ||
91 | doc_name.endswith('.pdF') or doc_name.endswith('.pDF') or doc_name.endswith('.pDf') or \ | ||
92 | doc_name.endswith('.Pdf') or doc_name.endswith('.PdF') or doc_name.endswith('.PDf'): | ||
89 | name, _ = os.path.splitext(doc_name) | 93 | name, _ = os.path.splitext(doc_name) |
90 | return name | 94 | return '{0}{1}'.format(self.prefix, name) |
91 | return doc_name | 95 | return '{0}{1}'.format(self.prefix, doc_name) |
92 | 96 | ||
93 | def get_doc_info(self, token, doc, business_type, file_path): | 97 | def get_doc_info(self, token, doc, business_type, file_path): |
94 | business_type = consts.BUSINESS_TYPE_DICT.get(business_type) | 98 | business_type = consts.BUSINESS_TYPE_DICT.get(business_type) |
... | @@ -140,5 +144,3 @@ class EDMS: | ... | @@ -140,5 +144,3 @@ class EDMS: |
140 | headers.pop('Content-Type') | 144 | headers.pop('Content-Type') |
141 | metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path) | 145 | metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path) |
142 | return metadata_version_id | 146 | return metadata_version_id |
143 | |||
144 | ... | ... |
... | @@ -574,12 +574,25 @@ class BSWorkbook(Workbook): | ... | @@ -574,12 +574,25 @@ class BSWorkbook(Workbook): |
574 | license_list = license_summary.get(classify) | 574 | license_list = license_summary.get(classify) |
575 | if not license_list: | 575 | if not license_list: |
576 | continue | 576 | continue |
577 | if classify == consts.IC_CLASSIFY: # 身份证、居住证先正面,后反面 | ||
578 | key, _, _ = consts.FIELD_ORDER_MAP.get(classify) | ||
579 | side1_list = [] | ||
580 | side2_list = [] | ||
581 | for license_dict in license_list: | ||
582 | if key in license_dict: | ||
583 | side2_list.append(license_dict) | ||
584 | else: | ||
585 | side1_list.append(license_dict) | ||
586 | side1_list.extend(side2_list) | ||
587 | license_list = side1_list | ||
588 | side2_list = None | ||
589 | side1_list = None | ||
577 | count = 0 | 590 | count = 0 |
578 | ws = self.create_sheet(name) | 591 | ws = self.create_sheet(name) |
579 | if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]: | 592 | if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]: |
580 | classify = consts.MVC_CLASSIFY_SE | 593 | classify = consts.MVC_CLASSIFY_SE |
581 | for license_dict in license_list: | 594 | for license_dict in license_list: |
582 | if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1': | 595 | if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1': # 居住证处理 |
583 | license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict) | 596 | license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict) |
584 | continue | 597 | continue |
585 | if side_diff: | 598 | if side_diff: |
... | @@ -632,6 +645,10 @@ class BSWorkbook(Workbook): | ... | @@ -632,6 +645,10 @@ class BSWorkbook(Workbook): |
632 | 645 | ||
633 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme): | 646 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme): |
634 | count_list = [(consts.MODEL_FIELD_BS, len(self.sheetnames) - 1)] | 647 | count_list = [(consts.MODEL_FIELD_BS, len(self.sheetnames) - 1)] |
648 | if document_scheme == consts.DOC_SCHEME_LIST[1]: | ||
649 | self.license_rebuild(license_summary, document_scheme, count_list) | ||
650 | self.bs_rebuild(bs_summary) | ||
651 | else: | ||
635 | self.bs_rebuild(bs_summary) | 652 | self.bs_rebuild(bs_summary) |
636 | self.license_rebuild(license_summary, document_scheme, count_list) | 653 | self.license_rebuild(license_summary, document_scheme, count_list) |
637 | self.res_sheet(res_list) | 654 | self.res_sheet(res_list) | ... | ... |
... | @@ -293,7 +293,8 @@ class DocView(GenericView, DocHandler): | ... | @@ -293,7 +293,8 @@ class DocView(GenericView, DocHandler): |
293 | metadata_version_id = str(int(time.time()) - random_int) | 293 | metadata_version_id = str(int(time.time()) - random_int) |
294 | 294 | ||
295 | pdf_file = args.get('pdf_file') | 295 | pdf_file = args.get('pdf_file') |
296 | if not pdf_file.name.endswith('pdf'): | 296 | if isinstance(pdf_file.name, str): |
297 | if not pdf_file.name.endswith('pdf') or not pdf_file.name.endswith('PDF'): | ||
297 | self.invalid_params(msg='invalid params: not a PDF file') | 298 | self.invalid_params(msg='invalid params: not a PDF file') |
298 | 299 | ||
299 | business_type = random.choice(consts.BUSINESS_TYPE_LIST) | 300 | business_type = random.choice(consts.BUSINESS_TYPE_LIST) | ... | ... |
... | @@ -8,7 +8,7 @@ SLEEP_SECOND_FOLDER = 2 | ... | @@ -8,7 +8,7 @@ SLEEP_SECOND_FOLDER = 2 |
8 | 8 | ||
9 | IMG_QUEUE_SIZE = 500 | 9 | IMG_QUEUE_SIZE = 500 |
10 | 10 | ||
11 | EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx | 11 | EDMS_DOWNLOAD_URL = http://sccn0639.bmwgroup.net/FH/FileHold/DocumentRepository/DownloadHandler.ashx |
12 | EDMS_UPLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx | 12 | EDMS_UPLOAD_URL = http://sccn0639.bmwgroup.net/FH/FileHold/DocumentRepository/UploadHandler.ashx |
13 | DEALER_CODE = ocr_situ_group | 13 | DEALER_CODE = ocr_group |
14 | 14 | ... | ... |
... | @@ -8,6 +8,6 @@ SLEEP_SECOND_FOLDER = 2 | ... | @@ -8,6 +8,6 @@ SLEEP_SECOND_FOLDER = 2 |
8 | 8 | ||
9 | IMG_QUEUE_SIZE = 500 | 9 | IMG_QUEUE_SIZE = 500 |
10 | 10 | ||
11 | EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx | 11 | EDMS_DOWNLOAD_URL = http://sccn0637.bmwgroup.net/FH/FileHold/DocumentRepository/DownloadHandler.ashx |
12 | EDMS_UPLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx | 12 | EDMS_UPLOAD_URL = http://sccn0637.bmwgroup.net/FH/FileHold/DocumentRepository/UploadHandler.ashx |
13 | DEALER_CODE = ocr_situ_group | 13 | DEALER_CODE = ocr_situ_group |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or sign in to post a comment