issue list 1117
Showing 8 changed files with 62 additions and 27 deletions
| ... | @@ -140,9 +140,9 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') | ... | @@ -140,9 +140,9 @@ RESULT_IDX = FIXED_HEADERS.index('核对结果') |
| 140 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 | 140 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 |
| 141 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 | 141 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 |
| 142 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 | 142 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 |
| 143 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支'} | 143 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支', '收支标志'} |
| 144 | BORROW_INCOME_SET = {'贷', '收入'} | 144 | BORROW_INCOME_SET = {'贷', '收入', '收'} |
| 145 | BORROW_OUTLAY_SET = {'借', '支出'} | 145 | BORROW_OUTLAY_SET = {'借', '支出', '支'} |
| 146 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} | 146 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} |
| 147 | OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'} | 147 | OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'} |
| 148 | 148 | ||
| ... | @@ -154,6 +154,7 @@ HEADERS_MAPPING.update( | ... | @@ -154,6 +154,7 @@ HEADERS_MAPPING.update( |
| 154 | { | 154 | { |
| 155 | '借贷': BORROW_KEY, | 155 | '借贷': BORROW_KEY, |
| 156 | '借贷状态': BORROW_KEY, | 156 | '借贷状态': BORROW_KEY, |
| 157 | '收支标志': BORROW_KEY, | ||
| 157 | '收/支': BORROW_KEY, | 158 | '收/支': BORROW_KEY, |
| 158 | } | 159 | } |
| 159 | ) | 160 | ) |
| ... | @@ -911,11 +912,11 @@ WECHART_HEADERS_MAPPING.update( | ... | @@ -911,11 +912,11 @@ WECHART_HEADERS_MAPPING.update( |
| 911 | } | 912 | } |
| 912 | ) | 913 | ) |
| 913 | 914 | ||
| 914 | PATTERN_LIST = ['收入/支出金额', '收入', '存入', '支出', '支取', '金额', '余额', '发生额', '借贷', '借贷状态', '收/支', '收入金额', | 915 | PATTERN_LIST = ['收入/支出金额', '收入', '存入', '支出', '支取', '金额', '余额', '发生额', '借贷', '借贷状态', '收支标志', '收/支', |
| 915 | '存入金额(贷)', '存入金额(贷)', '支出金额', '支取金额(借)', '支取金额(借)', '记账日期', '附言', '交易日期', '摘要', | 916 | '收入金额', '存入金额(贷)', '存入金额(贷)', '支出金额', '支取金额(借)', '支取金额(借)', '记账日期', '附言', |
| 916 | '业务摘要', '工作日期', '交易金额', '账户余额', '交易类型', '金额(元)', '金额(元)', '时间', '名称/备注', | 917 | '交易日期', '摘要', '业务摘要', '工作日期', '交易金额', '账户余额', '交易类型', '金额(元)', '金额(元)', '时间', |
| 917 | '摘要/附言', '交易发生额', '交易摘要', '借贷发生额(借:-贷:+)', '借贷发生额(借:-贷:+)', '联机余额', '交易金额(元)', | 918 | '名称/备注', '摘要/附言', '交易发生额', '交易摘要', '借贷发生额(借:-贷:+)', '借贷发生额(借:-贷:+)', '联机余额', |
| 918 | '交易金额(元)', '账户余额(元)', '账户余额(元)', '会计日期', '摘要代码', '摘要信息', '日期', '短摘要', '本次余额', | 919 | '交易金额(元)', '交易金额(元)', '账户余额(元)', '账户余额(元)', '会计日期', '摘要代码', '摘要信息', '日期', |
| 919 | '交易后余额', '交易说明', '帐户余额', '交易日期 记账日期'] | 920 | '短摘要', '本次余额', '交易后余额', '交易说明', '帐户余额', '交易日期 记账日期'] |
| 920 | 921 | ||
| 921 | CN_RE = re.compile(u'[\u4e00-\u9fa5]') | 922 | CN_RE = re.compile(u'[\u4e00-\u9fa5]') | ... | ... |
| ... | @@ -163,14 +163,19 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -163,14 +163,19 @@ class Command(BaseCommand, LoggerMixin): |
| 163 | shutil.move(path, img_save_path) | 163 | shutil.move(path, img_save_path) |
| 164 | 164 | ||
| 165 | def folder_process(self, input_dir, classify): | 165 | def folder_process(self, input_dir, classify): |
| 166 | while not os.path.isdir(input_dir): | ||
| 167 | self.folder_log.info('{0} [input dir is not dir] [input_dir={1}]'.format(self.log_base, input_dir)) | ||
| 168 | time.sleep(self.sleep_time) | ||
| 166 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') | 169 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') |
| 167 | img_output_dir = os.path.join(output_dir, 'image') | 170 | img_output_dir = os.path.join(output_dir, 'image') |
| 168 | wb_output_dir = os.path.join(output_dir, 'excel') | 171 | wb_output_dir = os.path.join(output_dir, 'excel') |
| 169 | pdf_output_dir = os.path.join(output_dir, 'pdf') | 172 | pdf_output_dir = os.path.join(output_dir, 'pdf') |
| 173 | failed_output_dir = os.path.join(output_dir, 'failed') | ||
| 170 | os.makedirs(output_dir, exist_ok=True) | 174 | os.makedirs(output_dir, exist_ok=True) |
| 171 | os.makedirs(img_output_dir, exist_ok=True) | 175 | os.makedirs(img_output_dir, exist_ok=True) |
| 172 | os.makedirs(wb_output_dir, exist_ok=True) | 176 | os.makedirs(wb_output_dir, exist_ok=True) |
| 173 | os.makedirs(pdf_output_dir, exist_ok=True) | 177 | os.makedirs(pdf_output_dir, exist_ok=True) |
| 178 | os.makedirs(failed_output_dir, exist_ok=True) | ||
| 174 | while self.switch: | 179 | while self.switch: |
| 175 | # 1. 从input dir获取pdf or image | 180 | # 1. 从input dir获取pdf or image |
| 176 | list_dir = os.listdir(input_dir) | 181 | list_dir = os.listdir(input_dir) |
| ... | @@ -178,6 +183,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -178,6 +183,7 @@ class Command(BaseCommand, LoggerMixin): |
| 178 | self.folder_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) | 183 | self.folder_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) |
| 179 | time.sleep(self.sleep_time) | 184 | time.sleep(self.sleep_time) |
| 180 | for name in list_dir: | 185 | for name in list_dir: |
| 186 | try: | ||
| 181 | path = os.path.join(input_dir, name) | 187 | path = os.path.join(input_dir, name) |
| 182 | if os.path.isfile(path): | 188 | if os.path.isfile(path): |
| 183 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) | 189 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) |
| ... | @@ -186,6 +192,17 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -186,6 +192,17 @@ class Command(BaseCommand, LoggerMixin): |
| 186 | else: | 192 | else: |
| 187 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) | 193 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) |
| 188 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 194 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) |
| 195 | except Exception as e: | ||
| 196 | try: | ||
| 197 | path = os.path.join(input_dir, name) | ||
| 198 | self.folder_log.error('{0} [file error] [path={1}] [error={2}]'.format(self.log_base, path, | ||
| 199 | traceback.format_exc())) | ||
| 200 | shutil.move(path, failed_output_dir) | ||
| 201 | continue | ||
| 202 | except Exception as e: | ||
| 203 | self.folder_log.error('{0} [file error] [error={1}]'.format( | ||
| 204 | self.log_base, traceback.format_exc())) | ||
| 205 | continue | ||
| 189 | 206 | ||
| 190 | def handle(self, *args, **kwargs): | 207 | def handle(self, *args, **kwargs): |
| 191 | process_list = [] | 208 | process_list = [] | ... | ... |
| ... | @@ -20,6 +20,7 @@ class EDMS: | ... | @@ -20,6 +20,7 @@ class EDMS: |
| 20 | self.user_name = conf.EDMS_USER | 20 | self.user_name = conf.EDMS_USER |
| 21 | self.pwd = conf.EDMS_PWD | 21 | self.pwd = conf.EDMS_PWD |
| 22 | self.session_id = None | 22 | self.session_id = None |
| 23 | self.prefix = 'OCR' | ||
| 23 | 24 | ||
| 24 | def set_session_id(self): | 25 | def set_session_id(self): |
| 25 | self.session_id = self.sm_client.service.StartSession(login=self.user_name, | 26 | self.session_id = self.sm_client.service.StartSession(login=self.user_name, |
| ... | @@ -83,12 +84,15 @@ class EDMS: | ... | @@ -83,12 +84,15 @@ class EDMS: |
| 83 | else: | 84 | else: |
| 84 | raise Exception | 85 | raise Exception |
| 85 | 86 | ||
| 86 | @staticmethod | 87 | def get_doc_file_name(self, doc_name): |
| 87 | def get_doc_file_name(doc_name): | 88 | if not isinstance(doc_name, str): |
| 88 | if doc_name.endswith('pdf'): | 89 | return self.prefix |
| 90 | if doc_name.endswith('.pdf') or doc_name.endswith('.PDF') or \ | ||
| 91 | doc_name.endswith('.pdF') or doc_name.endswith('.pDF') or doc_name.endswith('.pDf') or \ | ||
| 92 | doc_name.endswith('.Pdf') or doc_name.endswith('.PdF') or doc_name.endswith('.PDf'): | ||
| 89 | name, _ = os.path.splitext(doc_name) | 93 | name, _ = os.path.splitext(doc_name) |
| 90 | return name | 94 | return '{0}{1}'.format(self.prefix, name) |
| 91 | return doc_name | 95 | return '{0}{1}'.format(self.prefix, doc_name) |
| 92 | 96 | ||
| 93 | def get_doc_info(self, token, doc, business_type, file_path): | 97 | def get_doc_info(self, token, doc, business_type, file_path): |
| 94 | business_type = consts.BUSINESS_TYPE_DICT.get(business_type) | 98 | business_type = consts.BUSINESS_TYPE_DICT.get(business_type) |
| ... | @@ -140,5 +144,3 @@ class EDMS: | ... | @@ -140,5 +144,3 @@ class EDMS: |
| 140 | headers.pop('Content-Type') | 144 | headers.pop('Content-Type') |
| 141 | metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path) | 145 | metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path) |
| 142 | return metadata_version_id | 146 | return metadata_version_id |
| 143 | |||
| 144 | ... | ... |
| ... | @@ -574,12 +574,25 @@ class BSWorkbook(Workbook): | ... | @@ -574,12 +574,25 @@ class BSWorkbook(Workbook): |
| 574 | license_list = license_summary.get(classify) | 574 | license_list = license_summary.get(classify) |
| 575 | if not license_list: | 575 | if not license_list: |
| 576 | continue | 576 | continue |
| 577 | if classify == consts.IC_CLASSIFY: # 身份证、居住证先正面,后反面 | ||
| 578 | key, _, _ = consts.FIELD_ORDER_MAP.get(classify) | ||
| 579 | side1_list = [] | ||
| 580 | side2_list = [] | ||
| 581 | for license_dict in license_list: | ||
| 582 | if key in license_dict: | ||
| 583 | side2_list.append(license_dict) | ||
| 584 | else: | ||
| 585 | side1_list.append(license_dict) | ||
| 586 | side1_list.extend(side2_list) | ||
| 587 | license_list = side1_list | ||
| 588 | side2_list = None | ||
| 589 | side1_list = None | ||
| 577 | count = 0 | 590 | count = 0 |
| 578 | ws = self.create_sheet(name) | 591 | ws = self.create_sheet(name) |
| 579 | if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]: | 592 | if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]: |
| 580 | classify = consts.MVC_CLASSIFY_SE | 593 | classify = consts.MVC_CLASSIFY_SE |
| 581 | for license_dict in license_list: | 594 | for license_dict in license_list: |
| 582 | if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1': | 595 | if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1': # 居住证处理 |
| 583 | license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict) | 596 | license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict) |
| 584 | continue | 597 | continue |
| 585 | if side_diff: | 598 | if side_diff: |
| ... | @@ -632,6 +645,10 @@ class BSWorkbook(Workbook): | ... | @@ -632,6 +645,10 @@ class BSWorkbook(Workbook): |
| 632 | 645 | ||
| 633 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme): | 646 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme): |
| 634 | count_list = [(consts.MODEL_FIELD_BS, len(self.sheetnames) - 1)] | 647 | count_list = [(consts.MODEL_FIELD_BS, len(self.sheetnames) - 1)] |
| 648 | if document_scheme == consts.DOC_SCHEME_LIST[1]: | ||
| 649 | self.license_rebuild(license_summary, document_scheme, count_list) | ||
| 650 | self.bs_rebuild(bs_summary) | ||
| 651 | else: | ||
| 635 | self.bs_rebuild(bs_summary) | 652 | self.bs_rebuild(bs_summary) |
| 636 | self.license_rebuild(license_summary, document_scheme, count_list) | 653 | self.license_rebuild(license_summary, document_scheme, count_list) |
| 637 | self.res_sheet(res_list) | 654 | self.res_sheet(res_list) | ... | ... |
| ... | @@ -293,7 +293,8 @@ class DocView(GenericView, DocHandler): | ... | @@ -293,7 +293,8 @@ class DocView(GenericView, DocHandler): |
| 293 | metadata_version_id = str(int(time.time()) - random_int) | 293 | metadata_version_id = str(int(time.time()) - random_int) |
| 294 | 294 | ||
| 295 | pdf_file = args.get('pdf_file') | 295 | pdf_file = args.get('pdf_file') |
| 296 | if not pdf_file.name.endswith('pdf'): | 296 | if isinstance(pdf_file.name, str): |
| 297 | if not pdf_file.name.endswith('pdf') or not pdf_file.name.endswith('PDF'): | ||
| 297 | self.invalid_params(msg='invalid params: not a PDF file') | 298 | self.invalid_params(msg='invalid params: not a PDF file') |
| 298 | 299 | ||
| 299 | business_type = random.choice(consts.BUSINESS_TYPE_LIST) | 300 | business_type = random.choice(consts.BUSINESS_TYPE_LIST) | ... | ... |
| ... | @@ -8,7 +8,7 @@ SLEEP_SECOND_FOLDER = 2 | ... | @@ -8,7 +8,7 @@ SLEEP_SECOND_FOLDER = 2 |
| 8 | 8 | ||
| 9 | IMG_QUEUE_SIZE = 500 | 9 | IMG_QUEUE_SIZE = 500 |
| 10 | 10 | ||
| 11 | EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx | 11 | EDMS_DOWNLOAD_URL = http://sccn0639.bmwgroup.net/FH/FileHold/DocumentRepository/DownloadHandler.ashx |
| 12 | EDMS_UPLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx | 12 | EDMS_UPLOAD_URL = http://sccn0639.bmwgroup.net/FH/FileHold/DocumentRepository/UploadHandler.ashx |
| 13 | DEALER_CODE = ocr_situ_group | 13 | DEALER_CODE = ocr_group |
| 14 | 14 | ... | ... |
| ... | @@ -8,6 +8,6 @@ SLEEP_SECOND_FOLDER = 2 | ... | @@ -8,6 +8,6 @@ SLEEP_SECOND_FOLDER = 2 |
| 8 | 8 | ||
| 9 | IMG_QUEUE_SIZE = 500 | 9 | IMG_QUEUE_SIZE = 500 |
| 10 | 10 | ||
| 11 | EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx | 11 | EDMS_DOWNLOAD_URL = http://sccn0637.bmwgroup.net/FH/FileHold/DocumentRepository/DownloadHandler.ashx |
| 12 | EDMS_UPLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx | 12 | EDMS_UPLOAD_URL = http://sccn0637.bmwgroup.net/FH/FileHold/DocumentRepository/UploadHandler.ashx |
| 13 | DEALER_CODE = ocr_situ_group | 13 | DEALER_CODE = ocr_situ_group |
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or sign in to post a comment