add 0318
Showing 7 changed files with 107 additions and 17 deletions
| ... | @@ -1096,11 +1096,23 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1096,11 +1096,23 @@ class Command(BaseCommand, LoggerMixin): |
| 1096 | 1096 | ||
| 1097 | try: | 1097 | try: |
| 1098 | doc.status = DocStatus.PROCESS_FAILED.value | 1098 | doc.status = DocStatus.PROCESS_FAILED.value |
| 1099 | doc.page_count = pdf_handler.page_count | ||
| 1099 | doc.save() | 1100 | doc.save() |
| 1100 | except Exception as e: | 1101 | except Exception as e: |
| 1101 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | 1102 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( |
| 1102 | self.log_base, traceback.format_exc())) | 1103 | self.log_base, traceback.format_exc())) |
| 1103 | else: | 1104 | else: |
| 1105 | |||
| 1106 | try: | ||
| 1107 | if pdf_handler.is_e_pdf: | ||
| 1108 | doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \ | ||
| 1109 | json.dumps(pdf_handler.metadata) | ||
| 1110 | doc.page_count = pdf_handler.page_count | ||
| 1111 | doc.save() | ||
| 1112 | except Exception as e: | ||
| 1113 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
| 1114 | self.log_base, traceback.format_exc())) | ||
| 1115 | |||
| 1104 | with lock: | 1116 | with lock: |
| 1105 | todo_count_dict[task_str] = pdf_handler.img_count | 1117 | todo_count_dict[task_str] = pdf_handler.img_count |
| 1106 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): | 1118 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): |
| ... | @@ -1147,6 +1159,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1147,6 +1159,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1147 | 1159 | ||
| 1148 | try: | 1160 | try: |
| 1149 | doc.status = DocStatus.PROCESS_FAILED.value | 1161 | doc.status = DocStatus.PROCESS_FAILED.value |
| 1162 | doc.page_count = pdf_handler.page_count | ||
| 1150 | doc.save() | 1163 | doc.save() |
| 1151 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' | 1164 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' |
| 1152 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) | 1165 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) |
| ... | @@ -1178,6 +1191,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1178,6 +1191,13 @@ class Command(BaseCommand, LoggerMixin): |
| 1178 | else: | 1191 | else: |
| 1179 | raise Exception('download or pdf to img failed') | 1192 | raise Exception('download or pdf to img failed') |
| 1180 | 1193 | ||
| 1194 | try: | ||
| 1195 | doc.page_count = pdf_handler.page_count | ||
| 1196 | doc.save() | ||
| 1197 | except Exception as e: | ||
| 1198 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
| 1199 | self.log_base, traceback.format_exc())) | ||
| 1200 | |||
| 1181 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): | 1201 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): |
| 1182 | ocr_result = afc_predict(pdf_handler.pdf_info) | 1202 | ocr_result = afc_predict(pdf_handler.pdf_info) |
| 1183 | page_res = {} | 1203 | page_res = {} |
| ... | @@ -1234,6 +1254,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1234,6 +1254,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1234 | except Exception as e: | 1254 | except Exception as e: |
| 1235 | try: | 1255 | try: |
| 1236 | doc.status = DocStatus.PROCESS_FAILED.value | 1256 | doc.status = DocStatus.PROCESS_FAILED.value |
| 1257 | doc.page_count = pdf_handler.page_count | ||
| 1237 | doc.save() | 1258 | doc.save() |
| 1238 | self.online_log.warn('{0} [process failed (e-contract)] [task={1}] ' | 1259 | self.online_log.warn('{0} [process failed (e-contract)] [task={1}] ' |
| 1239 | '[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc())) | 1260 | '[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc())) |
| ... | @@ -1560,7 +1581,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1560,7 +1581,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1560 | # 重构Excel文件 | 1581 | # 重构Excel文件 |
| 1561 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 1582 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
| 1562 | # wb.save(src_excel_path) | 1583 | # wb.save(src_excel_path) |
| 1563 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result) | 1584 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata) |
| 1564 | wb.save(excel_path) | 1585 | wb.save(excel_path) |
| 1565 | 1586 | ||
| 1566 | except Exception as e: | 1587 | except Exception as e: | ... | ... |
| ... | @@ -61,6 +61,9 @@ class HILDoc(models.Model): | ... | @@ -61,6 +61,9 @@ class HILDoc(models.Model): |
| 61 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') | 61 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') |
| 62 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') | 62 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') |
| 63 | 63 | ||
| 64 | page_count = models.IntegerField(null=True, verbose_name='文件page数目') | ||
| 65 | metadata = models.TextField(null=True, verbose_name="电子PDF专属,PDF信息") | ||
| 66 | |||
| 64 | class Meta: | 67 | class Meta: |
| 65 | managed = False | 68 | managed = False |
| 66 | db_table = 'hil_doc' | 69 | db_table = 'hil_doc' |
| ... | @@ -100,6 +103,9 @@ class AFCDoc(models.Model): | ... | @@ -100,6 +103,9 @@ class AFCDoc(models.Model): |
| 100 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') | 103 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') |
| 101 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') | 104 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') |
| 102 | 105 | ||
| 106 | page_count = models.IntegerField(null=True, verbose_name='文件page数目') | ||
| 107 | metadata = models.TextField(null=True, verbose_name="电子PDF专属,PDF信息") | ||
| 108 | |||
| 103 | class Meta: | 109 | class Meta: |
| 104 | managed = False | 110 | managed = False |
| 105 | situ_db_label = 'afc' | 111 | situ_db_label = 'afc' | ... | ... |
| 1 | import re | 1 | import re |
| 2 | import json | ||
| 2 | import random | 3 | import random |
| 3 | import locale | 4 | import locale |
| 4 | import numpy as np | 5 | import numpy as np |
| ... | @@ -311,7 +312,8 @@ class BSWorkbook(Workbook): | ... | @@ -311,7 +312,8 @@ class BSWorkbook(Workbook): |
| 311 | month_mapping.setdefault(item, []).append( | 312 | month_mapping.setdefault(item, []).append( |
| 312 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) | 313 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) |
| 313 | 314 | ||
| 314 | def build_metadata_rows(self, confidence, code, verify_list, print_time, start_date, end_date, res_count_tuple, is_verify_classify): | 315 | def build_metadata_rows(self, confidence, code, verify_list, print_time, start_date, end_date, |
| 316 | res_count_tuple, is_verify_classify, metadata): | ||
| 315 | metadata_rows = [('流水识别置信度', confidence)] | 317 | metadata_rows = [('流水识别置信度', confidence)] |
| 316 | if is_verify_classify: | 318 | if is_verify_classify: |
| 317 | verify_res = '疑似伪造' if len(verify_list) > 0 else '正常' | 319 | verify_res = '疑似伪造' if len(verify_list) > 0 else '正常' |
| ... | @@ -322,11 +324,26 @@ class BSWorkbook(Workbook): | ... | @@ -322,11 +324,26 @@ class BSWorkbook(Workbook): |
| 322 | metadata_rows.append(('识别成功', res_count_tuple[1])) | 324 | metadata_rows.append(('识别成功', res_count_tuple[1])) |
| 323 | metadata_rows.append(self.blank_row) | 325 | metadata_rows.append(self.blank_row) |
| 324 | 326 | ||
| 327 | # PDF info | ||
| 328 | metadata_highlight_row = [] | ||
| 329 | if isinstance(metadata, str): | ||
| 330 | metadata_dict = json.loads(metadata) | ||
| 331 | author = metadata_dict.pop('author', '') | ||
| 332 | producer = metadata_dict.pop('producer', '') | ||
| 333 | metadata_rows.append(('Author', author)) | ||
| 334 | metadata_rows.append(('Producer', producer)) | ||
| 335 | if len(author) > 0: | ||
| 336 | metadata_highlight_row.append(6) | ||
| 337 | if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer: | ||
| 338 | metadata_highlight_row.append(7) | ||
| 339 | metadata_rows.append(self.blank_row) | ||
| 340 | |||
| 325 | verify_highlight_row = [] | 341 | verify_highlight_row = [] |
| 326 | if is_verify_classify and len(verify_list) > 0: | 342 | if is_verify_classify and len(verify_list) > 0: |
| 327 | metadata_rows.append(self.verify_header) | 343 | metadata_rows.append(self.verify_header) |
| 344 | verify_start = len(metadata_rows) | ||
| 328 | metadata_rows.extend(verify_list) | 345 | metadata_rows.extend(verify_list) |
| 329 | for r in range(6, len(metadata_rows)+1): | 346 | for r in range(verify_start, len(metadata_rows)+1): |
| 330 | verify_highlight_row.append(r) | 347 | verify_highlight_row.append(r) |
| 331 | 348 | ||
| 332 | metadata_rows.append(self.blank_row) | 349 | metadata_rows.append(self.blank_row) |
| ... | @@ -344,18 +361,23 @@ class BSWorkbook(Workbook): | ... | @@ -344,18 +361,23 @@ class BSWorkbook(Workbook): |
| 344 | self.blank_row, | 361 | self.blank_row, |
| 345 | self.interest_keyword_header] | 362 | self.interest_keyword_header] |
| 346 | ) | 363 | ) |
| 347 | return metadata_rows, verify_highlight_row, timedelta | 364 | return metadata_rows, verify_highlight_row, timedelta, metadata_highlight_row |
| 348 | 365 | ||
| 349 | def build_meta_sheet(self, role_name, card, confidence, code, verify_list, print_time, start_date, end_date, | 366 | def build_meta_sheet(self, role_name, card, confidence, code, verify_list, print_time, start_date, end_date, |
| 350 | res_count_tuple, is_verify_classify): | 367 | res_count_tuple, is_verify_classify, metadata): |
| 351 | metadata_rows, verify_highlight_row, timedelta = self.build_metadata_rows( | 368 | metadata_rows, verify_highlight_row, timedelta, metadata_highlight_row = \ |
| 352 | confidence, code, verify_list, print_time, start_date, end_date, res_count_tuple, is_verify_classify) | 369 | self.build_metadata_rows(confidence, code, verify_list, print_time, start_date, end_date, res_count_tuple, |
| 370 | is_verify_classify, metadata) | ||
| 353 | if not isinstance(role_name, str): | 371 | if not isinstance(role_name, str): |
| 354 | role_name = consts.UNKNOWN_ROLE | 372 | role_name = consts.UNKNOWN_ROLE |
| 355 | ms = self.create_sheet('{0}{1}({2})'.format(self.meta_sheet_title, role_name, card)) | 373 | ms = self.create_sheet('{0}{1}({2})'.format(self.meta_sheet_title, role_name, card)) |
| 356 | for row in metadata_rows: | 374 | for row in metadata_rows: |
| 357 | ms.append(row) | 375 | ms.append(row) |
| 358 | 376 | ||
| 377 | for row in metadata_highlight_row: | ||
| 378 | for cell in ms[row]: | ||
| 379 | cell.fill = self.amount_fill | ||
| 380 | |||
| 359 | if len(verify_highlight_row) > 0: | 381 | if len(verify_highlight_row) > 0: |
| 360 | for cell in ms[2]: | 382 | for cell in ms[2]: |
| 361 | cell.fill = self.amount_fill | 383 | cell.fill = self.amount_fill |
| ... | @@ -625,7 +647,7 @@ class BSWorkbook(Workbook): | ... | @@ -625,7 +647,7 @@ class BSWorkbook(Workbook): |
| 625 | ms.append(row) | 647 | ms.append(row) |
| 626 | self.remove(tmp2_ws) | 648 | self.remove(tmp2_ws) |
| 627 | 649 | ||
| 628 | def bs_rebuild(self, bs_summary, res_count_tuple): | 650 | def bs_rebuild(self, bs_summary, res_count_tuple, metadata=None): |
| 629 | # bs_summary = { | 651 | # bs_summary = { |
| 630 | # '卡号': { | 652 | # '卡号': { |
| 631 | # 'classify': 0, | 653 | # 'classify': 0, |
| ... | @@ -691,7 +713,8 @@ class BSWorkbook(Workbook): | ... | @@ -691,7 +713,8 @@ class BSWorkbook(Workbook): |
| 691 | start_date, | 713 | start_date, |
| 692 | end_date, | 714 | end_date, |
| 693 | res_count_tuple, | 715 | res_count_tuple, |
| 694 | is_verify_classify) | 716 | is_verify_classify, |
| 717 | metadata) | ||
| 695 | 718 | ||
| 696 | summary['timedelta'] = timedelta | 719 | summary['timedelta'] = timedelta |
| 697 | 720 | ||
| ... | @@ -846,16 +869,16 @@ class BSWorkbook(Workbook): | ... | @@ -846,16 +869,16 @@ class BSWorkbook(Workbook): |
| 846 | if len(self.sheetnames) > 1: | 869 | if len(self.sheetnames) > 1: |
| 847 | self.remove(self.get_sheet_by_name('Sheet')) | 870 | self.remove(self.get_sheet_by_name('Sheet')) |
| 848 | 871 | ||
| 849 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result): | 872 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata): |
| 850 | res_count_tuple = self.res_sheet(res_list) | 873 | res_count_tuple = self.res_sheet(res_list) |
| 851 | 874 | ||
| 852 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] | 875 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] |
| 853 | if document_scheme == consts.DOC_SCHEME_LIST[1]: | 876 | if document_scheme == consts.DOC_SCHEME_LIST[1]: |
| 854 | self.license_rebuild(license_summary, document_scheme, count_list) | 877 | self.license_rebuild(license_summary, document_scheme, count_list) |
| 855 | self.contract_rebuild(contract_result) | 878 | self.contract_rebuild(contract_result) |
| 856 | self.bs_rebuild(bs_summary, res_count_tuple) | 879 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
| 857 | else: | 880 | else: |
| 858 | self.bs_rebuild(bs_summary, res_count_tuple) | 881 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
| 859 | self.license_rebuild(license_summary, document_scheme, count_list) | 882 | self.license_rebuild(license_summary, document_scheme, count_list) |
| 860 | self.move_res_sheet() | 883 | self.move_res_sheet() |
| 861 | self.remove_base_sheet() | 884 | self.remove_base_sheet() | ... | ... |
| ... | @@ -889,7 +889,7 @@ class DocView(GenericView, DocHandler): | ... | @@ -889,7 +889,7 @@ class DocView(GenericView, DocHandler): |
| 889 | create_time__lt=create_time_end + datetime.timedelta(days=1))\ | 889 | create_time__lt=create_time_end + datetime.timedelta(days=1))\ |
| 890 | if create_time_start is not None and create_time_end is not None else Q() | 890 | if create_time_start is not None and create_time_end is not None else Q() |
| 891 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query | 891 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query |
| 892 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'data_source', 'status') | 892 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source', 'status', 'page_count') |
| 893 | doc_class, prefix = self.get_doc_class(business_type) | 893 | doc_class, prefix = self.get_doc_class(business_type) |
| 894 | total = doc_class.objects.filter(query).count() | 894 | total = doc_class.objects.filter(query).count() |
| 895 | start_index = page_size * (page - 1) | 895 | start_index = page_size * (page - 1) |
| ... | @@ -898,14 +898,22 @@ class DocView(GenericView, DocHandler): | ... | @@ -898,14 +898,22 @@ class DocView(GenericView, DocHandler): |
| 898 | raise self.invalid_params('页数不存在') | 898 | raise self.invalid_params('页数不存在') |
| 899 | 899 | ||
| 900 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index] | 900 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index] |
| 901 | doc_list = self.get_doc_list(doc_queryset, prefix) | 901 | # doc_list = self.get_doc_list(doc_queryset, prefix) |
| 902 | for doc_dict in doc_queryset: | ||
| 903 | tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0]\ | ||
| 904 | else consts.COMPARE_DOC_SCHEME_LIST[1] | ||
| 905 | application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format( | ||
| 906 | conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id']) | ||
| 907 | doc_dict['target_url'] = application_link | ||
| 902 | 908 | ||
| 903 | # total = len(doc_list) | 909 | # total = len(doc_list) |
| 904 | pagination = {'current': page, 'total': total, 'page_size': page_size} | 910 | pagination = {'current': page, 'total': total, 'page_size': page_size} |
| 905 | res = { | 911 | res = { |
| 906 | 'pagination': pagination, | 912 | 'pagination': pagination, |
| 907 | 'doc_list': doc_list | 913 | 'doc_list': list(doc_queryset) |
| 908 | } | 914 | } |
| 915 | # 新增scheme、处理时长、文件页数,删除下载切图 | ||
| 916 | # 新增链接跳转比对结果 | ||
| 909 | self.running_log.info('[get doc list] [args={0}] [res={1}]'.format(args, res)) | 917 | self.running_log.info('[get doc list] [args={0}] [res={1}]'.format(args, res)) |
| 910 | return response.ok(data=res) | 918 | return response.ok(data=res) |
| 911 | 919 | ... | ... |
| ... | @@ -2082,7 +2082,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): | ... | @@ -2082,7 +2082,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): |
| 2082 | dbr1_tmp_res_part = {} | 2082 | dbr1_tmp_res_part = {} |
| 2083 | for idx, (name, value) in enumerate(dbr1_field_list): | 2083 | for idx, (name, value) in enumerate(dbr1_field_list): |
| 2084 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) | 2084 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) |
| 2085 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list): | 2085 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list) or isinstance(ocr_str_or_list, int): |
| 2086 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) | 2086 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) |
| 2087 | if isinstance(ocr_str_or_list, list): | 2087 | if isinstance(ocr_str_or_list, list): |
| 2088 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) | 2088 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) |
| ... | @@ -2114,7 +2114,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): | ... | @@ -2114,7 +2114,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): |
| 2114 | dbr2_tmp_res_part = {} | 2114 | dbr2_tmp_res_part = {} |
| 2115 | for idx, (name, value) in enumerate(dbr2_field_list): | 2115 | for idx, (name, value) in enumerate(dbr2_field_list): |
| 2116 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) | 2116 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) |
| 2117 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list): | 2117 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list) or isinstance(ocr_str_or_list, int): |
| 2118 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) | 2118 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) |
| 2119 | if isinstance(ocr_str_or_list, list): | 2119 | if isinstance(ocr_str_or_list, list): |
| 2120 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) | 2120 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) | ... | ... |
src/common/tools/mssql_script16.py (new file, mode 0 → 100644)
| 1 | import pyodbc | ||
| 2 | |||
| 3 | hil_sql = """ | ||
| 4 | ALTER TABLE hil_doc ADD page_count smallint; | ||
| 5 | ALTER TABLE hil_doc ADD metadata nvarchar(max); | ||
| 6 | """ | ||
| 7 | |||
| 8 | afc_sql = """ | ||
| 9 | ALTER TABLE afc_doc ADD page_count smallint; | ||
| 10 | ALTER TABLE afc_doc ADD metadata nvarchar(max); | ||
| 11 | """ | ||
| 12 | |||
| 13 | hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True) | ||
| 14 | |||
| 15 | hil_cursor = hil_cnxn.cursor() | ||
| 16 | hil_cursor.execute(hil_sql) | ||
| 17 | |||
| 18 | hil_cursor.close() | ||
| 19 | hil_cnxn.close() | ||
| 20 | |||
| 21 | afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True) | ||
| 22 | |||
| 23 | afc_cursor = afc_cnxn.cursor() | ||
| 24 | afc_cursor.execute(afc_sql) | ||
| 25 | |||
| 26 | afc_cursor.close() | ||
| 27 | afc_cnxn.close() |
| ... | @@ -47,6 +47,8 @@ class PDFHandler: | ... | @@ -47,6 +47,8 @@ class PDFHandler: |
| 47 | '中国建设银行个人活期账户全部交易明细', | 47 | '中国建设银行个人活期账户全部交易明细', |
| 48 | '平安银行个人账户交易明细清单', | 48 | '平安银行个人账户交易明细清单', |
| 49 | ] | 49 | ] |
| 50 | self.page_count = None | ||
| 51 | self.metadata = None | ||
| 50 | 52 | ||
| 51 | def get_suffix(self, file_name): | 53 | def get_suffix(self, file_name): |
| 52 | if file_name is None: | 54 | if file_name is None: |
| ... | @@ -321,6 +323,7 @@ class PDFHandler: | ... | @@ -321,6 +323,7 @@ class PDFHandler: |
| 321 | def e_contract_process(self): | 323 | def e_contract_process(self): |
| 322 | os.makedirs(self.img_dir_path, exist_ok=True) | 324 | os.makedirs(self.img_dir_path, exist_ok=True) |
| 323 | with fitz.Document(self.path) as pdf: | 325 | with fitz.Document(self.path) as pdf: |
| 326 | self.page_count = pdf.pageCount | ||
| 324 | for pno in range(pdf.pageCount): | 327 | for pno in range(pdf.pageCount): |
| 325 | page = pdf.loadPage(pno) | 328 | page = pdf.loadPage(pno) |
| 326 | self.pdf_info[str(pno)] = json.loads(page.getText('json')) | 329 | self.pdf_info[str(pno)] = json.loads(page.getText('json')) |
| ... | @@ -341,6 +344,8 @@ class PDFHandler: | ... | @@ -341,6 +344,8 @@ class PDFHandler: |
| 341 | self.img_path_list.append(img_save_path) | 344 | self.img_path_list.append(img_save_path) |
| 342 | else: | 345 | else: |
| 343 | with fitz.Document(self.path) as pdf: | 346 | with fitz.Document(self.path) as pdf: |
| 347 | self.metadata = pdf.metadata | ||
| 348 | self.page_count = pdf.pageCount | ||
| 344 | if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: | 349 | if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: |
| 345 | self.img_count = pdf.pageCount | 350 | self.img_count = pdf.pageCount |
| 346 | return | 351 | return | ... | ... |
-
Please register or sign in to post a comment