add 0318
Showing 7 changed files with 107 additions and 17 deletions
... | @@ -1096,11 +1096,23 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1096,11 +1096,23 @@ class Command(BaseCommand, LoggerMixin): |
1096 | 1096 | ||
1097 | try: | 1097 | try: |
1098 | doc.status = DocStatus.PROCESS_FAILED.value | 1098 | doc.status = DocStatus.PROCESS_FAILED.value |
1099 | doc.page_count = pdf_handler.page_count | ||
1099 | doc.save() | 1100 | doc.save() |
1100 | except Exception as e: | 1101 | except Exception as e: |
1101 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | 1102 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( |
1102 | self.log_base, traceback.format_exc())) | 1103 | self.log_base, traceback.format_exc())) |
1103 | else: | 1104 | else: |
1105 | |||
1106 | try: | ||
1107 | if pdf_handler.is_e_pdf: | ||
1108 | doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \ | ||
1109 | json.dumps(pdf_handler.metadata) | ||
1110 | doc.page_count = pdf_handler.page_count | ||
1111 | doc.save() | ||
1112 | except Exception as e: | ||
1113 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1114 | self.log_base, traceback.format_exc())) | ||
1115 | |||
1104 | with lock: | 1116 | with lock: |
1105 | todo_count_dict[task_str] = pdf_handler.img_count | 1117 | todo_count_dict[task_str] = pdf_handler.img_count |
1106 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): | 1118 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): |
... | @@ -1147,6 +1159,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1147,6 +1159,7 @@ class Command(BaseCommand, LoggerMixin): |
1147 | 1159 | ||
1148 | try: | 1160 | try: |
1149 | doc.status = DocStatus.PROCESS_FAILED.value | 1161 | doc.status = DocStatus.PROCESS_FAILED.value |
1162 | doc.page_count = pdf_handler.page_count | ||
1150 | doc.save() | 1163 | doc.save() |
1151 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' | 1164 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' |
1152 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) | 1165 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) |
... | @@ -1178,6 +1191,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1178,6 +1191,13 @@ class Command(BaseCommand, LoggerMixin): |
1178 | else: | 1191 | else: |
1179 | raise Exception('download or pdf to img failed') | 1192 | raise Exception('download or pdf to img failed') |
1180 | 1193 | ||
1194 | try: | ||
1195 | doc.page_count = pdf_handler.page_count | ||
1196 | doc.save() | ||
1197 | except Exception as e: | ||
1198 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1199 | self.log_base, traceback.format_exc())) | ||
1200 | |||
1181 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): | 1201 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): |
1182 | ocr_result = afc_predict(pdf_handler.pdf_info) | 1202 | ocr_result = afc_predict(pdf_handler.pdf_info) |
1183 | page_res = {} | 1203 | page_res = {} |
... | @@ -1234,6 +1254,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1234,6 +1254,7 @@ class Command(BaseCommand, LoggerMixin): |
1234 | except Exception as e: | 1254 | except Exception as e: |
1235 | try: | 1255 | try: |
1236 | doc.status = DocStatus.PROCESS_FAILED.value | 1256 | doc.status = DocStatus.PROCESS_FAILED.value |
1257 | doc.page_count = pdf_handler.page_count | ||
1237 | doc.save() | 1258 | doc.save() |
1238 | self.online_log.warn('{0} [process failed (e-contract)] [task={1}] ' | 1259 | self.online_log.warn('{0} [process failed (e-contract)] [task={1}] ' |
1239 | '[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc())) | 1260 | '[error={2}]'.format(self.e_log_base, task_str, traceback.format_exc())) |
... | @@ -1560,7 +1581,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1560,7 +1581,7 @@ class Command(BaseCommand, LoggerMixin): |
1560 | # 重构Excel文件 | 1581 | # 重构Excel文件 |
1561 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 1582 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
1562 | # wb.save(src_excel_path) | 1583 | # wb.save(src_excel_path) |
1563 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result) | 1584 | count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata) |
1564 | wb.save(excel_path) | 1585 | wb.save(excel_path) |
1565 | 1586 | ||
1566 | except Exception as e: | 1587 | except Exception as e: | ... | ... |
... | @@ -61,6 +61,9 @@ class HILDoc(models.Model): | ... | @@ -61,6 +61,9 @@ class HILDoc(models.Model): |
61 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') | 61 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') |
62 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') | 62 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') |
63 | 63 | ||
64 | page_count = models.IntegerField(null=True, verbose_name='文件page数目') | ||
65 | metadata = models.TextField(null=True, verbose_name="电子PDF专属,PDF信息") | ||
66 | |||
64 | class Meta: | 67 | class Meta: |
65 | managed = False | 68 | managed = False |
66 | db_table = 'hil_doc' | 69 | db_table = 'hil_doc' |
... | @@ -100,6 +103,9 @@ class AFCDoc(models.Model): | ... | @@ -100,6 +103,9 @@ class AFCDoc(models.Model): |
100 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') | 103 | mvc_count = models.IntegerField(default=0, verbose_name='机动车登记证书处理数目') |
101 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') | 104 | vat_count = models.IntegerField(default=0, verbose_name='增值税发票处理数目') |
102 | 105 | ||
106 | page_count = models.IntegerField(null=True, verbose_name='文件page数目') | ||
107 | metadata = models.TextField(null=True, verbose_name="电子PDF专属,PDF信息") | ||
108 | |||
103 | class Meta: | 109 | class Meta: |
104 | managed = False | 110 | managed = False |
105 | situ_db_label = 'afc' | 111 | situ_db_label = 'afc' | ... | ... |
1 | import re | 1 | import re |
2 | import json | ||
2 | import random | 3 | import random |
3 | import locale | 4 | import locale |
4 | import numpy as np | 5 | import numpy as np |
... | @@ -311,7 +312,8 @@ class BSWorkbook(Workbook): | ... | @@ -311,7 +312,8 @@ class BSWorkbook(Workbook): |
311 | month_mapping.setdefault(item, []).append( | 312 | month_mapping.setdefault(item, []).append( |
312 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) | 313 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) |
313 | 314 | ||
314 | def build_metadata_rows(self, confidence, code, verify_list, print_time, start_date, end_date, res_count_tuple, is_verify_classify): | 315 | def build_metadata_rows(self, confidence, code, verify_list, print_time, start_date, end_date, |
316 | res_count_tuple, is_verify_classify, metadata): | ||
315 | metadata_rows = [('流水识别置信度', confidence)] | 317 | metadata_rows = [('流水识别置信度', confidence)] |
316 | if is_verify_classify: | 318 | if is_verify_classify: |
317 | verify_res = '疑似伪造' if len(verify_list) > 0 else '正常' | 319 | verify_res = '疑似伪造' if len(verify_list) > 0 else '正常' |
... | @@ -322,11 +324,26 @@ class BSWorkbook(Workbook): | ... | @@ -322,11 +324,26 @@ class BSWorkbook(Workbook): |
322 | metadata_rows.append(('识别成功', res_count_tuple[1])) | 324 | metadata_rows.append(('识别成功', res_count_tuple[1])) |
323 | metadata_rows.append(self.blank_row) | 325 | metadata_rows.append(self.blank_row) |
324 | 326 | ||
327 | # PDF info | ||
328 | metadata_highlight_row = [] | ||
329 | if isinstance(metadata, str): | ||
330 | metadata_dict = json.loads(metadata) | ||
331 | author = metadata_dict.pop('author', '') | ||
332 | producer = metadata_dict.pop('producer', '') | ||
333 | metadata_rows.append(('Author', author)) | ||
334 | metadata_rows.append(('Producer', producer)) | ||
335 | if len(author) > 0: | ||
336 | metadata_highlight_row.append(6) | ||
337 | if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer: | ||
338 | metadata_highlight_row.append(7) | ||
339 | metadata_rows.append(self.blank_row) | ||
340 | |||
325 | verify_highlight_row = [] | 341 | verify_highlight_row = [] |
326 | if is_verify_classify and len(verify_list) > 0: | 342 | if is_verify_classify and len(verify_list) > 0: |
327 | metadata_rows.append(self.verify_header) | 343 | metadata_rows.append(self.verify_header) |
344 | verify_start = len(metadata_rows) | ||
328 | metadata_rows.extend(verify_list) | 345 | metadata_rows.extend(verify_list) |
329 | for r in range(6, len(metadata_rows)+1): | 346 | for r in range(verify_start, len(metadata_rows)+1): |
330 | verify_highlight_row.append(r) | 347 | verify_highlight_row.append(r) |
331 | 348 | ||
332 | metadata_rows.append(self.blank_row) | 349 | metadata_rows.append(self.blank_row) |
... | @@ -344,18 +361,23 @@ class BSWorkbook(Workbook): | ... | @@ -344,18 +361,23 @@ class BSWorkbook(Workbook): |
344 | self.blank_row, | 361 | self.blank_row, |
345 | self.interest_keyword_header] | 362 | self.interest_keyword_header] |
346 | ) | 363 | ) |
347 | return metadata_rows, verify_highlight_row, timedelta | 364 | return metadata_rows, verify_highlight_row, timedelta, metadata_highlight_row |
348 | 365 | ||
349 | def build_meta_sheet(self, role_name, card, confidence, code, verify_list, print_time, start_date, end_date, | 366 | def build_meta_sheet(self, role_name, card, confidence, code, verify_list, print_time, start_date, end_date, |
350 | res_count_tuple, is_verify_classify): | 367 | res_count_tuple, is_verify_classify, metadata): |
351 | metadata_rows, verify_highlight_row, timedelta = self.build_metadata_rows( | 368 | metadata_rows, verify_highlight_row, timedelta, metadata_highlight_row = \ |
352 | confidence, code, verify_list, print_time, start_date, end_date, res_count_tuple, is_verify_classify) | 369 | self.build_metadata_rows(confidence, code, verify_list, print_time, start_date, end_date, res_count_tuple, |
370 | is_verify_classify, metadata) | ||
353 | if not isinstance(role_name, str): | 371 | if not isinstance(role_name, str): |
354 | role_name = consts.UNKNOWN_ROLE | 372 | role_name = consts.UNKNOWN_ROLE |
355 | ms = self.create_sheet('{0}{1}({2})'.format(self.meta_sheet_title, role_name, card)) | 373 | ms = self.create_sheet('{0}{1}({2})'.format(self.meta_sheet_title, role_name, card)) |
356 | for row in metadata_rows: | 374 | for row in metadata_rows: |
357 | ms.append(row) | 375 | ms.append(row) |
358 | 376 | ||
377 | for row in metadata_highlight_row: | ||
378 | for cell in ms[row]: | ||
379 | cell.fill = self.amount_fill | ||
380 | |||
359 | if len(verify_highlight_row) > 0: | 381 | if len(verify_highlight_row) > 0: |
360 | for cell in ms[2]: | 382 | for cell in ms[2]: |
361 | cell.fill = self.amount_fill | 383 | cell.fill = self.amount_fill |
... | @@ -625,7 +647,7 @@ class BSWorkbook(Workbook): | ... | @@ -625,7 +647,7 @@ class BSWorkbook(Workbook): |
625 | ms.append(row) | 647 | ms.append(row) |
626 | self.remove(tmp2_ws) | 648 | self.remove(tmp2_ws) |
627 | 649 | ||
628 | def bs_rebuild(self, bs_summary, res_count_tuple): | 650 | def bs_rebuild(self, bs_summary, res_count_tuple, metadata=None): |
629 | # bs_summary = { | 651 | # bs_summary = { |
630 | # '卡号': { | 652 | # '卡号': { |
631 | # 'classify': 0, | 653 | # 'classify': 0, |
... | @@ -691,7 +713,8 @@ class BSWorkbook(Workbook): | ... | @@ -691,7 +713,8 @@ class BSWorkbook(Workbook): |
691 | start_date, | 713 | start_date, |
692 | end_date, | 714 | end_date, |
693 | res_count_tuple, | 715 | res_count_tuple, |
694 | is_verify_classify) | 716 | is_verify_classify, |
717 | metadata) | ||
695 | 718 | ||
696 | summary['timedelta'] = timedelta | 719 | summary['timedelta'] = timedelta |
697 | 720 | ||
... | @@ -846,16 +869,16 @@ class BSWorkbook(Workbook): | ... | @@ -846,16 +869,16 @@ class BSWorkbook(Workbook): |
846 | if len(self.sheetnames) > 1: | 869 | if len(self.sheetnames) > 1: |
847 | self.remove(self.get_sheet_by_name('Sheet')) | 870 | self.remove(self.get_sheet_by_name('Sheet')) |
848 | 871 | ||
849 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result): | 872 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata): |
850 | res_count_tuple = self.res_sheet(res_list) | 873 | res_count_tuple = self.res_sheet(res_list) |
851 | 874 | ||
852 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] | 875 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] |
853 | if document_scheme == consts.DOC_SCHEME_LIST[1]: | 876 | if document_scheme == consts.DOC_SCHEME_LIST[1]: |
854 | self.license_rebuild(license_summary, document_scheme, count_list) | 877 | self.license_rebuild(license_summary, document_scheme, count_list) |
855 | self.contract_rebuild(contract_result) | 878 | self.contract_rebuild(contract_result) |
856 | self.bs_rebuild(bs_summary, res_count_tuple) | 879 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
857 | else: | 880 | else: |
858 | self.bs_rebuild(bs_summary, res_count_tuple) | 881 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
859 | self.license_rebuild(license_summary, document_scheme, count_list) | 882 | self.license_rebuild(license_summary, document_scheme, count_list) |
860 | self.move_res_sheet() | 883 | self.move_res_sheet() |
861 | self.remove_base_sheet() | 884 | self.remove_base_sheet() | ... | ... |
... | @@ -889,7 +889,7 @@ class DocView(GenericView, DocHandler): | ... | @@ -889,7 +889,7 @@ class DocView(GenericView, DocHandler): |
889 | create_time__lt=create_time_end + datetime.timedelta(days=1))\ | 889 | create_time__lt=create_time_end + datetime.timedelta(days=1))\ |
890 | if create_time_start is not None and create_time_end is not None else Q() | 890 | if create_time_start is not None and create_time_end is not None else Q() |
891 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query | 891 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query |
892 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'data_source', 'status') | 892 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source', 'status', 'page_count') |
893 | doc_class, prefix = self.get_doc_class(business_type) | 893 | doc_class, prefix = self.get_doc_class(business_type) |
894 | total = doc_class.objects.filter(query).count() | 894 | total = doc_class.objects.filter(query).count() |
895 | start_index = page_size * (page - 1) | 895 | start_index = page_size * (page - 1) |
... | @@ -898,14 +898,22 @@ class DocView(GenericView, DocHandler): | ... | @@ -898,14 +898,22 @@ class DocView(GenericView, DocHandler): |
898 | raise self.invalid_params('页数不存在') | 898 | raise self.invalid_params('页数不存在') |
899 | 899 | ||
900 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index] | 900 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index] |
901 | doc_list = self.get_doc_list(doc_queryset, prefix) | 901 | # doc_list = self.get_doc_list(doc_queryset, prefix) |
902 | for doc_dict in doc_queryset: | ||
903 | tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0]\ | ||
904 | else consts.COMPARE_DOC_SCHEME_LIST[1] | ||
905 | application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format( | ||
906 | conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id']) | ||
907 | doc_dict['target_url'] = application_link | ||
902 | 908 | ||
903 | # total = len(doc_list) | 909 | # total = len(doc_list) |
904 | pagination = {'current': page, 'total': total, 'page_size': page_size} | 910 | pagination = {'current': page, 'total': total, 'page_size': page_size} |
905 | res = { | 911 | res = { |
906 | 'pagination': pagination, | 912 | 'pagination': pagination, |
907 | 'doc_list': doc_list | 913 | 'doc_list': list(doc_queryset) |
908 | } | 914 | } |
915 | # 新增scheme、处理时长、文件页数,删除下载切图 | ||
916 | # 新增链接跳转比对结果 | ||
909 | self.running_log.info('[get doc list] [args={0}] [res={1}]'.format(args, res)) | 917 | self.running_log.info('[get doc list] [args={0}] [res={1}]'.format(args, res)) |
910 | return response.ok(data=res) | 918 | return response.ok(data=res) |
911 | 919 | ... | ... |
... | @@ -2082,7 +2082,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): | ... | @@ -2082,7 +2082,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): |
2082 | dbr1_tmp_res_part = {} | 2082 | dbr1_tmp_res_part = {} |
2083 | for idx, (name, value) in enumerate(dbr1_field_list): | 2083 | for idx, (name, value) in enumerate(dbr1_field_list): |
2084 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) | 2084 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) |
2085 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list): | 2085 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list) or isinstance(ocr_str_or_list, int): |
2086 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) | 2086 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) |
2087 | if isinstance(ocr_str_or_list, list): | 2087 | if isinstance(ocr_str_or_list, list): |
2088 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) | 2088 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) |
... | @@ -2114,7 +2114,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): | ... | @@ -2114,7 +2114,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto): |
2114 | dbr2_tmp_res_part = {} | 2114 | dbr2_tmp_res_part = {} |
2115 | for idx, (name, value) in enumerate(dbr2_field_list): | 2115 | for idx, (name, value) in enumerate(dbr2_field_list): |
2116 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) | 2116 | ocr_str_or_list = ocr_res.get(compare_logic[name][0]) |
2117 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list): | 2117 | if isinstance(ocr_str_or_list, str) or isinstance(ocr_str_or_list, list) or isinstance(ocr_str_or_list, int): |
2118 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) | 2118 | result = getattr(cp, compare_logic[name][1])(value, ocr_str_or_list, **compare_logic[name][2]) |
2119 | if isinstance(ocr_str_or_list, list): | 2119 | if isinstance(ocr_str_or_list, list): |
2120 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) | 2120 | ocr_str = json.dumps(ocr_str_or_list, ensure_ascii=False) | ... | ... |
src/common/tools/mssql_script16.py
0 → 100644
"""One-off MSSQL migration script.

Adds two nullable columns to both ``hil_doc`` and ``afc_doc``:

* ``page_count`` (smallint)
* ``metadata``   (nvarchar(max))

The script runs its statements at import/execution time, first against the
HIL connection and then against the AFC connection.
"""
from contextlib import closing

import pyodbc

hil_sql = """
ALTER TABLE hil_doc ADD page_count smallint;
ALTER TABLE hil_doc ADD metadata nvarchar(max);
"""

afc_sql = """
ALTER TABLE afc_doc ADD page_count smallint;
ALTER TABLE afc_doc ADD metadata nvarchar(max);
"""


def _execute_ddl(connection_string, sql):
    """Open a connection, run ``sql`` once, and always release the resources.

    ``contextlib.closing`` is used (rather than ``with connection:``) so that
    the cursor and the connection are closed even when ``execute`` raises;
    any exception still propagates to the caller.

    :param connection_string: ODBC connection string passed to ``pyodbc.connect``.
    :param sql: SQL batch to execute; the connection is opened with
        ``autocommit=True``.
    """
    with closing(pyodbc.connect(connection_string, autocommit=True)) as cnxn:
        with closing(cnxn.cursor()) as cursor:
            cursor.execute(sql)


# NOTE(review): both connection strings below name only the ODBC driver;
# presumably server/database/credentials are filled in before the script is
# run -- confirm against the deployment procedure.
# NOTE(review): the ALTER TABLE statements are not guarded by an existence
# check, so re-running the script against an already-migrated database is
# expected to fail -- verify before re-running.
_execute_ddl('DRIVER={ODBC Driver 17 for SQL Server};', hil_sql)
_execute_ddl('DRIVER={ODBC Driver 17 for SQL Server};', afc_sql)
... | @@ -47,6 +47,8 @@ class PDFHandler: | ... | @@ -47,6 +47,8 @@ class PDFHandler: |
47 | '中国建设银行个人活期账户全部交易明细', | 47 | '中国建设银行个人活期账户全部交易明细', |
48 | '平安银行个人账户交易明细清单', | 48 | '平安银行个人账户交易明细清单', |
49 | ] | 49 | ] |
50 | self.page_count = None | ||
51 | self.metadata = None | ||
50 | 52 | ||
51 | def get_suffix(self, file_name): | 53 | def get_suffix(self, file_name): |
52 | if file_name is None: | 54 | if file_name is None: |
... | @@ -321,6 +323,7 @@ class PDFHandler: | ... | @@ -321,6 +323,7 @@ class PDFHandler: |
321 | def e_contract_process(self): | 323 | def e_contract_process(self): |
322 | os.makedirs(self.img_dir_path, exist_ok=True) | 324 | os.makedirs(self.img_dir_path, exist_ok=True) |
323 | with fitz.Document(self.path) as pdf: | 325 | with fitz.Document(self.path) as pdf: |
326 | self.page_count = pdf.pageCount | ||
324 | for pno in range(pdf.pageCount): | 327 | for pno in range(pdf.pageCount): |
325 | page = pdf.loadPage(pno) | 328 | page = pdf.loadPage(pno) |
326 | self.pdf_info[str(pno)] = json.loads(page.getText('json')) | 329 | self.pdf_info[str(pno)] = json.loads(page.getText('json')) |
... | @@ -341,6 +344,8 @@ class PDFHandler: | ... | @@ -341,6 +344,8 @@ class PDFHandler: |
341 | self.img_path_list.append(img_save_path) | 344 | self.img_path_list.append(img_save_path) |
342 | else: | 345 | else: |
343 | with fitz.Document(self.path) as pdf: | 346 | with fitz.Document(self.path) as pdf: |
347 | self.metadata = pdf.metadata | ||
348 | self.page_count = pdf.pageCount | ||
344 | if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: | 349 | if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: |
345 | self.img_count = pdf.pageCount | 350 | self.img_count = pdf.pageCount |
346 | return | 351 | return | ... | ... |
-
Please register or sign in to post a comment