Merge branch 'feature/e-contract' into feature/1119
Showing 4 changed files with 9 additions and 4 deletions
| ... | @@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin): |
| 227 | return | 227 | return |
| 228 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | 228 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) |
| 229 | page_num = contract_dict.get('page_num') | 229 | page_num = contract_dict.get('page_num') |
| 230 | if page_num.startswith('page_'): | ||
| 231 | page_num_only = page_num.split('_')[-1] | ||
| 232 | else: | ||
| 233 | page_num_only = page_num | ||
| 230 | rebuild_page_info = [] | 234 | rebuild_page_info = [] |
| 231 | text_key = 'words' | 235 | text_key = 'words' |
| 232 | for key, value in contract_dict.get('page_info', {}).items(): | 236 | for key, value in contract_dict.get('page_info', {}).items(): |
| ... | @@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin): |
| 256 | for row_list in sub_value[text_key]: | 260 | for row_list in sub_value[text_key]: |
| 257 | rebuild_page_info.append(row_list) | 261 | rebuild_page_info.append(row_list) |
| 258 | 262 | ||
| 259 | # contract_result.setdefault(page_num, []).append(rebuild_page_info) | 263 | # contract_result.setdefault(page_num_only, []).append(rebuild_page_info) |
| 260 | contract_result.setdefault(classify, dict()).setdefault(page_num, []).append(rebuild_page_info) | 264 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) |
| 261 | 265 | ||
| 262 | # def rebuild_result(self, ocr_data, classify, img_path): | 266 | # def rebuild_result(self, ocr_data, classify, img_path): |
| 263 | # license_data = ocr_data.get('data') | 267 | # license_data = ocr_data.get('data') | ... | ... |
| ... | @@ -710,7 +710,7 @@ class BSWorkbook(Workbook): | ... | @@ -710,7 +710,7 @@ class BSWorkbook(Workbook): |
| 710 | for i in range(30): | 710 | for i in range(30): |
| 711 | if str(i) in contract_result: | 711 | if str(i) in contract_result: |
| 712 | page_num = str(i) | 712 | page_num = str(i) |
| 713 | info_list = contract_result.get(page_num) | 713 | info_list = contract_result.get(page_num, []) |
| 714 | # for page_num, info_list in contract_result.items(): | 714 | # for page_num, info_list in contract_result.items(): |
| 715 | ws.append(('page {0}'.format(page_num), )) | 715 | ws.append(('page {0}'.format(page_num), )) |
| 716 | for info in info_list: | 716 | for info in info_list: | ... | ... |
| ... | @@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler): |
| 550 | 550 | ||
| 551 | classify_1 = classify_2 = 0 | 551 | classify_1 = classify_2 = 0 |
| 552 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 552 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: |
| 553 | for keyword, classify_1_tmp, classify_2_tmp in consts.FILE_NAME_PREFIX_MAP.get(prefix): | 553 | for keyword, classify_1_tmp, classify_2_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
| 554 | if keyword in document_name: | 554 | if keyword in document_name: |
| 555 | classify_1 = classify_1_tmp | 555 | classify_1 = classify_1_tmp |
| 556 | classify_2 = classify_2_tmp | 556 | classify_2 = classify_2_tmp | ... | ... |
| ... | @@ -302,6 +302,7 @@ class PDFHandler: | ... | @@ -302,6 +302,7 @@ class PDFHandler: |
| 302 | # self.page_text_list = page_text_list | 302 | # self.page_text_list = page_text_list |
| 303 | 303 | ||
| 304 | def e_contract_process(self): | 304 | def e_contract_process(self): |
| 305 | os.makedirs(self.img_dir_path, exist_ok=True) | ||
| 305 | with fitz.Document(self.path) as pdf: | 306 | with fitz.Document(self.path) as pdf: |
| 306 | for pno in range(pdf.pageCount): | 307 | for pno in range(pdf.pageCount): |
| 307 | page = pdf.loadPage(pno) | 308 | page = pdf.loadPage(pno) | ... | ... |
-
Please register or sign in to post a comment