Merge branch 'feature/e-contract' into feature/1119
Showing 4 changed files with 9 additions and 4 deletions
... | @@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin): |
227 | return | 227 | return |
228 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | 228 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) |
229 | page_num = contract_dict.get('page_num') | 229 | page_num = contract_dict.get('page_num') |
230 | if page_num.startswith('page_'): | ||
231 | page_num_only = page_num.split('_')[-1] | ||
232 | else: | ||
233 | page_num_only = page_num | ||
230 | rebuild_page_info = [] | 234 | rebuild_page_info = [] |
231 | text_key = 'words' | 235 | text_key = 'words' |
232 | for key, value in contract_dict.get('page_info', {}).items(): | 236 | for key, value in contract_dict.get('page_info', {}).items(): |
... | @@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin): |
256 | for row_list in sub_value[text_key]: | 260 | for row_list in sub_value[text_key]: |
257 | rebuild_page_info.append(row_list) | 261 | rebuild_page_info.append(row_list) |
258 | 262 | ||
259 | # contract_result.setdefault(page_num, []).append(rebuild_page_info) | 263 | # contract_result.setdefault(page_num_only, []).append(rebuild_page_info) |
260 | contract_result.setdefault(classify, dict()).setdefault(page_num, []).append(rebuild_page_info) | 264 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) |
261 | 265 | ||
262 | # def rebuild_result(self, ocr_data, classify, img_path): | 266 | # def rebuild_result(self, ocr_data, classify, img_path): |
263 | # license_data = ocr_data.get('data') | 267 | # license_data = ocr_data.get('data') | ... | ... |
... | @@ -710,7 +710,7 @@ class BSWorkbook(Workbook): | ... | @@ -710,7 +710,7 @@ class BSWorkbook(Workbook): |
710 | for i in range(30): | 710 | for i in range(30): |
711 | if str(i) in contract_result: | 711 | if str(i) in contract_result: |
712 | page_num = str(i) | 712 | page_num = str(i) |
713 | info_list = contract_result.get(page_num) | 713 | info_list = contract_result.get(page_num, []) |
714 | # for page_num, info_list in contract_result.items(): | 714 | # for page_num, info_list in contract_result.items(): |
715 | ws.append(('page {0}'.format(page_num), )) | 715 | ws.append(('page {0}'.format(page_num), )) |
716 | for info in info_list: | 716 | for info in info_list: | ... | ... |
... | @@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler): |
550 | 550 | ||
551 | classify_1 = classify_2 = 0 | 551 | classify_1 = classify_2 = 0 |
552 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 552 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: |
553 | for keyword, classify_1_tmp, classify_2_tmp in consts.FILE_NAME_PREFIX_MAP.get(prefix): | 553 | for keyword, classify_1_tmp, classify_2_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
554 | if keyword in document_name: | 554 | if keyword in document_name: |
555 | classify_1 = classify_1_tmp | 555 | classify_1 = classify_1_tmp |
556 | classify_2 = classify_2_tmp | 556 | classify_2 = classify_2_tmp | ... | ... |
... | @@ -302,6 +302,7 @@ class PDFHandler: | ... | @@ -302,6 +302,7 @@ class PDFHandler: |
302 | # self.page_text_list = page_text_list | 302 | # self.page_text_list = page_text_list |
303 | 303 | ||
304 | def e_contract_process(self): | 304 | def e_contract_process(self): |
305 | os.makedirs(self.img_dir_path, exist_ok=True) | ||
305 | with fitz.Document(self.path) as pdf: | 306 | with fitz.Document(self.path) as pdf: |
306 | for pno in range(pdf.pageCount): | 307 | for pno in range(pdf.pageCount): |
307 | page = pdf.loadPage(pno) | 308 | page = pdf.loadPage(pno) | ... | ... |
-
Please register or sign in to post a comment