fix e-contract
Showing 4 changed files with 9 additions and 4 deletions
| ... | @@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin): | 
| 227 | return | 227 | return | 
| 228 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | 228 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | 
| 229 | page_num = contract_dict.get('page_num') | 229 | page_num = contract_dict.get('page_num') | 
| 230 | if page_num.startswith('page_'): | ||
| 231 | page_num_only = page_num.split('_')[-1] | ||
| 232 | else: | ||
| 233 | page_num_only = page_num | ||
| 230 | rebuild_page_info = [] | 234 | rebuild_page_info = [] | 
| 231 | text_key = 'words' | 235 | text_key = 'words' | 
| 232 | for key, value in contract_dict.get('page_info', {}).items(): | 236 | for key, value in contract_dict.get('page_info', {}).items(): | 
| ... | @@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin): | 
| 256 | for row_list in sub_value[text_key]: | 260 | for row_list in sub_value[text_key]: | 
| 257 | rebuild_page_info.append(row_list) | 261 | rebuild_page_info.append(row_list) | 
| 258 | 262 | ||
| 259 | # contract_result.setdefault(page_num, []).append(rebuild_page_info) | 263 | # contract_result.setdefault(page_num_only, []).append(rebuild_page_info) | 
| 260 | contract_result.setdefault(classify, dict()).setdefault(page_num, []).append(rebuild_page_info) | 264 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) | 
| 261 | 265 | ||
| 262 | # def rebuild_result(self, ocr_data, classify, img_path): | 266 | # def rebuild_result(self, ocr_data, classify, img_path): | 
| 263 | # license_data = ocr_data.get('data') | 267 | # license_data = ocr_data.get('data') | ... | ... | 
| ... | @@ -710,7 +710,7 @@ class BSWorkbook(Workbook): | ... | @@ -710,7 +710,7 @@ class BSWorkbook(Workbook): | 
| 710 | for i in range(30): | 710 | for i in range(30): | 
| 711 | if str(i) in contract_result: | 711 | if str(i) in contract_result: | 
| 712 | page_num = str(i) | 712 | page_num = str(i) | 
| 713 | info_list = contract_result.get(page_num) | 713 | info_list = contract_result.get(page_num, []) | 
| 714 | # for page_num, info_list in contract_result.items(): | 714 | # for page_num, info_list in contract_result.items(): | 
| 715 | ws.append(('page {0}'.format(page_num), )) | 715 | ws.append(('page {0}'.format(page_num), )) | 
| 716 | for info in info_list: | 716 | for info in info_list: | ... | ... | 
| ... | @@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler): | 
| 550 | 550 | ||
| 551 | classify_1 = classify_2 = 0 | 551 | classify_1 = classify_2 = 0 | 
| 552 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 552 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 
| 553 | for keyword, classify_1_tmp, classify_2_tmp in consts.FILE_NAME_PREFIX_MAP.get(prefix): | 553 | for keyword, classify_1_tmp, classify_2_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): | 
| 554 | if keyword in document_name: | 554 | if keyword in document_name: | 
| 555 | classify_1 = classify_1_tmp | 555 | classify_1 = classify_1_tmp | 
| 556 | classify_2 = classify_2_tmp | 556 | classify_2 = classify_2_tmp | ... | ... | 
| ... | @@ -300,6 +300,7 @@ class PDFHandler: | ... | @@ -300,6 +300,7 @@ class PDFHandler: | 
| 300 | self.page_text_list = page_text_list | 300 | self.page_text_list = page_text_list | 
| 301 | 301 | ||
| 302 | def e_contract_process(self): | 302 | def e_contract_process(self): | 
| 303 | os.makedirs(self.img_dir_path, exist_ok=True) | ||
| 303 | with fitz.Document(self.path) as pdf: | 304 | with fitz.Document(self.path) as pdf: | 
| 304 | for pno in range(pdf.pageCount): | 305 | for pno in range(pdf.pageCount): | 
| 305 | page = pdf.loadPage(pno) | 306 | page = pdf.loadPage(pno) | ... | ... | 
- 
Please register or sign in to post a comment