244bf189 by 周伟奇

Merge branch 'feature/e-contract' into feature/1119

2 parents 05f22d9d 83de3e22
......@@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin):
return
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
page_num = contract_dict.get('page_num')
if page_num.startswith('page_'):
page_num_only = page_num.split('_')[-1]
else:
page_num_only = page_num
rebuild_page_info = []
text_key = 'words'
for key, value in contract_dict.get('page_info', {}).items():
......@@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin):
for row_list in sub_value[text_key]:
rebuild_page_info.append(row_list)
# contract_result.setdefault(page_num, []).append(rebuild_page_info)
contract_result.setdefault(classify, dict()).setdefault(page_num, []).append(rebuild_page_info)
# contract_result.setdefault(page_num_only, []).append(rebuild_page_info)
contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
# def rebuild_result(self, ocr_data, classify, img_path):
# license_data = ocr_data.get('data')
......
......@@ -710,7 +710,7 @@ class BSWorkbook(Workbook):
for i in range(30):
if str(i) in contract_result:
page_num = str(i)
info_list = contract_result.get(page_num)
info_list = contract_result.get(page_num, [])
# for page_num, info_list in contract_result.items():
ws.append(('page {0}'.format(page_num), ))
for info in info_list:
......
......@@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler):
classify_1 = classify_2 = 0
if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
for keyword, classify_1_tmp, classify_2_tmp in consts.FILE_NAME_PREFIX_MAP.get(prefix):
for keyword, classify_1_tmp, classify_2_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
if keyword in document_name:
classify_1 = classify_1_tmp
classify_2 = classify_2_tmp
......
......@@ -302,6 +302,7 @@ class PDFHandler:
# self.page_text_list = page_text_list
def e_contract_process(self):
os.makedirs(self.img_dir_path, exist_ok=True)
with fitz.Document(self.path) as pdf:
for pno in range(pdf.pageCount):
page = pdf.loadPage(pno)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!