ltgt part 2
Showing
2 changed files
with
178 additions
and
54 deletions
| ... | @@ -16,6 +16,7 @@ from openpyxl import load_workbook, Workbook | ... | @@ -16,6 +16,7 @@ from openpyxl import load_workbook, Workbook |
| 16 | from settings import conf | 16 | from settings import conf |
| 17 | from common.mixins import LoggerMixin | 17 | from common.mixins import LoggerMixin |
| 18 | from common.tools.pdf_to_img import PDFHandler | 18 | from common.tools.pdf_to_img import PDFHandler |
| 19 | from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict | ||
| 19 | from apps.doc import consts | 20 | from apps.doc import consts |
| 20 | from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException | 21 | from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException |
| 21 | from apps.doc.ocr.wb import BSWorkbook | 22 | from apps.doc.ocr.wb import BSWorkbook |
| ... | @@ -72,14 +73,16 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -72,14 +73,16 @@ class Command(BaseCommand, LoggerMixin): |
| 72 | # input folder | 73 | # input folder |
| 73 | self.input_dirs = conf.get_namespace('LTGT_DIR_') | 74 | self.input_dirs = conf.get_namespace('LTGT_DIR_') |
| 74 | # separate folder name | 75 | # separate folder name |
| 75 | self.seperate_map = { | 76 | self.combined_map = { |
| 76 | consts.IC_CLASSIFY: 'IDCard', | 77 | consts.IC_CLASSIFY: 'IDCard', |
| 77 | consts.MVC_CLASSIFY: 'GreenBook', | 78 | consts.MVC_CLASSIFY: 'GreenBook', |
| 78 | consts.CONTRACT_CLASSIFY: 'Contract', | 79 | consts.CONTRACT_CLASSIFY: 'Contract', |
| 79 | } | 80 | } |
| 80 | self.field_map = { | 81 | self.field_map = { |
| 82 | # sheet_name, key_field, side_field_order, src_field_order | ||
| 81 | consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), | 83 | consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), |
| 82 | consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), | 84 | consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), |
| 85 | consts.MVC_CLASSIFY: (consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4, consts.MVC_SE_FIELD_ORDER_1_2), | ||
| 83 | } | 86 | } |
| 84 | # ocr相关 | 87 | # ocr相关 |
| 85 | self.ocr_url = conf.OCR_URL_FOLDER | 88 | self.ocr_url = conf.OCR_URL_FOLDER |
| ... | @@ -92,18 +95,96 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -92,18 +95,96 @@ class Command(BaseCommand, LoggerMixin): |
def signal_handler(self, sig, frame):
    """Signal callback: clear ``self.switch`` so file processing stops.

    Args:
        sig: Signal number passed by the signal machinery (unused).
        frame: Current stack frame passed by the signal machinery (unused).
    """
    # Stop processing files.
    self.switch = False
| 94 | 97 | ||
def contract_process(self, ocr_data, contract_result, classify):
    """Flatten one contract page's OCR payload into worksheet-style rows.

    The rows for the page are appended to
    ``contract_result[classify][page_number]``; nothing is returned.

    Args:
        ocr_data: Mapping whose ``'data'`` entry holds ``'page_num'`` and
            ``'page_info'``. If ``'data'`` is empty, or either of those two
            keys is missing/``None``, the call is a no-op.
        contract_result: Dict mutated in place, keyed by classify and then by
            page number.
        classify: Key under which the page is stored in ``contract_result``.
    """
    text_key = 'words'

    def append_words(rows, field_name, field_value):
        # Shared by top-level and nested sections: emit the rows for one
        # field that carries a 'words' entry.
        words = field_value[text_key]
        if words is None:
            rows.append((field_name,))
        elif isinstance(words, str):
            rows.append((field_name, words))
        elif isinstance(words, list):
            # A list is a table: a header row with the field name, then
            # every table row as-is.
            rows.append((field_name,))
            rows.extend(words)
        # Any other type produces no row.

    contract_dict = ocr_data.get('data')
    if not contract_dict:
        return
    page_num = contract_dict.get('page_num')
    page_info = contract_dict.get('page_info')
    if page_num is None or page_info is None:
        return

    # 'page_3' -> '3'. Non-string page numbers are used unchanged instead of
    # raising AttributeError on .startswith().
    if isinstance(page_num, str) and page_num.startswith('page_'):
        page_num_only = page_num.split('_')[-1]
    else:
        page_num_only = page_num

    rebuild_page_info = []
    for key, value in page_info.items():
        if value is None:
            rebuild_page_info.append((key,))
        elif text_key in value:
            append_words(rebuild_page_info, key, value)
        else:
            # No 'words' at this level: treat the value as a section whose
            # children are the actual fields.
            rebuild_page_info.append((key,))
            for sub_key, sub_value in value.items():
                if sub_value is None:
                    rebuild_page_info.append((sub_key,))
                elif text_key in sub_value:
                    append_words(rebuild_page_info, sub_key, sub_value)

    contract_result.setdefault(classify, {}).setdefault(page_num_only, []).append(rebuild_page_info)
| 137 | |||
def license1_process(self, ocr_data, all_res, classify):
    """Convert one licence OCR payload into a record stored in ``all_res``.

    Records are appended to ``all_res[classify]``. ID-card and
    vehicle-registration payloads are rebuilt into flat dicts, contract
    payloads are ignored here, and any other classify has its payload
    extended into the list unchanged.

    Args:
        ocr_data: Mapping whose ``'data'`` entry is the OCR payload; an
            empty or missing payload makes the call a no-op.
        all_res: Dict of ``classify -> list`` mutated in place.
        classify: Document class used for dispatch and as the result key.
    """
    payload = ocr_data.get('data')
    if not payload:
        return
    if isinstance(payload, dict):
        # Strip the base64 image blob from dict payloads.
        payload.pop('base64_img', '')

    def words_of(field_name):
        # Text recognised for one field of 'words_result', '' when absent.
        return payload.get('words_result', {}).get(field_name, {}).get('words', '')

    if classify == consts.IC_CLASSIFY:
        card_type = payload.get('type', '')
        is_ic = card_type.startswith('身份证')
        is_info_side = card_type.endswith('信息面')
        # Category flag: '0' = ID card, '1' = residence permit.
        record = {'类别': '0' if is_ic else '1'}
        if is_ic:
            pairs = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
        else:
            pairs = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
        for write_field, search_field in pairs:
            record[write_field] = words_of(search_field)
        if not is_info_side:
            # The non-info side carries issue/expiry dates; join them into
            # one validity-period value.
            issued = words_of('签发日期')
            expires = words_of('失效日期')
            record['有效期限'] = '{0}-{1}'.format(issued, expires)
        all_res.setdefault(classify, []).append(record)
        return

    if classify == consts.MVC_CLASSIFY:
        page_kind = payload.pop('page', 'VehicleRCI')
        fields = payload.pop('results', {})
        record = {}
        if page_kind != 'VehicleRegArea':
            for cn_key, detail in fields.items():
                record[cn_key] = detail.get('words', '')
        else:
            record['机动车登记证书编号'] = fields.get('机动车登记证书编号', {}).get('words', '')
            for register_info in fields.get('登记信息', []):
                register_info.pop('register_type', None)
                register_info.pop('register_type_name', None)
                # Each registration entry contributes one value per field,
                # so values are collected into lists.
                for cn_key, detail in register_info.items():
                    record.setdefault(cn_key, []).append(detail.get('words', ''))
        all_res.setdefault(classify, []).append(record)
        return

    if classify != consts.CONTRACT_CLASSIFY:
        all_res.setdefault(classify, []).extend(payload)
| 107 | 188 | ||
| 108 | def license2_process(self, ocr_data, all_res, classify, img_path): | 189 | def license2_process(self, ocr_data, all_res, classify, img_path): |
| 109 | pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) | 190 | pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) |
| ... | @@ -159,20 +240,24 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -159,20 +240,24 @@ class Command(BaseCommand, LoggerMixin): |
| 159 | return img_name, 1, 1 | 240 | return img_name, 1, 1 |
| 160 | 241 | ||
@staticmethod
def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map):
    """Build the timestamped output paths for one input file.

    Args:
        name: Original file name.
        img_output_dir: Directory for the image copy.
        wb_output_dir: Directory for the Excel workbook.
        pdf_output_dir: Directory for the archived source document.
        seperate_dir_map: Dict of ``classify -> directory``; may be empty.

    Returns:
        ``(img_save_path, excel_path, pdf_save_path, seperate_path_map)``.
        ``seperate_path_map`` maps every key of ``seperate_dir_map`` to
        ``<directory>/<timestamp>_<name>``.
    """
    stamped_name = '{0}_{1}'.format(datetime.now().strftime('%Y-%m-%d_%H:%M:%S'), name)
    # The workbook keeps the stamped stem but always gets an .xlsx suffix.
    stem = os.path.splitext(stamped_name)[0]
    excel_path = os.path.join(wb_output_dir, '{0}.xlsx'.format(stem))
    seperate_path_map = {
        c: os.path.join(seperate_dir, stamped_name)
        for c, seperate_dir in seperate_dir_map.items()
    }
    return (
        os.path.join(img_output_dir, stamped_name),
        excel_path,
        os.path.join(pdf_output_dir, stamped_name),
        seperate_path_map,
    )
| 171 | 255 | ||
| 172 | def res_process(self, all_res, excel_path, classify): | 256 | def res_process(self, all_res, excel_path, classify, contract_result): |
| 173 | try: | 257 | try: |
| 174 | wb = BSWorkbook(set(), set(), set(), set(), set()) | 258 | wb = BSWorkbook(set(), set(), set(), set(), set()) |
| 175 | sheet_name, key_field, side_field_order, src_field_order = self.field_map.get(classify) | 259 | for c, res_list in all_res.items(): |
| 260 | sheet_name, key_field, side_field_order, src_field_order = self.field_map.get(c) | ||
| 176 | ws = wb.create_sheet(sheet_name) | 261 | ws = wb.create_sheet(sheet_name) |
| 177 | for res in all_res: | 262 | for res in all_res: |
| 178 | if key_field is not None and key_field in res: | 263 | if key_field is not None and key_field in res: |
| ... | @@ -186,6 +271,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -186,6 +271,7 @@ class Command(BaseCommand, LoggerMixin): |
| 186 | else: | 271 | else: |
| 187 | ws.append((write_field, field_value)) | 272 | ws.append((write_field, field_value)) |
| 188 | ws.append((None,)) | 273 | ws.append((None,)) |
| 274 | wb.contract_rebuild(contract_result) | ||
| 189 | wb.remove_base_sheet() | 275 | wb.remove_base_sheet() |
| 190 | wb.save(excel_path) | 276 | wb.save(excel_path) |
| 191 | except Exception as e: | 277 | except Exception as e: |
| ... | @@ -198,7 +284,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -198,7 +284,7 @@ class Command(BaseCommand, LoggerMixin): |
| 198 | sep = os.path.sep + (os.path.altsep or '') | 284 | sep = os.path.sep + (os.path.altsep or '') |
| 199 | return os.path.basename(path.rstrip(sep)) | 285 | return os.path.basename(path.rstrip(sep)) |
| 200 | 286 | ||
| 201 | def ocr_process(self, img_path, classify, all_res, seperate_dir): | 287 | def ocr_process(self, img_path, classify, all_res, seperate_path_map, contract_result): |
| 202 | if os.path.exists(img_path): | 288 | if os.path.exists(img_path): |
| 203 | # TODO 图片验证 | 289 | # TODO 图片验证 |
| 204 | with open(img_path, 'rb') as f: | 290 | with open(img_path, 'rb') as f: |
| ... | @@ -208,7 +294,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -208,7 +294,7 @@ class Command(BaseCommand, LoggerMixin): |
| 208 | json_data = { | 294 | json_data = { |
| 209 | "file": file_data, | 295 | "file": file_data, |
| 210 | } | 296 | } |
| 211 | if seperate_dir is None: | 297 | if len(seperate_path_map) > 0: |
| 212 | json_data["classify"] = classify | 298 | json_data["classify"] = classify |
| 213 | 299 | ||
| 214 | for times in range(consts.RETRY_TIMES): | 300 | for times in range(consts.RETRY_TIMES): |
| ... | @@ -232,8 +318,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -232,8 +318,9 @@ class Command(BaseCommand, LoggerMixin): |
| 232 | data_list = ocr_res.get('data', []) | 318 | data_list = ocr_res.get('data', []) |
| 233 | if isinstance(data_list, list): | 319 | if isinstance(data_list, list): |
| 234 | for ocr_data in data_list: | 320 | for ocr_data in data_list: |
| 235 | if ocr_data.get('classify') == classify: | 321 | if ocr_data.get('classify') in seperate_path_map or ocr_data.get('classify') == classify: |
| 236 | if seperate_dir is not None: | 322 | if ocr_data.get('classify') in seperate_path_map: |
| 323 | seperate_dir = seperate_path_map[ocr_data.get('classify')] | ||
| 237 | os.makedirs(seperate_dir, exist_ok=True) | 324 | os.makedirs(seperate_dir, exist_ok=True) |
| 238 | real_dst = os.path.join(seperate_dir, self.basename(img_path)) | 325 | real_dst = os.path.join(seperate_dir, self.basename(img_path)) |
| 239 | if not os.path.exists(real_dst): | 326 | if not os.path.exists(real_dst): |
| ... | @@ -242,6 +329,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -242,6 +329,8 @@ class Command(BaseCommand, LoggerMixin): |
| 242 | self.license1_process(ocr_data, all_res, classify) | 329 | self.license1_process(ocr_data, all_res, classify) |
| 243 | elif classify in consts.LICENSE_CLASSIFY_SET_2: | 330 | elif classify in consts.LICENSE_CLASSIFY_SET_2: |
| 244 | self.license2_process(ocr_data, all_res, classify, img_path) | 331 | self.license2_process(ocr_data, all_res, classify, img_path) |
| 332 | elif classify in consts.CONTRACT_SET: | ||
| 333 | self.contract_process(ocr_data, contract_result, classify) | ||
| 245 | break | 334 | break |
| 246 | else: | 335 | else: |
| 247 | self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path)) | 336 | self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path)) |
| ... | @@ -301,25 +390,56 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -301,25 +390,56 @@ class Command(BaseCommand, LoggerMixin): |
| 301 | rebuild_res = self.ltgt_res_process(ocr_res, label, excel_path) | 390 | rebuild_res = self.ltgt_res_process(ocr_res, label, excel_path) |
| 302 | return rebuild_res | 391 | return rebuild_res |
| 303 | 392 | ||
def images_process(self, img_path_list, classify, excel_path, seperate_path_map):
    """OCR every image of one document, then write the workbook.

    Args:
        img_path_list: Image paths to send through ``ocr_process``.
        classify: Document class passed on to both processing steps.
        excel_path: Destination handed to ``res_process``.
        seperate_path_map: Per-classify path mapping forwarded to
            ``ocr_process``.

    Returns:
        The result dict that ``ocr_process`` filled in.
    """
    collected, contract_pages = {}, {}
    for single_img in img_path_list:
        self.ocr_process(single_img, classify, collected, seperate_path_map, contract_pages)
    # res_process is called unconditionally, even if no OCR result was collected.
    self.res_process(collected, excel_path, classify, contract_pages)
    return collected
| 311 | 401 | ||
| 312 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): | 402 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map): |
| 313 | if os.path.exists(path): | 403 | if os.path.exists(path): |
| 314 | rebuild_res = None | 404 | rebuild_res = None |
| 405 | img_save_path, excel_path, pdf_save_path, seperate_path_map = self.get_path( | ||
| 406 | name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map) | ||
| 407 | pdf_handler = PDFHandler(path, img_save_path) | ||
| 408 | |||
| 315 | if classify == consts.CONTRACT_CLASSIFY: | 409 | if classify == consts.CONTRACT_CLASSIFY: |
| 316 | pass | 410 | try: |
| 411 | self.folder_log.info('{0} [e-contract pdf to img start] [path={1}]'.format(self.log_base, path)) | ||
| 412 | pdf_handler.e_contract_process() | ||
| 413 | self.folder_log.info('{0} [e-contract pdf to img end] [path={1}]'.format(self.log_base, path)) | ||
| 414 | except Exception as e: | ||
| 415 | self.folder_log.error('{0} [e-contract pdf to img error] [path={1}] [error={2}]'.format( | ||
| 416 | self.log_base, path, traceback.format_exc())) | ||
| 417 | raise e | ||
| 418 | else: | ||
| 419 | ocr_result = afc_predict(pdf_handler.pdf_info) | ||
| 420 | contract_result = dict() | ||
| 421 | page_res = {} | ||
| 422 | for page_num, page_info in ocr_result.get('page_info', {}).items(): | ||
| 423 | if isinstance(page_num, str) and page_num.startswith('page_'): | ||
| 424 | page_res[page_num] = { | ||
| 425 | 'classify': classify, | ||
| 426 | "is_asp": ocr_result.get('is_asp', False), | ||
| 427 | 'page_num': page_num, | ||
| 428 | 'page_info': page_info | ||
| 429 | } | ||
| 430 | for _, page_key in pdf_handler.img_path_pno_list: | ||
| 431 | if page_key in page_res: | ||
| 432 | ocr_data = { | ||
| 433 | 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY), | ||
| 434 | 'data': page_res[page_key] | ||
| 435 | } | ||
| 436 | self.contract_process(ocr_data, contract_result, classify) | ||
| 437 | self.res_process({}, excel_path, classify, contract_result) | ||
| 438 | shutil.move(path, pdf_save_path) | ||
| 317 | else: | 439 | else: |
| 318 | try: | 440 | try: |
| 319 | img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path( | ||
| 320 | name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir) | ||
| 321 | self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | 441 | self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) |
| 322 | pdf_handler = PDFHandler(path, img_save_path) | 442 | |
| 323 | if classify in self.ltgt_classify_mapping: | 443 | if classify in self.ltgt_classify_mapping: |
| 324 | pdf_handler.extract_page_image() | 444 | pdf_handler.extract_page_image() |
| 325 | else: | 445 | else: |
| ... | @@ -331,19 +451,22 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -331,19 +451,22 @@ class Command(BaseCommand, LoggerMixin): |
| 331 | raise e | 451 | raise e |
| 332 | else: | 452 | else: |
| 333 | if classify in self.ltgt_classify_mapping: | 453 | if classify in self.ltgt_classify_mapping: |
| 334 | rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], | 454 | ltgt_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], |
| 335 | excel_path, path) | 455 | excel_path, path) |
| 456 | rebuild_res = { | ||
| 457 | classify: [ltgt_res] | ||
| 458 | } | ||
| 336 | else: | 459 | else: |
| 337 | rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) | 460 | rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path_map) |
| 338 | shutil.move(path, pdf_save_path) | 461 | shutil.move(path, pdf_save_path) |
| 339 | return rebuild_res | 462 | return rebuild_res |
| 340 | 463 | ||
| 341 | def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): | 464 | def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map): |
| 342 | if os.path.exists(path): | 465 | if os.path.exists(path): |
| 343 | rebuild_res = None | 466 | rebuild_res = None |
| 344 | try: | 467 | try: |
| 345 | img_save_path, excel_path, tiff_save_path, seperate_path = self.get_path( | 468 | img_save_path, excel_path, tiff_save_path, seperate_path_map = self.get_path( |
| 346 | name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir) | 469 | name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map) |
| 347 | self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path)) | 470 | self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path)) |
| 348 | tiff_handler = TIFFHandler(path, img_save_path) | 471 | tiff_handler = TIFFHandler(path, img_save_path) |
| 349 | tiff_handler.extract_image() | 472 | tiff_handler.extract_image() |
| ... | @@ -354,26 +477,32 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -354,26 +477,32 @@ class Command(BaseCommand, LoggerMixin): |
| 354 | raise e | 477 | raise e |
| 355 | else: | 478 | else: |
| 356 | if classify in self.ltgt_classify_mapping: | 479 | if classify in self.ltgt_classify_mapping: |
| 357 | rebuild_res = self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify], | 480 | ltgt_res = self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify], |
| 358 | excel_path, path) | 481 | excel_path, path) |
| 482 | rebuild_res = { | ||
| 483 | classify: [ltgt_res] | ||
| 484 | } | ||
| 359 | else: | 485 | else: |
| 360 | rebuild_res = self.images_process(tiff_handler.img_path_list, classify, excel_path, seperate_path) | 486 | rebuild_res = self.images_process(tiff_handler.img_path_list, classify, excel_path, seperate_path_map) |
| 361 | shutil.move(path, tiff_save_path) | 487 | shutil.move(path, tiff_save_path) |
| 362 | return rebuild_res | 488 | return rebuild_res |
| 363 | 489 | ||
def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir, seperate_dir_map):
    """Process a single image file and move it to its output location.

    Args:
        name: File name of the image.
        path: Current location of the image.
        classify: Document class; classes listed in
            ``self.ltgt_classify_mapping`` go through ``ltgt_process``, all
            others through ``images_process``.
        wb_output_dir: Directory for the Excel workbook.
        img_output_dir: Directory the image is moved into.
        pdf_output_dir: Passed through to ``get_path``.
        seperate_dir_map: Dict of ``classify -> directory`` passed through
            to ``get_path``.

    Returns:
        A dict of ``classify -> list of results``, or ``None`` when the
        output paths could not be built (the error is logged and the file
        is left where it is).
    """
    try:
        img_save_path, excel_path, _, seperate_path_map = self.get_path(
            name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
    except Exception:
        self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
            self.log_base, path, traceback.format_exc()))
        return None

    ltgt_label = self.ltgt_classify_mapping[classify] if classify in self.ltgt_classify_mapping else None
    if classify in self.ltgt_classify_mapping:
        # Wrap the single LTGT result so both branches return the same
        # classify -> list shape.
        rebuild_res = {
            classify: [self.ltgt_process([path], ltgt_label, excel_path, path)]
        }
    else:
        rebuild_res = self.images_process([path], classify, excel_path, seperate_path_map)
    # The source file is moved only after processing has finished.
    shutil.move(path, img_save_path)
    return rebuild_res
| 379 | 508 | ||
| ... | @@ -450,10 +579,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -450,10 +579,13 @@ class Command(BaseCommand, LoggerMixin): |
| 450 | os.makedirs(failed_output_dir, exist_ok=True) | 579 | os.makedirs(failed_output_dir, exist_ok=True) |
| 451 | 580 | ||
| 452 | if is_combined: | 581 | if is_combined: |
| 453 | seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) | 582 | seperate_dir_map = dict() |
| 583 | for c in self.combined_map.keys(): | ||
| 584 | seperate_dir = os.path.join(output_dir, self.combined_map[c]) | ||
| 454 | os.makedirs(seperate_dir, exist_ok=True) | 585 | os.makedirs(seperate_dir, exist_ok=True) |
| 586 | seperate_dir_map[c] = seperate_dir | ||
| 455 | else: | 587 | else: |
| 456 | seperate_dir = None | 588 | seperate_dir_map = dict() |
| 457 | 589 | ||
| 458 | os_error_filename_set = set() | 590 | os_error_filename_set = set() |
| 459 | while self.switch: | 591 | while self.switch: |
| ... | @@ -479,17 +611,17 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -479,17 +611,17 @@ class Command(BaseCommand, LoggerMixin): |
| 479 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) | 611 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) |
| 480 | if name.endswith('.pdf') or name.endswith('.PDF'): | 612 | if name.endswith('.pdf') or name.endswith('.PDF'): |
| 481 | result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, | 613 | result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, |
| 482 | pdf_output_dir, seperate_dir) | 614 | pdf_output_dir, seperate_dir_map) |
| 483 | elif name.endswith('.tif') or name.endswith('.TIF'): | 615 | elif name.endswith('.tif') or name.endswith('.TIF'): |
| 484 | if classify == consts.CONTRACT_CLASSIFY: | 616 | if classify == consts.CONTRACT_CLASSIFY: |
| 485 | raise LTGTException('e-contract must be pdf') | 617 | raise LTGTException('e-contract must be pdf') |
| 486 | result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, | 618 | result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, |
| 487 | tiff_output_dir, seperate_dir) | 619 | tiff_output_dir, seperate_dir_map) |
| 488 | else: | 620 | else: |
| 489 | if classify == consts.CONTRACT_CLASSIFY: | 621 | if classify == consts.CONTRACT_CLASSIFY: |
| 490 | raise LTGTException('e-contract must be pdf') | 622 | raise LTGTException('e-contract must be pdf') |
| 491 | result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, | 623 | result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, |
| 492 | pdf_output_dir, seperate_dir) | 624 | pdf_output_dir, seperate_dir_map) |
| 493 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 625 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) |
| 494 | else: | 626 | else: |
| 495 | result = None | 627 | result = None |
| ... | @@ -513,19 +645,11 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -513,19 +645,11 @@ class Command(BaseCommand, LoggerMixin): |
| 513 | else: | 645 | else: |
| 514 | if isinstance(result, dict) and len(result) > 0: | 646 | if isinstance(result, dict) and len(result) > 0: |
| 515 | date_str = time.strftime("%Y-%m-%d") | 647 | date_str = time.strftime("%Y-%m-%d") |
| 648 | for c, res_list in result.items(): | ||
| 649 | for res in res_list: | ||
| 516 | result_queue.put( | 650 | result_queue.put( |
| 517 | { | 651 | { |
| 518 | self.CLASSIFY_KEY: classify, | 652 | self.CLASSIFY_KEY: c, |
| 519 | self.RESULT_KEY: result, | ||
| 520 | self.DATE_KEY: date_str | ||
| 521 | } | ||
| 522 | ) | ||
| 523 | elif isinstance(result, list) and len(result) > 0: | ||
| 524 | date_str = time.strftime("%Y-%m-%d") | ||
| 525 | for res in result: | ||
| 526 | result_queue.put( | ||
| 527 | { | ||
| 528 | self.CLASSIFY_KEY: classify, | ||
| 529 | self.RESULT_KEY: res, | 653 | self.RESULT_KEY: res, |
| 530 | self.DATE_KEY: date_str | 654 | self.DATE_KEY: date_str |
| 531 | } | 655 | } | ... | ... |
-
Please register or sign in to post a comment