a94ce721 by 周伟奇

ltgt part 2

1 parent f417660d
...@@ -16,6 +16,7 @@ from openpyxl import load_workbook, Workbook ...@@ -16,6 +16,7 @@ from openpyxl import load_workbook, Workbook
16 from settings import conf 16 from settings import conf
17 from common.mixins import LoggerMixin 17 from common.mixins import LoggerMixin
18 from common.tools.pdf_to_img import PDFHandler 18 from common.tools.pdf_to_img import PDFHandler
19 from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict
19 from apps.doc import consts 20 from apps.doc import consts
20 from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException 21 from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException
21 from apps.doc.ocr.wb import BSWorkbook 22 from apps.doc.ocr.wb import BSWorkbook
...@@ -72,14 +73,16 @@ class Command(BaseCommand, LoggerMixin): ...@@ -72,14 +73,16 @@ class Command(BaseCommand, LoggerMixin):
72 # input folder 73 # input folder
73 self.input_dirs = conf.get_namespace('LTGT_DIR_') 74 self.input_dirs = conf.get_namespace('LTGT_DIR_')
74 # seperate folder name 75 # seperate folder name
75 self.seperate_map = { 76 self.combined_map = {
76 consts.IC_CLASSIFY: 'IDCard', 77 consts.IC_CLASSIFY: 'IDCard',
77 consts.MVC_CLASSIFY: 'GreenBook', 78 consts.MVC_CLASSIFY: 'GreenBook',
78 consts.CONTRACT_CLASSIFY: 'Contract', 79 consts.CONTRACT_CLASSIFY: 'Contract',
79 } 80 }
80 self.field_map = { 81 self.field_map = {
82 # sheet_name, key_field, side_field_order, src_field_order
81 consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), 83 consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER),
82 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), 84 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
85 consts.MVC_CLASSIFY: (consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4, consts.MVC_SE_FIELD_ORDER_1_2),
83 } 86 }
84 # ocr相关 87 # ocr相关
85 self.ocr_url = conf.OCR_URL_FOLDER 88 self.ocr_url = conf.OCR_URL_FOLDER
...@@ -92,18 +95,96 @@ class Command(BaseCommand, LoggerMixin): ...@@ -92,18 +95,96 @@ class Command(BaseCommand, LoggerMixin):
def signal_handler(self, sig, frame):
    """Signal callback: clear the ``switch`` flag so file processing stops.

    Args:
        sig: Signal number passed by the caller; not used.
        frame: Stack frame passed by the caller; not used.
    """
    self.switch = False  # stop processing files
94 97
def contract_process(self, ocr_data, contract_result, classify):
    """Flatten one contract page's OCR payload into rows and record them.

    The payload is read from ``ocr_data['data']``. When it is empty, or its
    ``page_num`` or ``page_info`` is None, nothing is recorded.

    Each ``page_info`` entry becomes one or more rows:

    * value is None                 -> ``(key,)``
    * value has a ``'words'`` entry -> rows built from that entry (see
      ``words_rows`` below)
    * otherwise                     -> ``(key,)`` as a header row, followed
      by the rows of each sub-entry handled the same way one level down
      (sub-entries without ``'words'`` contribute nothing).

    Args:
        ocr_data: Mapping with a ``'data'`` entry holding ``page_num`` and
            ``page_info``.
        contract_result: Dict updated in place; the row list is appended to
            ``contract_result[classify][page_number]``.
        classify: Key under which the page is stored in ``contract_result``.

    Returns:
        None.
    """
    text_key = 'words'

    def words_rows(name, words):
        # Rows for a single 'words' value; shared by both nesting levels.
        if words is None:
            return [(name,)]
        if isinstance(words, str):
            return [(name, words)]
        if isinstance(words, list):
            # A list is a table: header row first, then the rows as given.
            return [(name,)] + words
        # Any other type contributes no rows.
        return []

    contract_dict = ocr_data.get('data')
    if not contract_dict:
        return
    page_num = contract_dict.get('page_num')
    page_info = contract_dict.get('page_info')
    if page_num is None or page_info is None:
        return

    # 'page_3' -> '3'. Anything else (including non-string page numbers,
    # which previously raised AttributeError) is kept unchanged.
    if isinstance(page_num, str) and page_num.startswith('page_'):
        page_num_only = page_num.split('_')[-1]
    else:
        page_num_only = page_num

    rebuild_page_info = []
    for key, value in page_info.items():
        if value is None:
            rebuild_page_info.append((key,))
        elif text_key in value:
            rebuild_page_info.extend(words_rows(key, value[text_key]))
        else:
            # No direct text: emit the key as a header and descend one level.
            rebuild_page_info.append((key,))
            for sub_key, sub_value in value.items():
                if sub_value is None:
                    rebuild_page_info.append((sub_key,))
                elif text_key in sub_value:
                    rebuild_page_info.extend(words_rows(sub_key, sub_value[text_key]))

    contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
137
def license1_process(self, ocr_data, all_res, classify):
    """Rebuild one licence OCR result and store it under ``all_res[classify]``.

    The payload is read from ``ocr_data['data']``; an empty payload is
    ignored. Handling depends on ``classify``:

    * ``consts.IC_CLASSIFY``: builds a flat field dict from ``words_result``
      using the field map chosen by the card ``type`` string.
    * ``consts.MVC_CLASSIFY``: builds a field dict from ``results``; for the
      ``'VehicleRegArea'`` page the registration entries are collected into
      per-field lists.
    * ``consts.CONTRACT_CLASSIFY``: nothing is stored.
    * anything else: the payload is added with ``list.extend``.

    Args:
        ocr_data: Mapping with a ``'data'`` entry.
        all_res: Dict of ``classify -> list`` updated in place.
        classify: Classification code of the document being processed.

    Returns:
        None.
    """
    license_data = ocr_data.get('data')
    if not license_data:
        return
    if isinstance(license_data, dict):
        # Remove the 'base64_img' entry in place before further processing.
        license_data.pop('base64_img', '')

    if classify == consts.IC_CLASSIFY:
        def words_of(field):
            # Text of one recognised field, '' when it is absent.
            return license_data.get('words_result', {}).get(field, {}).get('words', '')

        card_type = license_data.get('type', '')
        is_ic = card_type.startswith('身份证')
        is_info_side = card_type.endswith('信息面')
        if is_ic:
            field_pairs = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
        else:
            field_pairs = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1

        # Category: '0' = ID card, '1' = residence permit.
        card_fields = {'类别': '0' if is_ic else '1'}
        for write_field, search_field in field_pairs:
            card_fields[write_field] = words_of(search_field)
        if not is_info_side:
            # The non-info side carries the validity period as two dates.
            issued = words_of('签发日期')
            expires = words_of('失效日期')
            card_fields['有效期限'] = '{0}-{1}'.format(issued, expires)
        all_res.setdefault(classify, []).append(card_fields)
        return

    if classify == consts.MVC_CLASSIFY:
        mvc_page = license_data.pop('page', 'VehicleRCI')
        mvc_res = license_data.pop('results', {})
        if mvc_page != 'VehicleRegArea':
            rebuilt = {cn_key: detail.get('words', '') for cn_key, detail in mvc_res.items()}
        else:
            rebuilt = {'机动车登记证书编号': mvc_res.get('机动车登记证书编号', {}).get('words', '')}
            for register_info in mvc_res.get('登记信息', []):
                # These two keys are dropped before the remaining fields are collected.
                for dropped_key in ('register_type', 'register_type_name'):
                    register_info.pop(dropped_key, None)
                for cn_key, detail in register_info.items():
                    rebuilt.setdefault(cn_key, []).append(detail.get('words', ''))
        all_res.setdefault(classify, []).append(rebuilt)
        return

    if classify == consts.CONTRACT_CLASSIFY:
        return

    all_res.setdefault(classify, []).extend(license_data)
107 188
108 def license2_process(self, ocr_data, all_res, classify, img_path): 189 def license2_process(self, ocr_data, all_res, classify, img_path):
109 pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) 190 pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
...@@ -159,20 +240,24 @@ class Command(BaseCommand, LoggerMixin): ...@@ -159,20 +240,24 @@ class Command(BaseCommand, LoggerMixin):
159 return img_name, 1, 1 240 return img_name, 1, 1
160 241
161 @staticmethod 242 @staticmethod
162 def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): 243 def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map):
163 time_stamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S') 244 time_stamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
164 new_name = '{0}_{1}'.format(time_stamp, name) 245 new_name = '{0}_{1}'.format(time_stamp, name)
165 img_save_path = os.path.join(img_output_dir, new_name) 246 img_save_path = os.path.join(img_output_dir, new_name)
166 pdf_save_path = os.path.join(pdf_output_dir, new_name) 247 pdf_save_path = os.path.join(pdf_output_dir, new_name)
167 excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) 248 excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0])
168 excel_path = os.path.join(wb_output_dir, excel_name) 249 excel_path = os.path.join(wb_output_dir, excel_name)
169 seperate_path = None if seperate_dir is None else os.path.join(seperate_dir, new_name) 250 seperate_path_map = dict()
170 return img_save_path, excel_path, pdf_save_path, seperate_path 251 if len(seperate_dir_map) > 0:
252 for c, seperate_dir in seperate_dir_map.items():
253 seperate_path_map[c] = os.path.join(seperate_dir, new_name)
254 return img_save_path, excel_path, pdf_save_path, seperate_path_map
171 255
172 def res_process(self, all_res, excel_path, classify): 256 def res_process(self, all_res, excel_path, classify, contract_result):
173 try: 257 try:
174 wb = BSWorkbook(set(), set(), set(), set(), set()) 258 wb = BSWorkbook(set(), set(), set(), set(), set())
175 sheet_name, key_field, side_field_order, src_field_order = self.field_map.get(classify) 259 for c, res_list in all_res.items():
260 sheet_name, key_field, side_field_order, src_field_order = self.field_map.get(c)
176 ws = wb.create_sheet(sheet_name) 261 ws = wb.create_sheet(sheet_name)
177 for res in all_res: 262 for res in all_res:
178 if key_field is not None and key_field in res: 263 if key_field is not None and key_field in res:
...@@ -186,6 +271,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -186,6 +271,7 @@ class Command(BaseCommand, LoggerMixin):
186 else: 271 else:
187 ws.append((write_field, field_value)) 272 ws.append((write_field, field_value))
188 ws.append((None,)) 273 ws.append((None,))
274 wb.contract_rebuild(contract_result)
189 wb.remove_base_sheet() 275 wb.remove_base_sheet()
190 wb.save(excel_path) 276 wb.save(excel_path)
191 except Exception as e: 277 except Exception as e:
...@@ -198,7 +284,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -198,7 +284,7 @@ class Command(BaseCommand, LoggerMixin):
198 sep = os.path.sep + (os.path.altsep or '') 284 sep = os.path.sep + (os.path.altsep or '')
199 return os.path.basename(path.rstrip(sep)) 285 return os.path.basename(path.rstrip(sep))
200 286
201 def ocr_process(self, img_path, classify, all_res, seperate_dir): 287 def ocr_process(self, img_path, classify, all_res, seperate_path_map, contract_result):
202 if os.path.exists(img_path): 288 if os.path.exists(img_path):
203 # TODO 图片验证 289 # TODO 图片验证
204 with open(img_path, 'rb') as f: 290 with open(img_path, 'rb') as f:
...@@ -208,7 +294,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -208,7 +294,7 @@ class Command(BaseCommand, LoggerMixin):
208 json_data = { 294 json_data = {
209 "file": file_data, 295 "file": file_data,
210 } 296 }
211 if seperate_dir is None: 297 if len(seperate_path_map) > 0:
212 json_data["classify"] = classify 298 json_data["classify"] = classify
213 299
214 for times in range(consts.RETRY_TIMES): 300 for times in range(consts.RETRY_TIMES):
...@@ -232,8 +318,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -232,8 +318,9 @@ class Command(BaseCommand, LoggerMixin):
232 data_list = ocr_res.get('data', []) 318 data_list = ocr_res.get('data', [])
233 if isinstance(data_list, list): 319 if isinstance(data_list, list):
234 for ocr_data in data_list: 320 for ocr_data in data_list:
235 if ocr_data.get('classify') == classify: 321 if ocr_data.get('classify') in seperate_path_map or ocr_data.get('classify') == classify:
236 if seperate_dir is not None: 322 if ocr_data.get('classify') in seperate_path_map:
323 seperate_dir = seperate_path_map[ocr_data.get('classify')]
237 os.makedirs(seperate_dir, exist_ok=True) 324 os.makedirs(seperate_dir, exist_ok=True)
238 real_dst = os.path.join(seperate_dir, self.basename(img_path)) 325 real_dst = os.path.join(seperate_dir, self.basename(img_path))
239 if not os.path.exists(real_dst): 326 if not os.path.exists(real_dst):
...@@ -242,6 +329,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -242,6 +329,8 @@ class Command(BaseCommand, LoggerMixin):
242 self.license1_process(ocr_data, all_res, classify) 329 self.license1_process(ocr_data, all_res, classify)
243 elif classify in consts.LICENSE_CLASSIFY_SET_2: 330 elif classify in consts.LICENSE_CLASSIFY_SET_2:
244 self.license2_process(ocr_data, all_res, classify, img_path) 331 self.license2_process(ocr_data, all_res, classify, img_path)
332 elif classify in consts.CONTRACT_SET:
333 self.contract_process(ocr_data, contract_result, classify)
245 break 334 break
246 else: 335 else:
247 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path)) 336 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
...@@ -301,25 +390,56 @@ class Command(BaseCommand, LoggerMixin): ...@@ -301,25 +390,56 @@ class Command(BaseCommand, LoggerMixin):
301 rebuild_res = self.ltgt_res_process(ocr_res, label, excel_path) 390 rebuild_res = self.ltgt_res_process(ocr_res, label, excel_path)
302 return rebuild_res 391 return rebuild_res
303 392
def images_process(self, img_path_list, classify, excel_path, seperate_path_map):
    """Run OCR over each image, then hand the collected results to ``res_process``.

    Args:
        img_path_list: Image paths to process, in order.
        classify: Classification code passed through to the OCR step.
        excel_path: Workbook path passed to ``res_process``.
        seperate_path_map: Dict of ``classify -> directory`` passed to
            ``ocr_process``.

    Returns:
        The dict that ``ocr_process`` filled in for the licence results.
    """
    results_by_classify, contract_pages = dict(), dict()
    for single_img_path in img_path_list:
        self.ocr_process(single_img_path, classify, results_by_classify,
                         seperate_path_map, contract_pages)
    # res_process is called unconditionally, even when nothing was collected.
    self.res_process(results_by_classify, excel_path, classify, contract_pages)
    return results_by_classify
311 401
312 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): 402 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map):
313 if os.path.exists(path): 403 if os.path.exists(path):
314 rebuild_res = None 404 rebuild_res = None
405 img_save_path, excel_path, pdf_save_path, seperate_path_map = self.get_path(
406 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
407 pdf_handler = PDFHandler(path, img_save_path)
408
315 if classify == consts.CONTRACT_CLASSIFY: 409 if classify == consts.CONTRACT_CLASSIFY:
316 pass 410 try:
411 self.folder_log.info('{0} [e-contract pdf to img start] [path={1}]'.format(self.log_base, path))
412 pdf_handler.e_contract_process()
413 self.folder_log.info('{0} [e-contract pdf to img end] [path={1}]'.format(self.log_base, path))
414 except Exception as e:
415 self.folder_log.error('{0} [e-contract pdf to img error] [path={1}] [error={2}]'.format(
416 self.log_base, path, traceback.format_exc()))
417 raise e
418 else:
419 ocr_result = afc_predict(pdf_handler.pdf_info)
420 contract_result = dict()
421 page_res = {}
422 for page_num, page_info in ocr_result.get('page_info', {}).items():
423 if isinstance(page_num, str) and page_num.startswith('page_'):
424 page_res[page_num] = {
425 'classify': classify,
426 "is_asp": ocr_result.get('is_asp', False),
427 'page_num': page_num,
428 'page_info': page_info
429 }
430 for _, page_key in pdf_handler.img_path_pno_list:
431 if page_key in page_res:
432 ocr_data = {
433 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
434 'data': page_res[page_key]
435 }
436 self.contract_process(ocr_data, contract_result, classify)
437 self.res_process({}, excel_path, classify, contract_result)
438 shutil.move(path, pdf_save_path)
317 else: 439 else:
318 try: 440 try:
319 img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path(
320 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
321 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) 441 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
322 pdf_handler = PDFHandler(path, img_save_path) 442
323 if classify in self.ltgt_classify_mapping: 443 if classify in self.ltgt_classify_mapping:
324 pdf_handler.extract_page_image() 444 pdf_handler.extract_page_image()
325 else: 445 else:
...@@ -331,19 +451,22 @@ class Command(BaseCommand, LoggerMixin): ...@@ -331,19 +451,22 @@ class Command(BaseCommand, LoggerMixin):
331 raise e 451 raise e
332 else: 452 else:
333 if classify in self.ltgt_classify_mapping: 453 if classify in self.ltgt_classify_mapping:
334 rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], 454 ltgt_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify],
335 excel_path, path) 455 excel_path, path)
456 rebuild_res = {
457 classify: [ltgt_res]
458 }
336 else: 459 else:
337 rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) 460 rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path_map)
338 shutil.move(path, pdf_save_path) 461 shutil.move(path, pdf_save_path)
339 return rebuild_res 462 return rebuild_res
340 463
341 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): 464 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map):
342 if os.path.exists(path): 465 if os.path.exists(path):
343 rebuild_res = None 466 rebuild_res = None
344 try: 467 try:
345 img_save_path, excel_path, tiff_save_path, seperate_path = self.get_path( 468 img_save_path, excel_path, tiff_save_path, seperate_path_map = self.get_path(
346 name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir) 469 name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map)
347 self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path)) 470 self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path))
348 tiff_handler = TIFFHandler(path, img_save_path) 471 tiff_handler = TIFFHandler(path, img_save_path)
349 tiff_handler.extract_image() 472 tiff_handler.extract_image()
...@@ -354,26 +477,32 @@ class Command(BaseCommand, LoggerMixin): ...@@ -354,26 +477,32 @@ class Command(BaseCommand, LoggerMixin):
354 raise e 477 raise e
355 else: 478 else:
356 if classify in self.ltgt_classify_mapping: 479 if classify in self.ltgt_classify_mapping:
357 rebuild_res = self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify], 480 ltgt_res = self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify],
358 excel_path, path) 481 excel_path, path)
482 rebuild_res = {
483 classify: [ltgt_res]
484 }
359 else: 485 else:
360 rebuild_res = self.images_process(tiff_handler.img_path_list, classify, excel_path, seperate_path) 486 rebuild_res = self.images_process(tiff_handler.img_path_list, classify, excel_path, seperate_path_map)
361 shutil.move(path, tiff_save_path) 487 shutil.move(path, tiff_save_path)
362 return rebuild_res 488 return rebuild_res
363 489
def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir, seperate_dir_map):
    """Process a single image file and move it to its output location.

    Args:
        name: File name of the image.
        path: Current path of the image.
        classify: Classification code; codes present in
            ``self.ltgt_classify_mapping`` go through ``ltgt_process``,
            all others through ``images_process``.
        wb_output_dir: Directory for the workbook.
        img_output_dir: Directory the image is moved to.
        pdf_output_dir: Passed through to ``get_path``.
        seperate_dir_map: Dict of ``classify -> directory``.

    Returns:
        None when the output paths could not be built (the error is logged);
        otherwise ``{classify: [ltgt_result]}`` for ltgt classifications or
        the value returned by ``images_process``.
    """
    try:
        img_save_path, excel_path, _, seperate_path_map = self.get_path(
            name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
    except Exception:
        self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
            self.log_base, path, traceback.format_exc()))
        return None

    if classify in self.ltgt_classify_mapping:
        ltgt_res = self.ltgt_process([path], self.ltgt_classify_mapping[classify], excel_path, path)
        # Wrapped in the same classify -> list shape that images_process returns.
        rebuild_res = {classify: [ltgt_res]}
    else:
        rebuild_res = self.images_process([path], classify, excel_path, seperate_path_map)
    shutil.move(path, img_save_path)
    return rebuild_res
379 508
...@@ -450,10 +579,13 @@ class Command(BaseCommand, LoggerMixin): ...@@ -450,10 +579,13 @@ class Command(BaseCommand, LoggerMixin):
450 os.makedirs(failed_output_dir, exist_ok=True) 579 os.makedirs(failed_output_dir, exist_ok=True)
451 580
452 if is_combined: 581 if is_combined:
453 seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) 582 seperate_dir_map = dict()
583 for c in self.combined_map.keys():
584 seperate_dir = os.path.join(output_dir, self.combined_map[c])
454 os.makedirs(seperate_dir, exist_ok=True) 585 os.makedirs(seperate_dir, exist_ok=True)
586 seperate_dir_map[c] = seperate_dir
455 else: 587 else:
456 seperate_dir = None 588 seperate_dir_map = dict()
457 589
458 os_error_filename_set = set() 590 os_error_filename_set = set()
459 while self.switch: 591 while self.switch:
...@@ -479,17 +611,17 @@ class Command(BaseCommand, LoggerMixin): ...@@ -479,17 +611,17 @@ class Command(BaseCommand, LoggerMixin):
479 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) 611 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path))
480 if name.endswith('.pdf') or name.endswith('.PDF'): 612 if name.endswith('.pdf') or name.endswith('.PDF'):
481 result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, 613 result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir,
482 pdf_output_dir, seperate_dir) 614 pdf_output_dir, seperate_dir_map)
483 elif name.endswith('.tif') or name.endswith('.TIF'): 615 elif name.endswith('.tif') or name.endswith('.TIF'):
484 if classify == consts.CONTRACT_CLASSIFY: 616 if classify == consts.CONTRACT_CLASSIFY:
485 raise LTGTException('e-contract must be pdf') 617 raise LTGTException('e-contract must be pdf')
486 result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, 618 result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir,
487 tiff_output_dir, seperate_dir) 619 tiff_output_dir, seperate_dir_map)
488 else: 620 else:
489 if classify == consts.CONTRACT_CLASSIFY: 621 if classify == consts.CONTRACT_CLASSIFY:
490 raise LTGTException('e-contract must be pdf') 622 raise LTGTException('e-contract must be pdf')
491 result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, 623 result = self.img_process(name, path, classify, wb_output_dir, img_output_dir,
492 pdf_output_dir, seperate_dir) 624 pdf_output_dir, seperate_dir_map)
493 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) 625 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
494 else: 626 else:
495 result = None 627 result = None
...@@ -513,19 +645,11 @@ class Command(BaseCommand, LoggerMixin): ...@@ -513,19 +645,11 @@ class Command(BaseCommand, LoggerMixin):
513 else: 645 else:
514 if isinstance(result, dict) and len(result) > 0: 646 if isinstance(result, dict) and len(result) > 0:
515 date_str = time.strftime("%Y-%m-%d") 647 date_str = time.strftime("%Y-%m-%d")
648 for c, res_list in result.items():
649 for res in res_list:
516 result_queue.put( 650 result_queue.put(
517 { 651 {
518 self.CLASSIFY_KEY: classify, 652 self.CLASSIFY_KEY: c,
519 self.RESULT_KEY: result,
520 self.DATE_KEY: date_str
521 }
522 )
523 elif isinstance(result, list) and len(result) > 0:
524 date_str = time.strftime("%Y-%m-%d")
525 for res in result:
526 result_queue.put(
527 {
528 self.CLASSIFY_KEY: classify,
529 self.RESULT_KEY: res, 653 self.RESULT_KEY: res,
530 self.DATE_KEY: date_str 654 self.DATE_KEY: date_str
531 } 655 }
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!