d213eb78 by 周伟奇

part 2

1 parent 68d7dd98
...@@ -1449,4 +1449,16 @@ SE_SECOND_ID_FIELD_MAPPING = { ...@@ -1449,4 +1449,16 @@ SE_SECOND_ID_FIELD_MAPPING = {
1449 1449
1450 HEAD_LIST = ['Info', 'Index', 'License', 'Field', 'Input', 'OCR', 'Result', 'Position', 'Image', 'errorType'] 1450 HEAD_LIST = ['Info', 'Index', 'License', 'Field', 'Input', 'OCR', 'Result', 'Position', 'Image', 'errorType']
1451 1451
1452 # ----------------litigation------------------------
1453 IC_FIELD_ORDER_2 = (('姓名', '姓名'),
1454 ('公民身份号码', '公民身份号码'),
1455 ('出生年月', '出生年月'),
1456 ('住址', '住址'),
1457 ('性别', '性别'),
1458 ('民族', '民族'),)
1459 IC_FIELD_ORDER_3 = (('有效期限', '有效期限'), ('签发机关', '签发机关'),)
1460
1461 BC_FIELD_ORDER_2 = (('BankName', '发卡行名称'),
1462 ('CardNum', '银行卡号'),
1463 ('CardType', '银行卡类型'),)
1452 1464
......
1 import os 1 import os
2 import re 2 import re
3 import time 3 import time
4 import json
4 import shutil 5 import shutil
5 import base64 6 import base64
6 import signal 7 import signal
...@@ -16,7 +17,7 @@ from settings import conf ...@@ -16,7 +17,7 @@ from settings import conf
16 from common.mixins import LoggerMixin 17 from common.mixins import LoggerMixin
17 from common.tools.pdf_to_img import PDFHandler 18 from common.tools.pdf_to_img import PDFHandler
18 from apps.doc import consts 19 from apps.doc import consts
19 from apps.doc.exceptions import OCR1Exception, OCR4Exception, LTGTException 20 from apps.doc.exceptions import OCR1Exception, OCR2Exception, LTGTException
20 from apps.doc.ocr.wb import BSWorkbook 21 from apps.doc.ocr.wb import BSWorkbook
21 22
22 23
...@@ -69,8 +70,19 @@ class Command(BaseCommand, LoggerMixin): ...@@ -69,8 +70,19 @@ class Command(BaseCommand, LoggerMixin):
69 self.sleep_time = float(conf.SLEEP_SECOND_FOLDER) 70 self.sleep_time = float(conf.SLEEP_SECOND_FOLDER)
70 # input folder 71 # input folder
71 self.input_dirs = conf.get_namespace('LTGT_DIR_') 72 self.input_dirs = conf.get_namespace('LTGT_DIR_')
73 # separate folder name
74 self.seperate_map = {
75 consts.IC_CLASSIFY: 'IDCard',
76 consts.BC_CLASSIFY: 'BankCard'
77 }
78 self.field_map = {
79 consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER),
80 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
81 consts.BC_CLASSIFY: (consts.BC_CN_NAME, None, None, consts.BC_FIELD_ORDER_2)
82 }
72 # ocr相关 83 # ocr相关
73 # self.ocr_url = conf.OCR_URL_FOLDER 84 self.ocr_url = conf.OCR_URL_FOLDER
85 self.ocr_url_2 = conf.OCR2_URL_FOLDER
74 # self.ocr_url_4 = conf.IC_URL 86 # self.ocr_url_4 = conf.IC_URL
75 self.ltgt_ocr_url = conf.LTGT_URL 87 self.ltgt_ocr_url = conf.LTGT_URL
76 # 优雅退出信号:15 88 # 优雅退出信号:15
...@@ -79,73 +91,60 @@ class Command(BaseCommand, LoggerMixin): ...@@ -79,73 +91,60 @@ class Command(BaseCommand, LoggerMixin):
79 def signal_handler(self, sig, frame): 91 def signal_handler(self, sig, frame):
80 self.switch = False # 停止处理文件 92 self.switch = False # 停止处理文件
81 93
82 def license1_process(self, ocr_data, license_summary, classify, img_path): 94 def license1_process(self, ocr_data, all_res, classify):
83 # 类别:'0'身份证, '1'居住证 95 # 类别:'0'身份证, '1'居住证
84 license_data = ocr_data.get('data', []) 96 license_data = ocr_data.get('data', [])
85 if not license_data: 97 if not license_data:
86 return 98 return
87 if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合
88 for mvc_dict in license_data:
89 try:
90 mvc_page = mvc_dict.pop('page')
91 except Exception as e:
92 pass
93 else:
94 if mvc_page == 'VehicleRegArea':
95 mvc_res = mvc_dict.pop('results', {})
96 mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '')
97 for register_info in mvc_res.get('register_info', []):
98 for detail_dict in register_info.get('details', {}).values():
99 mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append(
100 detail_dict.get('words', ''))
101 del mvc_res
102 if classify == consts.IC_CLASSIFY: 99 if classify == consts.IC_CLASSIFY:
103 for id_card_dict in license_data: 100 for id_card_dict in license_data:
104 try: 101 try:
105 base64_img = id_card_dict.pop('base64_img') 102 id_card_dict.pop('base64_img')
106 except Exception as e: 103 except Exception as e:
107 continue 104 continue
108 else: 105 all_res.append(license_data)
109 card_type = -1 106
110 json_data_4 = { 107 def license2_process(self, ocr_data, all_res, classify, img_path):
111 'mode': 1, 108 pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
112 'user_info': { 109 file_data = ocr_data.get('section_img')
113 'image_content': base64_img, 110 if file_data is None:
114 }, 111 with open(img_path, 'rb') as f:
115 'options': { 112 base64_data = base64.b64encode(f.read())
116 'distinguish_type': 1, 113 # 获取解码后的base64值
117 'auto_rotate': True, 114 file_data = base64_data.decode()
118 }, 115 json_data_2 = {
116 "pid": str(pid),
117 "filedata": file_data
119 } 118 }
119
120 for times in range(consts.RETRY_TIMES): 120 for times in range(consts.RETRY_TIMES):
121 try: 121 try:
122 start_time = time.time() 122 start_time = time.time()
123 ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4) 123 ocr_2_response = requests.post(self.ocr_url_2, data=json_data_2)
124 if ocr_4_response.status_code != 200: 124 if ocr_2_response.status_code != 200:
125 raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code)) 125 raise OCR2Exception('ocr_2 status code: {0}'.format(ocr_2_response.status_code))
126 except Exception as e: 126 except Exception as e:
127 self.folder_log.warn( 127 self.folder_log.warn(
128 '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format( 128 '{0} [ocr_2 failed] [times={1}] [img_path={2}] [error={3}]'.format(
129 self.log_base, times, img_path, traceback.format_exc())) 129 self.log_base, times, img_path, traceback.format_exc()))
130 else: 130 else:
131 ocr_4_res = ocr_4_response.json() 131 ocr_res_2 = json.loads(ocr_2_response.text)
132 end_time = time.time() 132 end_time = time.time()
133 speed_time = int(end_time - start_time) 133 speed_time = int(end_time - start_time)
134
135 if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
136 card_type = ocr_4_res.get('result', {}).get(
137 'idcard_distinguish_result', {}).get('result', -1)
138
139 self.folder_log.info( 134 self.folder_log.info(
140 '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format( 135 '{0} [ocr_2 success] [img={1}] [speed_time={2}]'.format(
141 self.log_base, img_path, speed_time)) 136 self.log_base, img_path, speed_time))
142 break
143 else:
144 self.folder_log.warn(
145 '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
146 137
147 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) 138 if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
148 license_summary.setdefault(classify, []).extend(license_data) 139 if pid == consts.BC_PID:
140 all_res.append(ocr_res_2)
141 else:
142 # 营业执照等
143 for result_dict in ocr_res_2.get('ResultList', []):
144 res_dict = {}
145 for field_dict in result_dict.get('FieldList', []):
146 res_dict[field_dict.get('chn_key', '')] = field_dict.get('value', '')
147 all_res.append(res_dict)
149 148
150 @staticmethod 149 @staticmethod
151 def parse_img_path(img_path): 150 def parse_img_path(img_path):
...@@ -158,43 +157,38 @@ class Command(BaseCommand, LoggerMixin): ...@@ -158,43 +157,38 @@ class Command(BaseCommand, LoggerMixin):
158 return img_name, 1, 1 157 return img_name, 1, 1
159 158
160 @staticmethod 159 @staticmethod
161 def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir): 160 def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir):
162 time_stamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S') 161 time_stamp = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
163 new_name = '{0}_{1}'.format(time_stamp, name) 162 new_name = '{0}_{1}'.format(time_stamp, name)
164 img_save_path = os.path.join(img_output_dir, new_name) 163 img_save_path = os.path.join(img_output_dir, new_name)
165 pdf_save_path = os.path.join(pdf_output_dir, new_name) 164 pdf_save_path = os.path.join(pdf_output_dir, new_name)
166 excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) 165 excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0])
167 excel_path = os.path.join(wb_output_dir, excel_name) 166 excel_path = os.path.join(wb_output_dir, excel_name)
168 return img_save_path, excel_path, pdf_save_path 167 seperate_path = None if seperate_dir is None else os.path.join(seperate_dir, new_name)
168 return img_save_path, excel_path, pdf_save_path, seperate_path
169 169
170 def res_process(self, all_res, classify, excel_path): 170 def res_process(self, all_res, excel_path, classify):
171 try: 171 try:
172 license_summary = {}
173
174 if not all_res:
175 return
176 else:
177 for img_path, ocr_res in all_res.items():
178 # img_name, pno, ino = self.parse_img_path(img_path)
179 # part_idx = 1
180
181 if isinstance(ocr_res, dict):
182 if ocr_res.get('code') == 1:
183 data_list = ocr_res.get('data', [])
184 if isinstance(data_list, list):
185 for ocr_data in data_list:
186 # part_idx = part_idx + 1
187 self.license1_process(ocr_data, license_summary, classify, img_path)
188
189 wb = BSWorkbook(set(), set(), set(), set(), set()) 172 wb = BSWorkbook(set(), set(), set(), set(), set())
190 wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0]) 173 sheet_name, key_field, side_field_order, field_order = self.field_map.get(classify)
174 ws = wb.create_sheet(sheet_name)
175 for res in all_res:
176 if key_field is not None and key_field in res:
177 field_order = side_field_order
178 for search_field, write_field in field_order:
179 field_value = res.get(search_field, '')
180 if isinstance(field_value, list):
181 ws.append((write_field, *field_value))
182 else:
183 ws.append((write_field, field_value))
184 ws.append((None,))
191 wb.remove_base_sheet() 185 wb.remove_base_sheet()
192 wb.save(excel_path) 186 wb.save(excel_path)
193 except Exception as e: 187 except Exception as e:
194 self.folder_log.error('{0} [wb build error] [path={1}] [error={2}]'.format( 188 self.folder_log.error('{0} [wb build error] [path={1}] [error={2}]'.format(
195 self.log_base, excel_path, traceback.format_exc())) 189 self.log_base, excel_path, traceback.format_exc()))
196 190
197 def ocr_process(self, img_path, classify): 191 def ocr_process(self, img_path, classify, all_res, seperate_dir):
198 if os.path.exists(img_path): 192 if os.path.exists(img_path):
199 # TODO 图片验证 193 # TODO 图片验证
200 with open(img_path, 'rb') as f: 194 with open(img_path, 'rb') as f:
...@@ -203,8 +197,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -203,8 +197,9 @@ class Command(BaseCommand, LoggerMixin):
203 file_data = base64_data.decode() 197 file_data = base64_data.decode()
204 json_data = { 198 json_data = {
205 "file": file_data, 199 "file": file_data,
206 "classify": classify
207 } 200 }
201 if seperate_dir is None:
202 json_data["classify"] = classify
208 203
209 for times in range(consts.RETRY_TIMES): 204 for times in range(consts.RETRY_TIMES):
210 try: 205 try:
...@@ -221,7 +216,20 @@ class Command(BaseCommand, LoggerMixin): ...@@ -221,7 +216,20 @@ class Command(BaseCommand, LoggerMixin):
221 speed_time = int(end_time - start_time) 216 speed_time = int(end_time - start_time)
222 self.folder_log.info('{0} [ocr success] [img={1}] [res={2}] [speed_time={3}]'.format( 217 self.folder_log.info('{0} [ocr success] [img={1}] [res={2}] [speed_time={3}]'.format(
223 self.log_base, img_path, ocr_res, speed_time)) 218 self.log_base, img_path, ocr_res, speed_time))
224 return ocr_res 219
220 if isinstance(ocr_res, dict):
221 if ocr_res.get('code') == 1:
222 data_list = ocr_res.get('data', [])
223 if isinstance(data_list, list):
224 for ocr_data in data_list:
225 if ocr_data.get('classify') == classify:
226 if seperate_dir is not None:
227 os.makedirs(seperate_dir, exist_ok=True)
228 shutil.move(img_path, seperate_dir)
229 if classify in consts.LICENSE_CLASSIFY_SET_1:
230 self.license1_process(ocr_data, all_res, classify)
231 elif classify in consts.LICENSE_CLASSIFY_SET_2:
232 self.license2_process(ocr_data, all_res, classify, img_path)
225 else: 233 else:
226 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path)) 234 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
227 235
...@@ -280,18 +288,20 @@ class Command(BaseCommand, LoggerMixin): ...@@ -280,18 +288,20 @@ class Command(BaseCommand, LoggerMixin):
280 rebuild_res = self.ltgt_res_process(ocr_res, label, excel_path) 288 rebuild_res = self.ltgt_res_process(ocr_res, label, excel_path)
281 return rebuild_res 289 return rebuild_res
282 290
283 def images_process(self, img_path_list, classify, excel_path): 291 def images_process(self, img_path_list, classify, excel_path, seperate_dir):
284 all_res = {} 292 all_res = []
285 for img_path in img_path_list: 293 for img_path in img_path_list:
286 ocr_res = self.ocr_process(img_path, classify) 294 self.ocr_process(img_path, classify, all_res, seperate_dir)
287 all_res[img_path] = ocr_res 295 if len(all_res) > 0:
288 self.res_process(all_res, classify, excel_path) 296 self.res_process(all_res, excel_path, classify)
297 return all_res
289 298
290 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): 299 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir):
291 if os.path.exists(path): 300 if os.path.exists(path):
292 rebuild_res = None 301 rebuild_res = None
293 try: 302 try:
294 img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) 303 img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path(
304 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
295 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) 305 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
296 pdf_handler = PDFHandler(path, img_save_path) 306 pdf_handler = PDFHandler(path, img_save_path)
297 if classify in self.ltgt_classify_mapping: 307 if classify in self.ltgt_classify_mapping:
...@@ -308,15 +318,16 @@ class Command(BaseCommand, LoggerMixin): ...@@ -308,15 +318,16 @@ class Command(BaseCommand, LoggerMixin):
308 rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], 318 rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify],
309 excel_path, path) 319 excel_path, path)
310 else: 320 else:
311 self.images_process(pdf_handler.img_path_list, classify, excel_path) 321 rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path)
312 shutil.move(path, pdf_save_path) 322 shutil.move(path, pdf_save_path)
313 return rebuild_res 323 return rebuild_res
314 324
315 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir): 325 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir):
316 if os.path.exists(path): 326 if os.path.exists(path):
317 rebuild_res = None 327 rebuild_res = None
318 try: 328 try:
319 img_save_path, excel_path, tiff_save_path = self.get_path(name, img_output_dir, wb_output_dir, tiff_output_dir) 329 img_save_path, excel_path, tiff_save_path, seperate_path = self.get_path(
330 name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir)
320 self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path)) 331 self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path))
321 tiff_handler = TIFFHandler(path, img_save_path) 332 tiff_handler = TIFFHandler(path, img_save_path)
322 tiff_handler.extract_image() 333 tiff_handler.extract_image()
...@@ -330,14 +341,15 @@ class Command(BaseCommand, LoggerMixin): ...@@ -330,14 +341,15 @@ class Command(BaseCommand, LoggerMixin):
330 rebuild_res = self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify], 341 rebuild_res = self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify],
331 excel_path, path) 342 excel_path, path)
332 else: 343 else:
333 self.images_process(tiff_handler.img_path_list, classify, excel_path) 344 rebuild_res = self.images_process(tiff_handler.img_path_list, classify, excel_path, seperate_path)
334 shutil.move(path, tiff_save_path) 345 shutil.move(path, tiff_save_path)
335 return rebuild_res 346 return rebuild_res
336 347
337 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): 348 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir, seperate_dir):
338 rebuild_res = None 349 rebuild_res = None
339 try: 350 try:
340 img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) 351 img_save_path, excel_path, _, seperate_path = self.get_path(
352 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
341 except Exception as e: 353 except Exception as e:
342 self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format( 354 self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
343 self.log_base, path, traceback.format_exc())) 355 self.log_base, path, traceback.format_exc()))
...@@ -345,9 +357,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -345,9 +357,7 @@ class Command(BaseCommand, LoggerMixin):
345 if classify in self.ltgt_classify_mapping: 357 if classify in self.ltgt_classify_mapping:
346 rebuild_res = self.ltgt_process([path], self.ltgt_classify_mapping[classify], excel_path, path) 358 rebuild_res = self.ltgt_process([path], self.ltgt_classify_mapping[classify], excel_path, path)
347 else: 359 else:
348 ocr_res = self.ocr_process(path, classify) 360 rebuild_res = self.images_process([img_save_path], classify, excel_path, seperate_path)
349 all_res = {path: ocr_res}
350 self.res_process(all_res, classify, excel_path)
351 shutil.move(path, img_save_path) 361 shutil.move(path, img_save_path)
352 return rebuild_res 362 return rebuild_res
353 363
...@@ -380,7 +390,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -380,7 +390,14 @@ class Command(BaseCommand, LoggerMixin):
380 wb = Workbook() 390 wb = Workbook()
381 for result in result_list: 391 for result in result_list:
382 try: 392 try:
393 if result[self.CLASSIFY_KEY] in self.sheet_content:
383 sheet_name, head_fields = self.sheet_content[result[self.CLASSIFY_KEY]] 394 sheet_name, head_fields = self.sheet_content[result[self.CLASSIFY_KEY]]
395 else:
396 sheet_name, key_field, side_field_order, field_order = self.field_map[result[self.CLASSIFY_KEY]]
397 if key_field is not None and key_field in result[self.RESULT_KEY]:
398 head_fields = [b for _, b in side_field_order]
399 else:
400 head_fields = [b for _, b in field_order]
384 row = [] 401 row = []
385 for field in head_fields: 402 for field in head_fields:
386 row.append(result[self.RESULT_KEY].get(field)) 403 row.append(result[self.RESULT_KEY].get(field))
...@@ -395,7 +412,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -395,7 +412,7 @@ class Command(BaseCommand, LoggerMixin):
395 self.log_base, result, traceback.format_exc())) 412 self.log_base, result, traceback.format_exc()))
396 wb.save(wb_path) 413 wb.save(wb_path)
397 414
398 def folder_process(self, input_dir, classify, result_queue): 415 def folder_process(self, input_dir, classify, is_combined, result_queue):
399 while not os.path.isdir(input_dir): 416 while not os.path.isdir(input_dir):
400 self.folder_log.info('{0} [input dir is not dir] [input_dir={1}]'.format(self.log_base, input_dir)) 417 self.folder_log.info('{0} [input dir is not dir] [input_dir={1}]'.format(self.log_base, input_dir))
401 if self.switch: 418 if self.switch:
...@@ -404,6 +421,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -404,6 +421,7 @@ class Command(BaseCommand, LoggerMixin):
404 else: 421 else:
405 return 422 return
406 output_dir = os.path.join(os.path.dirname(input_dir), 'Output') 423 output_dir = os.path.join(os.path.dirname(input_dir), 'Output')
424 seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) if is_combined else None
407 img_output_dir = os.path.join(output_dir, 'image') 425 img_output_dir = os.path.join(output_dir, 'image')
408 wb_output_dir = os.path.join(output_dir, 'excel') 426 wb_output_dir = os.path.join(output_dir, 'excel')
409 pdf_output_dir = os.path.join(output_dir, 'pdf') 427 pdf_output_dir = os.path.join(output_dir, 'pdf')
...@@ -415,6 +433,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -415,6 +433,8 @@ class Command(BaseCommand, LoggerMixin):
415 os.makedirs(pdf_output_dir, exist_ok=True) 433 os.makedirs(pdf_output_dir, exist_ok=True)
416 os.makedirs(tiff_output_dir, exist_ok=True) 434 os.makedirs(tiff_output_dir, exist_ok=True)
417 os.makedirs(failed_output_dir, exist_ok=True) 435 os.makedirs(failed_output_dir, exist_ok=True)
436 if seperate_dir is not None:
437 os.makedirs(seperate_dir, exist_ok=True)
418 os_error_filename_set = set() 438 os_error_filename_set = set()
419 while self.switch: 439 while self.switch:
420 # if not os.path.isdir(input_dir): 440 # if not os.path.isdir(input_dir):
...@@ -438,14 +458,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -438,14 +458,14 @@ class Command(BaseCommand, LoggerMixin):
438 if os.path.isfile(path): 458 if os.path.isfile(path):
439 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) 459 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path))
440 if name.endswith('.pdf') or name.endswith('.PDF'): 460 if name.endswith('.pdf') or name.endswith('.PDF'):
441 result = self.pdf_process(name, path, classify, img_output_dir, 461 result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir,
442 wb_output_dir, pdf_output_dir) 462 pdf_output_dir, seperate_dir)
443 elif name.endswith('.tif') or name.endswith('.TIF'): 463 elif name.endswith('.tif') or name.endswith('.TIF'):
444 result = self.tif_process(name, path, classify, img_output_dir, 464 result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir,
445 wb_output_dir, tiff_output_dir) 465 tiff_output_dir, seperate_dir)
446 else: 466 else:
447 result = self.img_process(name, path, classify, wb_output_dir, 467 result = self.img_process(name, path, classify, wb_output_dir, img_output_dir,
448 img_output_dir, pdf_output_dir) 468 pdf_output_dir, seperate_dir)
449 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) 469 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
450 else: 470 else:
451 result = None 471 result = None
...@@ -497,7 +517,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -497,7 +517,8 @@ class Command(BaseCommand, LoggerMixin):
497 if one_input_dir is None: 517 if one_input_dir is None:
498 one_input_dir = input_dir 518 one_input_dir = input_dir
499 classify = int(classify_idx.split('_')[0]) 519 classify = int(classify_idx.split('_')[0])
500 process = Process(target=self.folder_process, args=(input_dir, classify, result_queue)) 520 is_combined = True if int(classify_idx.split('_')[2]) == 1 else False
521 process = Process(target=self.folder_process, args=(input_dir, classify, is_combined, result_queue))
501 process_list.append(process) 522 process_list.append(process)
502 523
503 wb_dir = os.path.dirname(os.path.dirname(one_input_dir)) 524 wb_dir = os.path.dirname(os.path.dirname(one_input_dir))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!