6966f069 by 周伟奇

Merge branch 'feature/zip'

2 parents 7cf03ec9 6010c32f
@@ -16,7 +16,7 @@ from multiprocessing import Process, Queue, Manager, Lock

 from settings import conf
 from common.mixins import LoggerMixin
-from common.tools.file_tools import write_zip_file
+from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, get_file_paths
 from common.tools.pdf_to_img import PDFHandler
 from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict
 from common.electronic_hil_contract.hil_contract_ocr import predict as hil_predict
@@ -89,14 +89,39 @@ class Command(BaseCommand, LoggerMixin):
         # doc = doc_class.objects.filter(id=doc_id).first()
         # return doc, business_type

-    def get_doc_info(self):
-        task_str, is_priority = rh.dequeue()
-        if task_str is None:
-            self.online_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base))
-            return None, None, None, None
+    def get_zip_doc_info(self, task_str):
+        try:
+            info_tuple = task_str.split(consts.SPLIT_STR)
+            if len(info_tuple) == 2:
+                business_type, doc_id_str = info_tuple
+            else:
+                business_type, doc_id_str, classify_1_str = info_tuple
+            doc_id = int(doc_id_str)
+            doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
+            zip_doc = doc_class.objects.filter(id=doc_id).first()

-        self.online_log.info('{0} [get_doc_info] [task={1}] [is_priority={2}]'.format(
-            self.log_base, task_str, is_priority))
+            if zip_doc is None:
+                self.online_log.warn('{0} [zip_2_pdfs] [doc not exist] [task_str={1}]'.format(
+                    self.log_base, task_str))
+                return None, business_type
+            elif zip_doc.status != DocStatus.INIT.value:
+                self.online_log.warn('{0} [zip_2_pdfs] [doc status error] [task_str={1}] [doc_status={2}]'.format(
+                    self.log_base, task_str, zip_doc.status))
+                return None, business_type
+
+            zip_doc.status = DocStatus.PROCESSING.value
+            zip_doc.start_time = timezone.now()
+            zip_doc.save()
+        except Exception as e:
+            self.online_log.error('{0} [process error (zip_2_pdfs)] [error={1}]'.format(
+                self.log_base, traceback.format_exc()))
+            return None, None
+        else:
+            self.online_log.info('{0} [zip_2_pdfs] [db save end] [task_str={1}]'.format(
+                self.log_base, task_str))
+            return zip_doc, business_type
+
+    def get_doc_info(self, task_str, is_priority=False):
         try:
             # doc, business_type = self.get_doc_object(task_str)
             info_tuple = task_str.split(consts.SPLIT_STR)
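Note on the task-string format shared by these two methods: a task is the business_type, doc_id and classify_1 fields joined with consts.SPLIT_STR. A minimal sketch of that round trip, assuming SPLIT_STR is '_' (matching the AFC_111_0 example in the zip_2_pdfs comment further down):

    SPLIT_STR = '_'  # assumption for illustration; the real value lives in consts

    def build_task_str(business_type, doc_id, classify_1=0):
        # e.g. ('AFC', 111, 0) -> 'AFC_111_0'
        return SPLIT_STR.join([business_type, str(doc_id), str(classify_1)])

    def parse_task_str(task_str):
        # mirrors the 2- or 3-field split handled above
        parts = task_str.split(SPLIT_STR)
        if len(parts) == 2:
            business_type, doc_id_str = parts
            classify_1_str = '0'
        else:
            business_type, doc_id_str, classify_1_str = parts
        return business_type, int(doc_id_str), classify_1_str

    assert parse_task_str(build_task_str('AFC', 111)) == ('AFC', 111, '0')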
@@ -1094,11 +1119,153 @@ class Command(BaseCommand, LoggerMixin):
             # summary['confidence'] = max(summary['confidence'])
         return merged_bs_summary

-    def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock, error_list, res_dict, finish_queue):
+    def zip_2_pdfs(self, zip_task_queue, error_list):
+        while len(error_list) == 0:
+            # 1. Read a task from the Redis zip queue, e.g. AFC_111_0
+            task_str = rh.dequeue_zip()
+            if task_str is None:
+                self.online_log.info('{0} [zip_2_pdfs] [zip queue empty]'.format(self.log_base))
+                time.sleep(self.sleep_time_doc_get)
+                continue
+
+            self.online_log.info('{0} [zip_2_pdfs] [task={1}]'.format(self.log_base, task_str))
+
+            # 2. Update the doc status to "processing"
+            zip_doc, business_type = self.get_zip_doc_info(task_str)
+            if zip_doc is None:
+                time.sleep(self.sleep_time_doc_get)
+                continue
+
+            # 3. Download the archive from ECM
+            doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(zip_doc.id))
+            os.makedirs(doc_data_path, exist_ok=True)
+            zip_path = os.path.join(doc_data_path, zip_doc.document_name)
+            for times in range(consts.RETRY_TIMES):
+                try:
+                    self.edms.download(zip_path, zip_doc.metadata_version_id, zip_doc.document_scheme, business_type)
+                except Exception as e:
+                    self.online_log.warn('{0} [zip_2_pdfs] [ecm download failed] [task={1}] [times={2}] '
+                                         '[error={3}]'.format(self.log_base, task_str, times,
+                                                              traceback.format_exc()))
+                else:
+                    self.online_log.info('{0} [zip_2_pdfs] [ecm download success] [task={1}] [times={2}] '
+                                         '[zip_path={3}]'.format(self.log_base, task_str, times, zip_path))
+                    break
+            else:
+                try:
+                    zip_doc.status = DocStatus.PROCESS_FAILED.value
+                    zip_doc.save()
+                except Exception as e:
+                    self.online_log.error('{0} [zip_2_pdfs] [process error (db save)] [task={1}] [error={2}]'.format(
+                        self.log_base, task_str, traceback.format_exc()))
+                time.sleep(self.sleep_time_doc_get)
+                continue
+
+            # 4. Extract the archive
+            extract_path = os.path.join(doc_data_path, 'extract_content')
+            os.makedirs(extract_path, exist_ok=True)
+            try:
+                pwd_list = get_pwd_list_from_str(zip_doc.document_name)
+                is_success = extract_zip_or_rar(zip_path, extract_path, pwd_list)
+            except Exception as e:
+                is_success = False
+
+            if not is_success:
+                self.online_log.warn('{0} [zip_2_pdfs] [extract failed] [task={1}] [error={2}]'.format(
+                    self.log_base, task_str, traceback.format_exc()))
+                try:
+                    zip_doc.status = DocStatus.PROCESS_FAILED.value
+                    zip_doc.save()
+                except Exception as e:
+                    self.online_log.error('{0} [zip_2_pdfs] [process error (db save)] [task={1}] [error={2}]'.format(
+                        self.log_base, task_str, traceback.format_exc()))
+                time.sleep(self.sleep_time_doc_get)
+                continue
+
+            self.online_log.info('{0} [zip_2_pdfs] [extract success] [task={1}] [extract_path={2}]'.format(
+                self.log_base, task_str, extract_path))
+
+            # 5. Find the PDF files, move each to its own folder, create a new doc record and a new task_str for the queue
+            pdf_paths = get_file_paths(extract_path, ['.pdf', '.PDF'])
+            count = 0
+            pdf_task_str_list = []
+            for pdf_path in pdf_paths:
+                if count > 50:
+                    self.online_log.info('{0} [zip_2_pdfs] [pdf count > 50, skip] [task={1}]'.format(
+                        self.log_base, task_str))
+                    break
+
+                count += 1
+                try:
+                    doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
+                    pdf_doc = doc_class.objects.create(
+                        metadata_version_id='from: {0}'.format(zip_doc.id),
+                        application_id=zip_doc.application_id,
+                        # main_applicant=applicant_data.get('mainApplicantName'),
+                        # co_applicant=applicant_data.get('coApplicantName'),
+                        # guarantor_1=applicant_data.get('guarantor1Name'),
+                        # guarantor_2=applicant_data.get('guarantor2Name'),
+                        document_name=os.path.basename(pdf_path),
+                        document_scheme=zip_doc.document_scheme,
+                        data_source=zip_doc.data_source,
+                        upload_finish_time=zip_doc.upload_finish_time,
+                    )
+
+                    pdf_doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(pdf_doc.id))
+                    os.makedirs(pdf_doc_data_path, exist_ok=True)
+                    target_pdf_path = os.path.join(pdf_doc_data_path, '{0}.pdf'.format(pdf_doc.id))
+                    shutil.move(pdf_path, target_pdf_path)
+
+                    pdf_task_str = consts.SPLIT_STR.join([business_type, str(pdf_doc.id), '0'])
+                    pdf_task_str_list.append(pdf_task_str)
+                except Exception as e:
+                    self.online_log.warn('{0} [zip_2_pdfs] [recreate pdf task failed] [task={1}] [pdf_path={2}]'
+                                         ' [error={3}]'.format(self.log_base, task_str, pdf_path,
+                                                               traceback.format_exc()))
+                else:
+                    self.online_log.info('{0} [zip_2_pdfs] [recreate pdf task success] [task={1}] '
+                                         '[pdf_task={2}]'.format(self.log_base, task_str,
+                                                                 pdf_task_str))
+
+            if len(pdf_task_str_list) > 0:
+                for pdf_task_str in pdf_task_str_list:
+                    try:
+                        zip_task_queue.put(pdf_task_str)
+                    except Exception as e:
+                        self.online_log.warn('{0} [zip_2_pdfs] [put pdf task failed] [task={1}] [pdf_task={2}]'
+                                             ' [error={3}]'.format(self.log_base, task_str, pdf_task_str,
+                                                                   traceback.format_exc()))
+            else:
+                self.online_log.info('{0} [zip_2_pdfs] [zip task no pdf] [task={1}]'.format(self.log_base, task_str))
+
+            # 6. Done: update the doc status to "complete"
+            try:
+                zip_doc.status = DocStatus.COMPLETE.value
+                zip_doc.end_time = timezone.now()
+                zip_doc.duration = min((zip_doc.end_time - zip_doc.start_time).seconds, 32760)
+                zip_doc.save()
+            except Exception as e:
+                self.online_log.error('{0} [zip_2_pdfs] [process error (db save)] [task={1}] [error={2}]'.format(
+                    self.log_base, task_str, traceback.format_exc()))
+
+    def pdf_2_img_2_queue(self, img_queue, todo_count_dict, lock, error_list, res_dict, finish_queue, zip_task_queue):
         while self.switch:
             try:
+                task_str = zip_task_queue.get(block=False)
+                is_priority = False
+            except Exception as e:
+                task_str, is_priority = rh.dequeue()
+            if task_str is None:
+                self.online_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base))
+                time.sleep(self.sleep_time_doc_get)
+                continue
+
+            self.online_log.info('{0} [get_doc_info] [task={1}] [is_priority={2}]'.format(
+                self.log_base, task_str, is_priority))
+
+            try:
                 # 1. Get the file info from the queue
-                doc, business_type, task_str, classify_1_str = self.get_doc_info()
+                doc, business_type, task_str, classify_1_str = self.get_doc_info(task_str, is_priority)
                 # Handle an empty queue
                 if doc is None:
                     time.sleep(self.sleep_time_doc_get)
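The reworked pdf_2_img_2_queue above drains the in-process zip_task_queue first and only falls back to Redis when it is empty. A self-contained sketch of that non-blocking fallback, with queue.Queue and a lambda standing in for the real multiprocessing queue and Redis handler:

    import queue

    def next_task(zip_task_queue, redis_dequeue):
        # Prefer PDF tasks re-created from unpacked archives, then the Redis queues.
        try:
            return zip_task_queue.get(block=False), False  # locally re-queued tasks are never priority
        except queue.Empty:
            return redis_dequeue()  # returns (task_str or None, is_priority)

    local_q = queue.Queue()
    local_q.put('AFC_112_0')
    print(next_task(local_q, lambda: (None, False)))      # ('AFC_112_0', False)
    print(next_task(local_q, lambda: ('HIL_7_0', True)))  # ('HIL_7_0', True)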
@@ -1119,19 +1286,29 @@ class Command(BaseCommand, LoggerMixin):
                 if classify_1_str == '0':
                     try:
                         # 2. Fetch the PDF file from EDMS
-                        max_count_obj = Configs.objects.filter(id=2).first()
-                        try:
-                            max_img_count = int(max_count_obj.value)
-                        except Exception as e:
-                            max_img_count = 500
+                        # max_count_obj = Configs.objects.filter(id=2).first()
+                        # try:
+                        #     max_img_count = int(max_count_obj.value)
+                        # except Exception as e:
+                        max_img_count = 500

                         for times in range(consts.RETRY_TIMES):
                             try:
-                                if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
+                                if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
+                                    self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] '
+                                                         '[pdf_path={3}]'.format(self.log_base, task_str,
+                                                                                 times, pdf_path))
+                                elif os.path.exists(pdf_path):
+                                    self.online_log.info('{0} [pdf from zip file] [task={1}] [times={2}] '
+                                                         '[pdf_path={3}]'.format(self.log_base, task_str,
+                                                                                 times, pdf_path))
+                                else:
                                     # self.edms.download(pdf_path, doc.metadata_version_id)
-                                    self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, business_type)
-                                    self.online_log.info('{0} [edms download success] [task={1}] [times={2}] '
-                                                         '[pdf_path={3}]'.format(self.log_base, task_str, times, pdf_path))
+                                    self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme,
+                                                       business_type)
+                                    self.online_log.info('{0} [ecm download success] [task={1}] [times={2}] '
+                                                         '[pdf_path={3}]'.format(self.log_base, task_str,
+                                                                                 times, pdf_path))

                                 # 3. Extract images from the PDF file
                                 self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
@@ -2098,9 +2275,16 @@ class Command(BaseCommand, LoggerMixin):
         res_dict = manager.dict()
         img_queue = Queue(self.img_queue_size)
         finish_queue = Queue()
+        zip_task_queue = Queue()

         process_list = []
-        pdf_process = Process(target=self.pdf_2_img_2_queue, args=(img_queue, todo_count_dict, lock, error_list, res_dict, finish_queue))
+        zip_process = Process(target=self.zip_2_pdfs,
+                              args=(zip_task_queue, error_list))
+        process_list.append(zip_process)
+
+        pdf_process = Process(target=self.pdf_2_img_2_queue,
+                              args=(img_queue, todo_count_dict, lock, error_list, res_dict,
+                                    finish_queue, zip_task_queue))
         process_list.append(pdf_process)

         for url in self.ocr_1_urls.values():
......
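For context, the wiring above bridges zip_2_pdfs (producer) and pdf_2_img_2_queue (consumer) with a shared multiprocessing.Queue. A standalone sketch of that pattern; the function names and the None sentinel are illustrative only, since the real workers loop until the service is switched off:

    from multiprocessing import Process, Queue

    def producer(q):
        # stand-in for zip_2_pdfs: push re-created PDF task strings
        for doc_id in (101, 102):
            q.put('AFC_{0}_0'.format(doc_id))
        q.put(None)  # sentinel, only for this sketch

    def consumer(q):
        # stand-in for pdf_2_img_2_queue: drain the shared queue
        while True:
            task = q.get()
            if task is None:
                break
            print('got', task)

    if __name__ == '__main__':
        q = Queue()
        workers = [Process(target=producer, args=(q,)), Process(target=consumer, args=(q,))]
        for p in workers:
            p.start()
        for p in workers:
            p.join()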
@@ -570,12 +570,11 @@ class UploadDocView(GenericView, DocHandler):
         data_source = self.fix_data_source(data_source)
         document_scheme = self.fix_scheme(document_scheme)

-        if document_name.endswith('.zip'):
-            self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args))
-            return response.ok()
+        # if document_name.endswith('.zip'):
+        #     self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args))
+        #     return response.ok()

         if data_source == consts.DATA_SOURCE_LIST[1]:
-            if isinstance(document_name, str):
-                if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
-                    self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
-                    return response.ok()
+            if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
+                self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
+                return response.ok()
@@ -594,17 +593,24 @@ class UploadDocView(GenericView, DocHandler):
                 data_source=data_source,
                 upload_finish_time=document.get('uploadFinishTime'),
             )
+
            # 3. Choose which queue the task enters
            is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
+           is_zip = False

            classify_1 = 0
+           # Electronic contract
            if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
                for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
                    if keyword in document_name:
                        classify_1 = classify_1_tmp
                        break
+           elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
+                   or document_name.endswith('.RAR'):
+               is_zip = True
+
            task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)])
-           enqueue_res = rh.enqueue([task], is_priority)
+           enqueue_res = rh.enqueue([task], is_priority, is_zip)
            self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
                                  '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id,
                                                                               is_priority, enqueue_res))
@@ -669,7 +675,7 @@ class PriorityDocView(GenericView, DocHandler):
             self.running_log.info(
                 '[priority doc success] [args={0}]'.format(args))
         else:
-            enqueue_res = rh.enqueue(tasks_list, is_priority=True)
+            enqueue_res = rh.enqueue(tasks_list, is_priority=True)  # TODO this may push archive files into the priority queue
             self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format(
                 args, tasks_list, enqueue_res))
         return response.ok()
......
@@ -35,16 +35,27 @@ class RedisHandler:
         self.prefix = 'bwm_ocr'
         self.common_queue_key = '{0}:common_queue'.format(self.prefix)
         self.priority_queue_key = '{0}:priority_queue'.format(self.prefix)
+        self.zip_queue_key = '{0}:zip_queue'.format(self.prefix)
         self.session_id_key = '{0}:session_id'.format(self.prefix)
         self.cms_token_key = '{0}:cms_token'.format(self.prefix)
         self.ecm_token_key = '{0}:ecm_token'.format(self.prefix)
         self.login_limit_key = '{0}:login_limit'.format(self.prefix)

-    def enqueue(self, tasks, is_priority=False):
+    def enqueue(self, tasks, is_priority=False, is_zip=False):
         # 1
-        key = self.priority_queue_key if is_priority else self.common_queue_key
+        if is_zip:
+            key = self.zip_queue_key
+        elif is_priority:
+            key = self.priority_queue_key
+        else:
+            key = self.common_queue_key
         return self.redis.lpush(key, tasks)

+    def dequeue_zip(self):
+        # task or None
+        task = self.redis.rpop(self.zip_queue_key)
+        return task
+
     def dequeue(self):
         # task or None
         task = self.redis.rpop(self.priority_queue_key)
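A rough sketch of the routing that enqueue() and dequeue_zip() implement, written against redis-py directly; the key names mirror the ones defined in __init__ above and a local Redis instance is assumed. lpush plus rpop keeps each list FIFO:

    import redis

    r = redis.Redis()
    prefix = 'bwm_ocr'

    def enqueue(tasks, is_priority=False, is_zip=False):
        if is_zip:
            key = '{0}:zip_queue'.format(prefix)
        elif is_priority:
            key = '{0}:priority_queue'.format(prefix)
        else:
            key = '{0}:common_queue'.format(prefix)
        return r.lpush(key, *tasks)  # unpack the task list into individual values

    def dequeue_zip():
        return r.rpop('{0}:zip_queue'.format(prefix))  # bytes or None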
......
@@ -1,4 +1,8 @@
 import os
+import re
+import zipfile
+
+import rarfile
 from zipfile import ZipFile


@@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path):
                 src_file_path = os.path.join(root, single_file)
                 file_target_path = os.path.join(root_target_path, single_file)
                 z.write(src_file_path, file_target_path)
+
+
+def get_pwd_list_from_str(doc_name):
+    try:
+        pwd_list = re.findall(r'\d{6}', doc_name)
+        return pwd_list
+    except Exception as e:
+        return []
+
+
+def extract_zip_or_rar(file_path, extract_path, pwd_list=[]):
+    if file_path.endswith('.zip') or file_path.endswith('.ZIP'):
+        if len(pwd_list) > 0:
+            for password in pwd_list:
+                try:
+                    with zipfile.ZipFile(file_path) as zf:
+                        zf.extractall(extract_path, pwd=bytes(password, 'utf-8'))
+                except Exception as e:
+                    continue
+                else:
+                    return True
+            else:
+                return False
+        else:
+            try:
+                with zipfile.ZipFile(file_path) as zf:
+                    zf.extractall(extract_path)
+            except Exception as e:
+                return False
+            else:
+                return True
+    elif file_path.endswith('.rar') or file_path.endswith('.RAR'):
+        if len(pwd_list) > 0:
+            for password in pwd_list:
+                try:
+                    with rarfile.RarFile(file_path) as rf:
+                        rf.extractall(extract_path, pwd=password)
+                except Exception as e:
+                    continue
+                else:
+                    return True
+            else:
+                return False
+        else:
+            try:
+                with rarfile.RarFile(file_path) as rf:
+                    rf.extractall(extract_path)
+            except Exception as e:
+                return False
+            else:
+                return True
+    else:
+        return False
+
+
+def get_file_paths(input_path, suffix_list):
+    """
+
+    Args:
+        input_path: str, directory to search
+        suffix_list: list, file suffixes to look for
+
+    Returns: generator of absolute paths of the matching files
+
+    """
+    for parent, _, filenames in os.walk(input_path):
+        for filename in filenames:
+            for suffix in suffix_list:
+                if filename.endswith(suffix):
+                    file_path = os.path.join(parent, filename)
+                    break
+            else:
+                continue
+            yield file_path
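Taken together, the three new helpers compose like this (import path as used by the command module above; the paths and archive name are made up for illustration):

    from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, get_file_paths

    zip_path = '/tmp/ocr_data/AFC/tmp/123/ABC123_630001.zip'    # hypothetical
    extract_path = '/tmp/ocr_data/AFC/tmp/123/extract_content'  # hypothetical

    pwd_list = get_pwd_list_from_str('ABC123_630001.zip')  # every 6-digit run is tried as a password -> ['630001']
    if extract_zip_or_rar(zip_path, extract_path, pwd_list):
        for pdf_path in get_file_paths(extract_path, ['.pdf', '.PDF']):
            print(pdf_path)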
......