6010c32f by 周伟奇

add zip & rar file

1 parent f80b8302
...@@ -566,15 +566,14 @@ class UploadDocView(GenericView, DocHandler): ...@@ -566,15 +566,14 @@ class UploadDocView(GenericView, DocHandler):
566 data_source = self.fix_data_source(data_source) 566 data_source = self.fix_data_source(data_source)
567 document_scheme = self.fix_scheme(document_scheme) 567 document_scheme = self.fix_scheme(document_scheme)
568 568
569 if document_name.endswith('.zip'): 569 # if document_name.endswith('.zip'):
570 self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args)) 570 # self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args))
571 return response.ok() 571 # return response.ok()
572 572
573 if data_source == consts.DATA_SOURCE_LIST[1]: 573 if data_source == consts.DATA_SOURCE_LIST[1]:
574 if isinstance(document_name, str): 574 if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
575 if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): 575 self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
576 self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) 576 return response.ok()
577 return response.ok()
578 577
579 # 2. 根据业务类型分库存储 578 # 2. 根据业务类型分库存储
580 doc_class, prefix = self.get_doc_class(business_type) 579 doc_class, prefix = self.get_doc_class(business_type)
...@@ -590,17 +589,24 @@ class UploadDocView(GenericView, DocHandler): ...@@ -590,17 +589,24 @@ class UploadDocView(GenericView, DocHandler):
590 data_source=data_source, 589 data_source=data_source,
591 upload_finish_time=document.get('uploadFinishTime'), 590 upload_finish_time=document.get('uploadFinishTime'),
592 ) 591 )
592
593 # 3. 选择队列进入 593 # 3. 选择队列进入
594 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists() 594 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
595 595 is_zip = False
596
596 classify_1 = 0 597 classify_1 = 0
598 # 电子合同
597 if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: 599 if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
598 for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): 600 for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
599 if keyword in document_name: 601 if keyword in document_name:
600 classify_1 = classify_1_tmp 602 classify_1 = classify_1_tmp
601 break 603 break
604 elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
605 or document_name.endswith('.RAR'):
606 is_zip = True
607
602 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) 608 task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)])
603 enqueue_res = rh.enqueue([task], is_priority) 609 enqueue_res = rh.enqueue([task], is_priority, is_zip)
604 self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' 610 self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
605 '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id, 611 '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id,
606 is_priority, enqueue_res)) 612 is_priority, enqueue_res))
...@@ -665,7 +671,7 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -665,7 +671,7 @@ class PriorityDocView(GenericView, DocHandler):
665 self.running_log.info( 671 self.running_log.info(
666 '[priority doc success] [args={0}]'.format(args)) 672 '[priority doc success] [args={0}]'.format(args))
667 else: 673 else:
668 enqueue_res = rh.enqueue(tasks_list, is_priority=True) 674 enqueue_res = rh.enqueue(tasks_list, is_priority=True) # TODO 可能把压缩文件放入优先队列
669 self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format( 675 self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format(
670 args, tasks_list, enqueue_res)) 676 args, tasks_list, enqueue_res))
671 return response.ok() 677 return response.ok()
......
...@@ -35,16 +35,27 @@ class RedisHandler: ...@@ -35,16 +35,27 @@ class RedisHandler:
35 self.prefix = 'bwm_ocr' 35 self.prefix = 'bwm_ocr'
36 self.common_queue_key = '{0}:common_queue'.format(self.prefix) 36 self.common_queue_key = '{0}:common_queue'.format(self.prefix)
37 self.priority_queue_key = '{0}:priority_queue'.format(self.prefix) 37 self.priority_queue_key = '{0}:priority_queue'.format(self.prefix)
38 self.zip_queue_key = '{0}:zip_queue'.format(self.prefix)
38 self.session_id_key = '{0}:session_id'.format(self.prefix) 39 self.session_id_key = '{0}:session_id'.format(self.prefix)
39 self.cms_token_key = '{0}:cms_token'.format(self.prefix) 40 self.cms_token_key = '{0}:cms_token'.format(self.prefix)
40 self.ecm_token_key = '{0}:ecm_token'.format(self.prefix) 41 self.ecm_token_key = '{0}:ecm_token'.format(self.prefix)
41 self.login_limit_key = '{0}:login_limit'.format(self.prefix) 42 self.login_limit_key = '{0}:login_limit'.format(self.prefix)
42 43
def enqueue(self, tasks, is_priority=False, is_zip=False):
    """Push tasks onto one of the Redis queues and return the push result.

    Queue selection, in order of precedence:
      1. ``is_zip`` truthy      -> ``self.zip_queue_key``
      2. ``is_priority`` truthy -> ``self.priority_queue_key``
      3. otherwise              -> ``self.common_queue_key``

    Args:
        tasks: the tasks to push; handed unchanged to ``self.redis.lpush``.
        is_priority: route to the priority queue (ignored when ``is_zip`` is set).
        is_zip: route to the zip queue.

    Returns:
        Whatever ``self.redis.lpush`` returns for the chosen key.
    """
    if is_zip:
        # The zip queue wins even when the task is also flagged as priority.
        target_key = self.zip_queue_key
    else:
        target_key = self.priority_queue_key if is_priority else self.common_queue_key
    return self.redis.lpush(target_key, tasks)
47 53
def dequeue_zip(self):
    """Pop one entry from the zip queue.

    Returns:
        The result of ``self.redis.rpop`` on ``self.zip_queue_key``
        (per the original note: a task, or None).
    """
    return self.redis.rpop(self.zip_queue_key)
58
48 def dequeue(self): 59 def dequeue(self):
49 # task or None 60 # task or None
50 task = self.redis.rpop(self.priority_queue_key) 61 task = self.redis.rpop(self.priority_queue_key)
......
1 import os 1 import os
2 import re
3 import zipfile
4
5 import rarfile
2 from zipfile import ZipFile 6 from zipfile import ZipFile
3 7
4 8
...@@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path): ...@@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path):
18 src_file_path = os.path.join(root, single_file) 22 src_file_path = os.path.join(root, single_file)
19 file_target_path = os.path.join(root_target_path, single_file) 23 file_target_path = os.path.join(root_target_path, single_file)
20 z.write(src_file_path, file_target_path) 24 z.write(src_file_path, file_target_path)
25
26
def get_pwd_list_from_str(doc_name):
    """Collect candidate passwords (runs of six digits) from a document name.

    Args:
        doc_name: str, the document name to scan.

    Returns:
        list of str: every non-overlapping match of six consecutive digits,
        in order of appearance. Empty when nothing matches or when
        ``doc_name`` is not a string.
    """
    try:
        return re.findall(r'\d{6}', doc_name)
    except TypeError:
        # re.findall rejects non-string input (e.g. None); report
        # "no candidates" rather than propagating the error.
        return []
33
34
def _try_extract(open_archive, file_path, extract_path, password, encode_password):
    """Attempt a single extraction; return True on success, False on any failure."""
    try:
        # zipfile expects the password as bytes; rarfile is given it unchanged.
        pwd = bytes(password, 'utf-8') if encode_password else password
        with open_archive(file_path) as archive:
            archive.extractall(extract_path, pwd=pwd)
    except Exception:
        # Deliberate best effort: a wrong password, a corrupt or missing
        # archive, or an unusable password value all mean "this attempt
        # failed", so the caller can move on to the next candidate.
        return False
    return True


def extract_zip_or_rar(file_path, extract_path, pwd_list=None):
    """Extract a .zip/.ZIP or .rar/.RAR archive, trying candidate passwords.

    Args:
        file_path: str, path of the archive. The archive type is chosen from
            the suffix; only the all-lowercase and all-uppercase spellings
            are recognised.
        extract_path: str, directory the archive is extracted into.
        pwd_list: optional list of str passwords to try in order. When it is
            empty or None, a single extraction without a password is made.

    Returns:
        bool: True as soon as one extraction attempt succeeds; False when
        every attempt fails or the suffix is not a supported archive type.
    """
    # NOTE(review): archives presumably come from uploaded documents; nothing
    # here limits the extracted size or file count - confirm that is acceptable.
    if file_path.endswith(('.zip', '.ZIP')):
        open_archive, encode_password = zipfile.ZipFile, True
    elif file_path.endswith(('.rar', '.RAR')):
        open_archive, encode_password = rarfile.RarFile, False
    else:
        return False

    if not pwd_list:
        return _try_extract(open_archive, file_path, extract_path, None, False)

    # any() stops at the first password that works.
    return any(
        _try_extract(open_archive, file_path, extract_path, password, encode_password)
        for password in pwd_list
    )
78
79
def get_file_paths(input_path, suffix_list):
    """Lazily yield paths of files under a directory that end with a given suffix.

    Args:
        input_path: str, the directory to walk recursively.
        suffix_list: list of str, file-name suffixes to accept (the
            comparison is case-sensitive).

    Yields:
        str: ``os.path.join(parent, filename)`` for each matching file, so
        the paths are absolute only when ``input_path`` itself is absolute.
    """
    # str.endswith accepts a tuple, which replaces the per-suffix inner loop.
    suffixes = tuple(suffix_list)
    for parent, _, filenames in os.walk(input_path):
        for filename in filenames:
            if filename.endswith(suffixes):
                yield os.path.join(parent, filename)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!