add zip & rar file
Showing
4 changed files
with
107 additions
and
12 deletions
This diff is collapsed.
Click to expand it.
| ... | @@ -566,15 +566,14 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -566,15 +566,14 @@ class UploadDocView(GenericView, DocHandler): |
| 566 | data_source = self.fix_data_source(data_source) | 566 | data_source = self.fix_data_source(data_source) |
| 567 | document_scheme = self.fix_scheme(document_scheme) | 567 | document_scheme = self.fix_scheme(document_scheme) |
| 568 | 568 | ||
| 569 | if document_name.endswith('.zip'): | 569 | # if document_name.endswith('.zip'): |
| 570 | self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args)) | 570 | # self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args)) |
| 571 | return response.ok() | 571 | # return response.ok() |
| 572 | 572 | ||
| 573 | if data_source == consts.DATA_SOURCE_LIST[1]: | 573 | if data_source == consts.DATA_SOURCE_LIST[1]: |
| 574 | if isinstance(document_name, str): | 574 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): |
| 575 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): | 575 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) |
| 576 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) | 576 | return response.ok() |
| 577 | return response.ok() | ||
| 578 | 577 | ||
| 579 | # 2. 根据业务类型分库存储 | 578 | # 2. 根据业务类型分库存储 |
| 580 | doc_class, prefix = self.get_doc_class(business_type) | 579 | doc_class, prefix = self.get_doc_class(business_type) |
| ... | @@ -590,17 +589,24 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -590,17 +589,24 @@ class UploadDocView(GenericView, DocHandler): |
| 590 | data_source=data_source, | 589 | data_source=data_source, |
| 591 | upload_finish_time=document.get('uploadFinishTime'), | 590 | upload_finish_time=document.get('uploadFinishTime'), |
| 592 | ) | 591 | ) |
| 592 | |||
| 593 | # 3. 选择队列进入 | 593 | # 3. 选择队列进入 |
| 594 | is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists() | 594 | is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists() |
| 595 | 595 | is_zip = False | |
| 596 | |||
| 596 | classify_1 = 0 | 597 | classify_1 = 0 |
| 598 | # 电子合同 | ||
| 597 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 599 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: |
| 598 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): | 600 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
| 599 | if keyword in document_name: | 601 | if keyword in document_name: |
| 600 | classify_1 = classify_1_tmp | 602 | classify_1 = classify_1_tmp |
| 601 | break | 603 | break |
| 604 | elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | ||
| 605 | or document_name.endswith('.RAR'): | ||
| 606 | is_zip = True | ||
| 607 | |||
| 602 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | 608 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) |
| 603 | enqueue_res = rh.enqueue([task], is_priority) | 609 | enqueue_res = rh.enqueue([task], is_priority, is_zip) |
| 604 | self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' | 610 | self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] ' |
| 605 | '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id, | 611 | '[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id, |
| 606 | is_priority, enqueue_res)) | 612 | is_priority, enqueue_res)) |
| ... | @@ -665,7 +671,7 @@ class PriorityDocView(GenericView, DocHandler): | ... | @@ -665,7 +671,7 @@ class PriorityDocView(GenericView, DocHandler): |
| 665 | self.running_log.info( | 671 | self.running_log.info( |
| 666 | '[priority doc success] [args={0}]'.format(args)) | 672 | '[priority doc success] [args={0}]'.format(args)) |
| 667 | else: | 673 | else: |
| 668 | enqueue_res = rh.enqueue(tasks_list, is_priority=True) | 674 | enqueue_res = rh.enqueue(tasks_list, is_priority=True) # TODO 可能把压缩文件放入优先队列 |
| 669 | self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format( | 675 | self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format( |
| 670 | args, tasks_list, enqueue_res)) | 676 | args, tasks_list, enqueue_res)) |
| 671 | return response.ok() | 677 | return response.ok() | ... | ... |
| ... | @@ -35,16 +35,27 @@ class RedisHandler: | ... | @@ -35,16 +35,27 @@ class RedisHandler: |
| 35 | self.prefix = 'bwm_ocr' | 35 | self.prefix = 'bwm_ocr' |
| 36 | self.common_queue_key = '{0}:common_queue'.format(self.prefix) | 36 | self.common_queue_key = '{0}:common_queue'.format(self.prefix) |
| 37 | self.priority_queue_key = '{0}:priority_queue'.format(self.prefix) | 37 | self.priority_queue_key = '{0}:priority_queue'.format(self.prefix) |
| 38 | self.zip_queue_key = '{0}:zip_queue'.format(self.prefix) | ||
| 38 | self.session_id_key = '{0}:session_id'.format(self.prefix) | 39 | self.session_id_key = '{0}:session_id'.format(self.prefix) |
| 39 | self.cms_token_key = '{0}:cms_token'.format(self.prefix) | 40 | self.cms_token_key = '{0}:cms_token'.format(self.prefix) |
| 40 | self.ecm_token_key = '{0}:ecm_token'.format(self.prefix) | 41 | self.ecm_token_key = '{0}:ecm_token'.format(self.prefix) |
| 41 | self.login_limit_key = '{0}:login_limit'.format(self.prefix) | 42 | self.login_limit_key = '{0}:login_limit'.format(self.prefix) |
| 42 | 43 | ||
def enqueue(self, tasks, is_priority=False, is_zip=False):
    """Push ``tasks`` onto one of the Redis queues.

    Queue selection, in order of precedence:
      1. ``is_zip`` true       -> ``self.zip_queue_key``
      2. ``is_priority`` true  -> ``self.priority_queue_key``
      3. otherwise             -> ``self.common_queue_key``

    Args:
        tasks: Passed unchanged as the second argument of
            ``self.redis.lpush``.
        is_priority: Select the priority queue (ignored when ``is_zip``
            is true).
        is_zip: Select the zip queue; takes precedence over
            ``is_priority``.

    Returns:
        Whatever ``self.redis.lpush`` returns.
    """
    # The zip flag is checked first, so a task flagged both zip and
    # priority lands in the zip queue.
    queue_key = (
        self.zip_queue_key if is_zip
        else self.priority_queue_key if is_priority
        else self.common_queue_key
    )
    return self.redis.lpush(queue_key, tasks)
| 47 | 53 | ||
def dequeue_zip(self):
    """Pop one entry from the zip queue.

    Returns:
        The result of ``self.redis.rpop(self.zip_queue_key)``; per the
        original inline note this is a task, or ``None`` when nothing is
        available.
    """
    return self.redis.rpop(self.zip_queue_key)
| 58 | |||
| 48 | def dequeue(self): | 59 | def dequeue(self): |
| 49 | # task or None | 60 | # task or None |
| 50 | task = self.redis.rpop(self.priority_queue_key) | 61 | task = self.redis.rpop(self.priority_queue_key) | ... | ... |
| 1 | import os | 1 | import os |
| 2 | import re | ||
| 3 | import zipfile | ||
| 4 | |||
| 5 | import rarfile | ||
| 2 | from zipfile import ZipFile | 6 | from zipfile import ZipFile |
| 3 | 7 | ||
| 4 | 8 | ||
| ... | @@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path): | ... | @@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path): |
| 18 | src_file_path = os.path.join(root, single_file) | 22 | src_file_path = os.path.join(root, single_file) |
| 19 | file_target_path = os.path.join(root_target_path, single_file) | 23 | file_target_path = os.path.join(root_target_path, single_file) |
| 20 | z.write(src_file_path, file_target_path) | 24 | z.write(src_file_path, file_target_path) |
| 25 | |||
| 26 | |||
def get_pwd_list_from_str(doc_name):
    """Extract every six-digit run from a document name.

    The name of the function suggests the runs are used as candidate
    archive passwords (presumably fed to ``extract_zip_or_rar`` -- confirm
    against the caller).

    Args:
        doc_name: Document name to scan; expected to be a ``str``.

    Returns:
        list: Non-overlapping six-digit substrings in left-to-right order.
        Empty when there is no match or when ``doc_name`` is not a string.
    """
    try:
        return re.findall(r'\d{6}', doc_name)
    except TypeError:
        # re.findall raises TypeError for non-string input such as None;
        # treat that as "no passwords" instead of failing the caller.
        return []
| 33 | |||
| 34 | |||
def extract_zip_or_rar(file_path, extract_path, pwd_list=None):
    """Extract a ZIP or RAR archive, optionally trying several passwords.

    The archive type is chosen from the file-name suffix, compared
    case-insensitively (``.zip`` / ``.rar``).

    Args:
        file_path: str, path of the archive to extract.
        extract_path: Directory handed to ``extractall`` as the target.
        pwd_list: Optional sequence of str passwords. When it is empty or
            ``None`` a single extraction without a password is attempted.
            Otherwise each password is tried in order until one succeeds;
            no password-less attempt is made in that case.

    Returns:
        bool: ``True`` as soon as one extraction attempt completes without
        raising; ``False`` when every attempt fails or the suffix is
        neither ``.zip`` nor ``.rar``.
    """
    lowered = file_path.lower()
    if lowered.endswith('.zip'):
        is_zip = True
    elif lowered.endswith('.rar'):
        is_zip = False
    else:
        return False

    # A lone None stands for "extract without a password".
    candidates = list(pwd_list) if pwd_list else [None]

    for password in candidates:
        try:
            if is_zip:
                # zipfile expects the password as bytes.
                pwd = None if password is None else bytes(password, 'utf-8')
                with zipfile.ZipFile(file_path) as archive:
                    archive.extractall(extract_path, pwd=pwd)
            else:
                with rarfile.RarFile(file_path) as archive:
                    archive.extractall(extract_path, pwd=password)
        except Exception:
            # Deliberate best-effort boundary: a wrong password, corrupt
            # archive or I/O error just means this attempt failed.
            continue
        return True
    return False
| 78 | |||
| 79 | |||
def get_file_paths(input_path, suffix_list):
    """Lazily yield paths of files under a directory that match a suffix.

    The directory tree is traversed with ``os.walk``; matching is a
    case-sensitive ``str.endswith`` on the bare file name.

    Args:
        input_path: str, directory to search (sub-directories included).
        suffix_list: Iterable of str suffixes to look for, e.g.
            ``['.pdf', '.jpg']``.

    Yields:
        str: ``os.path.join(parent, filename)`` for every matching file.
        The path is absolute only if ``input_path`` itself is absolute.
    """
    # Materialise once: endswith() needs a tuple, and a one-shot iterable
    # would otherwise be exhausted after the first file.
    suffixes = tuple(suffix_list)
    for parent, _, filenames in os.walk(input_path):
        for filename in filenames:
            if filename.endswith(suffixes):
                yield os.path.join(parent, filename)
-
Please register or sign in to post a comment