Merge branch 'feature/ecm' into feature/1119
Showing
4 changed files
with
173 additions
and
3 deletions
| ... | @@ -19,7 +19,8 @@ from common.mixins import LoggerMixin | ... | @@ -19,7 +19,8 @@ from common.mixins import LoggerMixin | 
| 19 | from common.tools.file_tools import write_zip_file | 19 | from common.tools.file_tools import write_zip_file | 
| 20 | from common.tools.pdf_to_img import PDFHandler | 20 | from common.tools.pdf_to_img import PDFHandler | 
| 21 | from apps.doc import consts | 21 | from apps.doc import consts | 
| 22 | from apps.doc.ocr.edms import EDMS, rh | 22 | # from apps.doc.ocr.edms import EDMS, rh | 
| 23 | from apps.doc.ocr.ecm import ECM, rh | ||
| 23 | from apps.doc.named_enum import KeywordsType, FailureReason, WorkflowName, ProcessName, RequestTeam, RequestTrigger | 24 | from apps.doc.named_enum import KeywordsType, FailureReason, WorkflowName, ProcessName, RequestTeam, RequestTrigger | 
| 24 | from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception, OCR4Exception | 25 | from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception, OCR4Exception | 
| 25 | from apps.doc.ocr.wb import BSWorkbook | 26 | from apps.doc.ocr.wb import BSWorkbook | 
| ... | @@ -68,7 +69,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -68,7 +69,8 @@ class Command(BaseCommand, LoggerMixin): | 
| 68 | self.ocr_url_3 = conf.BC_URL | 69 | self.ocr_url_3 = conf.BC_URL | 
| 69 | self.ocr_url_4 = conf.IC_URL | 70 | self.ocr_url_4 = conf.IC_URL | 
| 70 | # EDMS web_service_api | 71 | # EDMS web_service_api | 
| 71 | self.edms = EDMS() | 72 | # self.edms = EDMS() | 
| 73 | self.edms = ECM() | ||
| 72 | # 优雅退出信号:15 | 74 | # 优雅退出信号:15 | 
| 73 | signal.signal(signal.SIGTERM, self.signal_handler) | 75 | signal.signal(signal.SIGTERM, self.signal_handler) | 
| 74 | 76 | ||
| ... | @@ -945,7 +947,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -945,7 +947,8 @@ class Command(BaseCommand, LoggerMixin): | 
| 945 | for times in range(consts.RETRY_TIMES): | 947 | for times in range(consts.RETRY_TIMES): | 
| 946 | try: | 948 | try: | 
| 947 | if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | 949 | if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | 
| 948 | self.edms.download(pdf_path, doc.metadata_version_id) | 950 | # self.edms.download(pdf_path, doc.metadata_version_id) | 
| 951 | self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, business_type) | ||
| 949 | self.online_log.info('{0} [edms download success] [task={1}] [times={2}] ' | 952 | self.online_log.info('{0} [edms download success] [task={1}] [times={2}] ' | 
| 950 | '[pdf_path={3}]'.format(self.log_base, task_str, times, pdf_path)) | 953 | '[pdf_path={3}]'.format(self.log_base, task_str, times, pdf_path)) | 
| 951 | 954 | ... | ... | 
src/apps/doc/ocr/ecm.py
0 → 100644
| 1 | import base64 | ||
| 2 | import requests | ||
| 3 | from common.redis_cache import redis_handler as rh | ||
| 4 | from settings import conf | ||
| 5 | from apps.doc.exceptions import ECMException | ||
| 6 | |||
| 7 | |||
class ECM:
    """HTTP client for the ECM document service.

    Wraps four endpoints taken from ``settings.conf``: OAuth token issuance,
    document search, document download and document upload. The OAuth token
    is shared through redis (``rh``) so it can be reused until it expires.
    """

    def __init__(self):
        # Most recently used OAuth access token (None until first fetched).
        self.oauth_token = None
        self.username = conf.ECM_USER
        self.pwd = conf.ECM_PWD
        self.oauth_url = conf.ECM_OAUTH_URL
        self.download_url = conf.ECM_DOWNLOAD_URL
        self.upload_url = conf.ECM_UPLOAD_URL
        self.search_url = conf.ECM_SEARCH_URL
        self.oauth_headers = {
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        self.oauth_payload = {
            'grant_type': 'client_credentials',
            'client_id': conf.ECM_OAUTH_ID,
            'client_secret': conf.ECM_OAUTH_SECRET,
        }
        # Keys read from the OAuth JSON response.
        self.token_key = 'access_token'
        self.token_type = 'Bearer'
        self.token_type_key = 'token_type'
        self.expires_key = 'expires_in'
        self.settlement_type = 'settlement'
        # document_scheme -> (ECM document type, folder for AFC, folder for HIL)
        self.doc_type_map = {
            'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL),
            'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL),
            'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL),
        }
        # business_type -> ECM docbase name
        self.doc_base_map = {
            'AFC': 'SF5_CN',
            'HIL': 'SF5_CL',
        }
        # Prefix put in front of every uploaded object name.
        self.prefix = 'OCR'
        # Metadata fields the upload request carries; upload() sends them empty.
        self.upload_fields = ["b_region", "b_region_name", "r_object_type", "r_creation_date",
                              "r_creator_name", "r_modify_date", "r_modifier", "owner", "b_short_application_no",
                              "b_short_contract_no", "b_customer_id", "b_customer_name", "b_customer_mobile",
                              "b_coborrower_id", "b_coborrower_name", "b_guarantor_id", "b_guarantor_name",
                              "b_frontend_partner", "b_dealer_code", "b_dealer_name", "b_input_date", "b_comment",
                              "b_contract_no", "b_location"]
        # object_name prefix that search() matches with LIKE.
        self.contract_prefix = '电子'

    @staticmethod
    def _escape_dql_literal(value):
        """Return ``value`` as text that is safe inside a single-quoted DQL literal.

        Single quotes are doubled so the value cannot terminate the literal.
        NOTE(review): assumes the search endpoint follows the usual DQL/SQL
        quote-doubling rule -- confirm against the ECM service.
        """
        return str(value).replace("'", "''")

    def _resolve_doc_type(self, document_scheme):
        """Return the ``doc_type_map`` entry for ``document_scheme``.

        Raises:
            ECMException: if the scheme has no entry in ``doc_type_map``.
        """
        entry = self.doc_type_map.get(document_scheme)
        if entry is None:
            raise ECMException('ECM unknown document scheme: {0}'.format(document_scheme))
        return entry

    def update_oauth_token(self):
        """Request a fresh OAuth token and store it on the instance and in redis.

        Raises:
            ECMException: on a non-200 response or when the response carries
                no string access token.
        """
        # NOTE(review): verify=False disables TLS certificate verification.
        response = requests.post(self.oauth_url, headers=self.oauth_headers, data=self.oauth_payload, verify=False)
        if response.status_code != 200:
            raise ECMException('ECM Oauth response with code: {0}'.format(response.status_code))
        payload = response.json()  # parse the body once and reuse it
        token = payload.get(self.token_key)
        if not isinstance(token, str):
            raise ECMException('ECM Oauth can not get token: {0}'.format(payload))
        self.oauth_token = token
        self.token_type = payload.get(self.token_type_key, self.token_type)
        expires = payload.get(self.expires_key, 3600)
        rh.set_ecm_token(self.oauth_token, expires)

    def get_oauth_token(self):
        """Return a usable OAuth token, requesting a new one when none is cached.

        The redis entry is written with the token's lifetime, so it is the
        source of truth: a token held only in memory would keep being sent
        after it expired and would never be refreshed in a long-running
        process.
        """
        cached = rh.get_ecm_token()
        if isinstance(cached, bytes):
            # A redis client may hand back bytes; the header needs text.
            cached = cached.decode()
        if cached:
            self.oauth_token = cached
        else:
            self.update_oauth_token()
        return self.oauth_token

    def get_headers(self):
        """Return the Authorization header for an authenticated ECM request."""
        return {'Authorization': '{0} {1}'.format(self.token_type, self.get_oauth_token())}

    def search(self, application_id, business_type):
        """Search settlement documents of an application whose name starts with ``contract_prefix``.

        Args:
            application_id: value matched against ``b_application_no``.
            business_type: key into ``doc_base_map`` selecting the docbase.

        Returns:
            A list of ``(object_name, object_id)`` tuples; entries lacking
            either value are left out.

        Raises:
            ECMException: on a non-200 response.
        """
        sql = "select * from {0} where b_application_no='{1}' and object_name like '{2}%'".format(
            self.settlement_type, self._escape_dql_literal(application_id), self.contract_prefix)
        search_args = {
            "userName": self.username,
            "password": self.pwd,
            "docbase": self.doc_base_map.get(business_type),
            "documentType": self.settlement_type,
            "dql": sql
        }
        # NOTE(review): verify=False disables TLS certificate verification.
        response = requests.post(self.search_url, headers=self.get_headers(), json=search_args, verify=False)
        if response.status_code != 200:
            raise ECMException('ECM search failed with code: {0} , with headers: {1}'.format(
                response.status_code, response.headers))
        data_objects = response.json().get('Envelope', {}).get('Body', {}).get('executeResponse', {}).get(
            'return', {}).get('dataPackage', {}).get('DataObjects', [])
        result = []
        for object_dict in data_objects:
            object_id = object_dict.get('Identity', {}).get('ObjectId', {}).get('@id', '')
            # The first property named 'object_name' supplies the name.
            object_name = ''
            for attr_dict in object_dict.get('Properties', {}).get('Properties', []):
                if attr_dict.get('@name', '') == 'object_name':
                    object_name = attr_dict.get('Value', '')
                    break
            if len(object_id) > 0 and len(object_name) > 0:
                result.append((object_name, object_id))
        return result

    def download(self, save_path, object_id, document_scheme, business_type):
        """Download one document and write its decoded content to ``save_path``.

        Args:
            save_path: destination file path, opened in binary write mode.
            object_id: ECM object id of the document.
            document_scheme: key into ``doc_type_map``.
            business_type: key into ``doc_base_map`` selecting the docbase.

        Raises:
            ECMException: on an unknown ``document_scheme``, a non-200
                response, or a response without base64 string content.
        """
        doc_type, _, _ = self._resolve_doc_type(document_scheme)
        download_json = {
            "userName": self.username,
            "password": self.pwd,
            "docbase": self.doc_base_map.get(business_type),
            "documentType": doc_type,
            "objectId": object_id,
        }
        # NOTE(review): verify=False disables TLS certificate verification.
        response = requests.post(self.download_url, headers=self.get_headers(), json=download_json, verify=False)
        if response.status_code != 200:
            raise ECMException('ECM download failed with code: {0}'.format(response.status_code))
        payload = response.json()  # parse the body once and reuse it
        base64_data = payload.get('Envelope', {}).get('Body', {}).get('getResponse', {}).get('return', {}).get(
            'DataObjects', {}).get('Contents', {}).get('Value')
        if not isinstance(base64_data, str):
            raise ECMException('ECM download failed: {0}'.format(payload))
        with open(save_path, "wb") as fh:
            fh.write(base64.b64decode(base64_data.encode()))

    def get_doc_file_name(self, doc_name):
        """Return the upload object name: ``prefix`` plus ``doc_name`` without a ``.pdf`` extension.

        The extension match is case-insensitive. A non-string ``doc_name``
        yields just the prefix.
        """
        # Imported here because this module's import block does not bring in os.
        import os

        if not isinstance(doc_name, str):
            return self.prefix
        if doc_name.lower().endswith('.pdf'):
            doc_name, _ = os.path.splitext(doc_name)
        return '{0}{1}'.format(self.prefix, doc_name)

    def upload(self, file_path, doc, business_type):
        """Upload the file at ``file_path`` to ECM as an Excel workbook attached to ``doc``.

        Args:
            file_path: path of the local file to upload.
            doc: object providing ``document_scheme``, ``document_name`` and
                ``application_id``.
            business_type: ``'AFC'`` selects the AFC folder, any other value
                the HIL folder; it is also the key into ``doc_base_map``.

        Raises:
            ECMException: on an unknown ``doc.document_scheme``, a non-200
                response, or a response body without ``ns6:createResponse``.
        """
        doc_type, folder_afc, folder_hil = self._resolve_doc_type(doc.document_scheme)
        folder = folder_afc if business_type == 'AFC' else folder_hil
        args = {
            "username": self.username,
            "password": self.pwd,
            "docbase": self.doc_base_map.get(business_type),
            "documentType": doc_type,
            "object_name": self.get_doc_file_name(doc.document_name),
            "folder": folder,
            "format": "excel12book",
            "r_content_type": "excel12book",
            "b_application_no": doc.application_id,
        }
        # Every metadata field is sent, with an empty value.
        for key in self.upload_fields:
            args[key] = ''
        with open(file_path, 'rb') as f:
            # base64 text of the file content, as the JSON body needs a str.
            args['file_base64_content'] = base64.b64encode(f.read()).decode()
        # NOTE(review): verify=False disables TLS certificate verification.
        response = requests.post(self.upload_url, headers=self.get_headers(), json=args, verify=False)
        if response.status_code != 200:
            raise ECMException('ECM upload failed with code: {0} , with headers: {1}'.format(
                response.status_code, response.headers))
        payload = response.json()  # parse the body once and reuse it
        if 'ns6:createResponse' not in payload.get('S:Envelope', {}).get('S:Body', {}):
            raise ECMException('ECM upload failed: {0} , with headers: {1}'.format(payload, response.headers))
| ... | @@ -37,6 +37,7 @@ class RedisHandler: | ... | @@ -37,6 +37,7 @@ class RedisHandler: | 
| 37 | self.priority_queue_key = '{0}:priority_queue'.format(self.prefix) | 37 | self.priority_queue_key = '{0}:priority_queue'.format(self.prefix) | 
| 38 | self.session_id_key = '{0}:session_id'.format(self.prefix) | 38 | self.session_id_key = '{0}:session_id'.format(self.prefix) | 
| 39 | self.cms_token_key = '{0}:cms_token'.format(self.prefix) | 39 | self.cms_token_key = '{0}:cms_token'.format(self.prefix) | 
| 40 | self.ecm_token_key = '{0}:ecm_token'.format(self.prefix) | ||
| 40 | 41 | ||
| 41 | def enqueue(self, tasks, is_priority=False): | 42 | def enqueue(self, tasks, is_priority=False): | 
| 42 | # 1 | 43 | # 1 | 
| ... | @@ -64,3 +65,9 @@ class RedisHandler: | ... | @@ -64,3 +65,9 @@ class RedisHandler: | 
def set_cms_token(self, token, expires=None):
    """Store the CMS token under ``cms_token_key`` and return the redis client's result.

    ``expires`` is forwarded unchanged as the third argument of ``redis.set``.
    """
    outcome = self.redis.set(self.cms_token_key, token, expires)
    return outcome
| 66 | 67 | ||
def get_ecm_token(self):
    """Return whatever the redis client holds under ``ecm_token_key``."""
    cached_token = self.redis.get(self.ecm_token_key)
    return cached_token
| 70 | |||
def set_ecm_token(self, token, expires=None):
    """Store the ECM token under ``ecm_token_key`` and return the redis client's result.

    ``expires`` is forwarded unchanged as the third argument of ``redis.set``.
    """
    outcome = self.redis.set(self.ecm_token_key, token, expires)
    return outcome
| 73 | ... | ... | 
- 
Please register or sign in to post a comment