97994674 by 周伟奇

ocr excel upload eDMS

1 parent 7aa0284c
...@@ -12,13 +12,16 @@ Django==2.1 ...@@ -12,13 +12,16 @@ Django==2.1
12 django-oauth-toolkit==1.3.2 12 django-oauth-toolkit==1.3.2
13 djangorestframework==3.9.0 13 djangorestframework==3.9.0
14 djangorestframework-jwt==1.11.0 14 djangorestframework-jwt==1.11.0
15 et-xmlfile==1.0.1
15 idna==2.9 16 idna==2.9
16 idna-ssl==1.1.0 17 idna-ssl==1.1.0
17 isodate==0.6.0 18 isodate==0.6.0
19 jdcal==1.4.1
18 lxml==4.5.1 20 lxml==4.5.1
19 marshmallow==3.6.1 21 marshmallow==3.6.1
20 multidict==4.7.6 22 multidict==4.7.6
21 oauthlib==3.1.0 23 oauthlib==3.1.0
24 openpyxl==3.0.4
22 pdfminer3k==1.3.4 25 pdfminer3k==1.3.4
23 Pillow==7.1.2 26 Pillow==7.1.2
24 ply==3.11 27 ply==3.11
......
1 PAGE_DEFAULT = 1 1 PAGE_DEFAULT = 1
2 PAGE_SIZE_DEFAULT = 10 2 PAGE_SIZE_DEFAULT = 10
3 3
4 DOC_SCHEME_LIST = ['Acceptance', 'Settlement', 'Contract Management'] 4 FIXED_APPLICATION_ID = '手工单'
5 DATA_SOURCE_LIST = ['POS', 'EAPP', 'Econtract'] 5
6 BUSINESS_TYPE_LIST = ['HIL', 'AFC'] 6 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
7 HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'} 7 DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']
8
8 HIL_PREFIX = 'HIL' 9 HIL_PREFIX = 'HIL'
9 AFC_PREFIX = 'AFC' 10 AFC_PREFIX = 'AFC'
11 SPLIT_STR = '_'
12 BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
13 HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
10 14
11 SESSION_PREFIX = 'FHLSID' 15 SESSION_PREFIX = 'FHLSID'
12 CUSTOM_CLIENT = 'CustomClient' 16 CUSTOM_CLIENT = 'CustomClient'
...@@ -15,12 +19,22 @@ FIXED_FILE_SIZE = 0 ...@@ -15,12 +19,22 @@ FIXED_FILE_SIZE = 0
15 DOWNLOAD_ACTION_TYPE = 'Downloaded' 19 DOWNLOAD_ACTION_TYPE = 'Downloaded'
16 20
17 DOC_SCHEMA_ID_FILL = { 21 DOC_SCHEMA_ID_FILL = {
18 'Acceptance': (1, 'DFE-AutoFilingScript'), 22 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'),
19 'Settlement': (20, 'DFE-AutoFilingScript'), 23 'SETTLEMENT': (20, 'DFE-AutoFilingScript'),
20 'Contract Management': (86, 'Schema-Based') 24 'CONTRACT MANAGEMENT': (86, 'Schema-Based')
25 }
26 BUSINESS_TYPE_DICT = {
27 HIL_PREFIX: 'CO00002',
28 AFC_PREFIX: 'CO00001'
21 } 29 }
22 DOC_SCHEMA_TYPE = 'ElectronicRecord' 30 DOC_SCHEMA_TYPE = 'ElectronicRecord'
23 APPLICATION_ID_META_FIELD_id = 1 31 APPLICATION_ID_META_FIELD_id = 1
24 DEALER_CODE_META_FIELD_id = 13 32 DEALER_CODE_META_FIELD_id = 13
25 BUSINESS_TYPE_META_FIELD_id = 93 33 BUSINESS_TYPE_META_FIELD_id = 93
26 DEALER_CODE = 'ocr_situ_group' 34 DEALER_CODE = 'ocr_situ_group'
35
36 AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
37 OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
38 PROOF_COL_TITLE = '核对结果'
39 PROOF_RES = ('对', '错')
40 META_SHEET_TITLE = '关键信息提取和展示'
......
1 import os
1 import requests 2 import requests
2 from zeep import Client, xsd 3 from zeep import Client, xsd
3 from settings import conf 4 from settings import conf
...@@ -65,9 +66,9 @@ class EDMS: ...@@ -65,9 +66,9 @@ class EDMS:
65 params = {'token': token} 66 params = {'token': token}
66 self.download_handler(params, headers, save_path) 67 self.download_handler(params, headers, save_path)
67 68
68 def create_upload_token(self, headers, file_size): 69 def create_upload_token(self, headers):
69 with self.rc_client.settings(extra_http_headers=headers): 70 with self.rc_client.settings(extra_http_headers=headers):
70 token = self.rc_client.service.CreateUploadToken(fileSize=file_size) 71 token = self.rc_client.service.CreateUploadToken(fileSize=consts.FIXED_FILE_SIZE)
71 return token 72 return token
72 73
73 def upload_handler(self, file_path, params, headers): 74 def upload_handler(self, file_path, params, headers):
...@@ -80,11 +81,19 @@ class EDMS: ...@@ -80,11 +81,19 @@ class EDMS:
80 else: 81 else:
81 raise Exception 82 raise Exception
82 83
83 def get_doc_info(self, token, doc_info): 84 @staticmethod
84 doc_schema_id, auto_filing = consts.DOC_SCHEMA_ID_FILL.get(doc_info.get('document_scheme')) 85 def get_doc_file_name(doc_name):
85 application_id = doc_info.get('application_id') 86 if doc_name.endswith('pdf'):
86 doc_file_name = doc_info.get('doc_file_name') 87 name, _ = os.path.splitext(doc_name)
87 business_type = doc_info.get('business_type') 88 return name
89 return doc_name
90
91 def get_doc_info(self, token, doc, business_type, file_path):
92 business_type = consts.BUSINESS_TYPE_DICT.get(business_type)
93 doc_schema_id, auto_filing = consts.DOC_SCHEMA_ID_FILL.get(doc.document_scheme)
94 application_id = doc.application_id
95 doc_file_name = self.get_doc_file_name(doc.document_name)
96 origin_file_name = os.path.basename(file_path)
88 fields_with_value = [ 97 fields_with_value = [
89 {'FieldId': consts.APPLICATION_ID_META_FIELD_id, 98 {'FieldId': consts.APPLICATION_ID_META_FIELD_id,
90 'FieldValue': xsd.AnyObject(xsd.String(), application_id)}, 99 'FieldValue': xsd.AnyObject(xsd.String(), application_id)},
...@@ -99,20 +108,20 @@ class EDMS: ...@@ -99,20 +108,20 @@ class EDMS:
99 'DocumentName': doc_file_name, 108 'DocumentName': doc_file_name,
100 'FieldsWithValues': fields_with_values, 109 'FieldsWithValues': fields_with_values,
101 'UploadToken': token, 110 'UploadToken': token,
102 'OriginalFileName': doc_file_name, 111 'OriginalFileName': origin_file_name,
103 'SendEmailToMembers': False, 112 'SendEmailToMembers': False,
104 'AutoFilingScriptToUse': auto_filing, 113 'AutoFilingScriptToUse': auto_filing,
105 'DocumentSchemaType': consts.DOC_SCHEMA_TYPE, 114 'DocumentSchemaType': consts.DOC_SCHEMA_TYPE,
106 } 115 }
107 return info 116 return info
108 117
109 def add_doc_info(self, headers, token, doc_info): 118 def add_doc_info(self, headers, token, doc, business_type, file_path):
110 info = self.get_doc_info(token, doc_info) 119 info = self.get_doc_info(token, doc, business_type, file_path)
111 with self.dm_client.settings(extra_http_headers=headers): 120 with self.dm_client.settings(extra_http_headers=headers):
112 metadata_version_id = self.dm_client.service.AddDocumentInfo(info=info) 121 metadata_version_id = self.dm_client.service.AddDocumentInfo(info=info)
113 return metadata_version_id 122 return metadata_version_id
114 123
115 def upload(self, file_path, file_size, doc_info): 124 def upload(self, file_path, doc, business_type):
116 # file_path = '/Users/clay/Postman/files/OCRuploadTest4.txt' 125 # file_path = '/Users/clay/Postman/files/OCRuploadTest4.txt'
117 # file_size = 16 126 # file_size = 16
118 # doc_info = { 127 # doc_info = {
...@@ -122,12 +131,12 @@ class EDMS: ...@@ -122,12 +131,12 @@ class EDMS:
122 # 'business_type': 'CO00001', 131 # 'business_type': 'CO00001',
123 # } 132 # }
124 headers = self.get_headers() 133 headers = self.get_headers()
125 token = self.create_upload_token(headers, file_size) 134 token = self.create_upload_token(headers)
126 headers.update({'Content-Type': 'application/octet-stream'}) 135 headers.update({'Content-Type': 'application/octet-stream'})
127 params = {'token': token} 136 params = {'token': token}
128 self.upload_handler(file_path, params, headers) 137 self.upload_handler(file_path, params, headers)
129 headers.pop('Content-Type') 138 headers.pop('Content-Type')
130 metadata_version_id = self.add_doc_info(headers, token, doc_info) 139 metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path)
131 return metadata_version_id 140 return metadata_version_id
132 141
133 142
......
...@@ -26,7 +26,21 @@ class DocHandler: ...@@ -26,7 +26,21 @@ class DocHandler:
26 26
27 @staticmethod 27 @staticmethod
28 def get_doc_class(business_type): 28 def get_doc_class(business_type):
29 is_hil = business_type in consts.HIL_SET 29 return (HILDoc, consts.HIL_PREFIX) if business_type in consts.HIL_SET else (AFCDoc, consts.AFC_PREFIX)
30 doc_class, prefix = (HILDoc, consts.HIL_PREFIX) if is_hil else (AFCDoc, consts.AFC_PREFIX) 30
31 return doc_class, prefix 31 def fix_scheme(self, scheme):
32 if scheme in consts.DOC_SCHEME_LIST:
33 return scheme
34 elif scheme.upper() in consts.DOC_SCHEME_LIST:
35 return scheme.upper()
36 else:
37 return consts.DOC_SCHEME_LIST[0]
38
39 def fix_data_source(self, data_source):
40 if data_source in consts.DATA_SOURCE_LIST:
41 return data_source
42 elif data_source.upper() in consts.DATA_SOURCE_LIST:
43 return data_source.upper()
44 else:
45 return consts.DATA_SOURCE_LIST[0]
32 46
......
...@@ -7,20 +7,3 @@ class DocStatus(NamedEnum): ...@@ -7,20 +7,3 @@ class DocStatus(NamedEnum):
7 PROCESS_FAILED = (2, '识别失败') 7 PROCESS_FAILED = (2, '识别失败')
8 UPLOAD_FAILED = (3, '同步失败') 8 UPLOAD_FAILED = (3, '同步失败')
9 COMPLETE = (4, '已完成') 9 COMPLETE = (4, '已完成')
10
11
12 class DocScheme(NamedEnum):
13 ACCEPTANCE = (0, "Acceptance")
14 SETTLEMENT = (1, 'Settlement')
15 CONTRACT_MANAGEMENT = (2, 'Contract Management')
16
17
18 class BusinessType(NamedEnum):
19 AFC = (0, "CO00001")
20 HIL = (1, 'CO00002')
21
22
23 class DataSource(NamedEnum):
24 POS = (0, "POS")
25 EAPP = (1, 'EAPP')
26 ECONTRACT = (2, 'Econtract')
......
...@@ -60,7 +60,7 @@ doc_list_args = { ...@@ -60,7 +60,7 @@ doc_list_args = {
60 'status': fields.Int(required=False, 60 'status': fields.Int(required=False,
61 validate=validate.OneOf(DocStatus.get_value_lst())), 61 validate=validate.OneOf(DocStatus.get_value_lst())),
62 'application_id': fields.Str(required=False, validate=validate.Length(max=64)), 62 'application_id': fields.Str(required=False, validate=validate.Length(max=64)),
63 'data_source': fields.Str(required=False, validate=validate.Length(max=64)), 63 'data_source': fields.Str(required=False, validate=validate.OneOf(consts.DATA_SOURCE_LIST)),
64 'business_type': fields.Str(required=True, validate=validate.OneOf(consts.BUSINESS_TYPE_LIST)), 64 'business_type': fields.Str(required=True, validate=validate.OneOf(consts.BUSINESS_TYPE_LIST)),
65 'upload_time_start': fields.Date(required=False), 65 'upload_time_start': fields.Date(required=False),
66 'upload_time_end': fields.Date(required=False), 66 'upload_time_end': fields.Date(required=False),
...@@ -100,6 +100,8 @@ class UploadDocView(GenericView, DocHandler): ...@@ -100,6 +100,8 @@ class UploadDocView(GenericView, DocHandler):
100 document = args.get('document') 100 document = args.get('document')
101 business_type = document.get('businessType') 101 business_type = document.get('businessType')
102 application_id = application_data.get('applicationId') 102 application_id = application_data.get('applicationId')
103 document_scheme = document.get('documentScheme')
104 data_source = document.get('dataSource')
103 try: 105 try:
104 # 1. 上传信息记录 106 # 1. 上传信息记录
105 record = UploadDocRecords.objects.create( 107 record = UploadDocRecords.objects.create(
...@@ -110,9 +112,9 @@ class UploadDocView(GenericView, DocHandler): ...@@ -110,9 +112,9 @@ class UploadDocView(GenericView, DocHandler):
110 guarantor_1=applicant_data.get('guarantor1Name'), 112 guarantor_1=applicant_data.get('guarantor1Name'),
111 guarantor_2=applicant_data.get('guarantor2Name'), 113 guarantor_2=applicant_data.get('guarantor2Name'),
112 document_name=document.get('documentName'), 114 document_name=document.get('documentName'),
113 document_scheme=document.get('documentScheme'), 115 document_scheme=document_scheme,
114 business_type=business_type, 116 business_type=business_type,
115 data_source=document.get('dataSource'), 117 data_source=data_source,
116 upload_finish_time=document.get('uploadFinishTime'), 118 upload_finish_time=document.get('uploadFinishTime'),
117 ) 119 )
118 except IntegrityError as e: 120 except IntegrityError as e:
...@@ -130,17 +132,17 @@ class UploadDocView(GenericView, DocHandler): ...@@ -130,17 +132,17 @@ class UploadDocView(GenericView, DocHandler):
130 guarantor_1=applicant_data.get('guarantor1Name'), 132 guarantor_1=applicant_data.get('guarantor1Name'),
131 guarantor_2=applicant_data.get('guarantor2Name'), 133 guarantor_2=applicant_data.get('guarantor2Name'),
132 document_name=document.get('documentName'), 134 document_name=document.get('documentName'),
133 document_scheme=document.get('documentScheme'), 135 document_scheme=self.fix_scheme(document_scheme),
134 data_source=document.get('dataSource'), 136 data_source=self.fix_data_source(data_source),
135 upload_finish_time=document.get('uploadFinishTime'), 137 upload_finish_time=document.get('uploadFinishTime'),
136 ) 138 )
137 # 3. 选择队列进入 139 # 3. 选择队列进入
138 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists() 140 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
139 value = ['{0}_{1}'.format(prefix, doc.id)] 141 tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
140 redis_res = rh.enqueue(value, is_priority) 142 enqueue_res = rh.enqueue(tasks, is_priority)
141 self.running_log.info('[doc upload success] [args={0}] [record_id={1}] [prefix={2}] [doc_id={3}] ' 143 self.running_log.info('[doc upload success] [args={0}] [record_id={1}] [business_type={2}] [doc_id={3}] '
142 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id, 144 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id,
143 is_priority, redis_res)) 145 is_priority, enqueue_res))
144 return response.ok() 146 return response.ok()
145 147
146 post.openapi_doc = ''' 148 post.openapi_doc = '''
...@@ -174,7 +176,8 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -174,7 +176,8 @@ class PriorityDocView(GenericView, DocHandler):
174 application_id = application_info.get('APPLICATION_ID') 176 application_id = application_info.get('APPLICATION_ID')
175 submit_datetime = application_info.get('SUBMIT_DATETIME') 177 submit_datetime = application_info.get('SUBMIT_DATETIME')
176 entity = application_info.get('ENTITY') 178 entity = application_info.get('ENTITY')
177 submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone()) 179 if submit_datetime.utcoffset() is not None:
180 submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone())
178 GCAPRecords.objects.create( 181 GCAPRecords.objects.create(
179 entity=entity, 182 entity=entity,
180 status=application_info.get('STATUS'), 183 status=application_info.get('STATUS'),
...@@ -190,14 +193,14 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -190,14 +193,14 @@ class PriorityDocView(GenericView, DocHandler):
190 doc_class, prefix = self.get_doc_class(entity) 193 doc_class, prefix = self.get_doc_class(entity)
191 doc_ids = doc_class.objects.filter(application_id=application_id, 194 doc_ids = doc_class.objects.filter(application_id=application_id,
192 status=DocStatus.INIT.value).values_list('id', flat=True) 195 status=DocStatus.INIT.value).values_list('id', flat=True)
193 task_str_list = ['{0}_{1}'.format(prefix, doc_id) for doc_id in doc_ids] 196 tasks_list = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc_id) for doc_id in doc_ids]
194 if not task_str_list: 197 if not tasks_list:
195 self.running_log.info( 198 self.running_log.info(
196 '[priority doc success] [args={0}] [task_str_list={1}]'.format(args, task_str_list)) 199 '[priority doc success] [args={0}]'.format(args))
197 else: 200 else:
198 enqueue_res = rh.enqueue(task_str_list, is_priority=True) 201 enqueue_res = rh.enqueue(tasks_list, is_priority=True)
199 self.running_log.info('[priority doc success] [args={0}] [task_str_list={1}] [enqueue_res={2}]'.format( 202 self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format(
200 args, task_str_list, enqueue_res)) 203 args, tasks_list, enqueue_res))
201 return response.ok() 204 return response.ok()
202 205
203 post.openapi_doc = ''' 206 post.openapi_doc = '''
...@@ -268,7 +271,7 @@ class DocView(GenericView, DocHandler): ...@@ -268,7 +271,7 @@ class DocView(GenericView, DocHandler):
268 @use_args(upload_pdf_args, location='files') 271 @use_args(upload_pdf_args, location='files')
269 def post(self, request, args): 272 def post(self, request, args):
270 # 1. 上传信息记录 273 # 1. 上传信息记录
271 const_str = '手工单' 274 const_str = consts.FIXED_APPLICATION_ID
272 metadata_version_id = str(int(time.time())) 275 metadata_version_id = str(int(time.time()))
273 upload_finish_time = timezone.now() 276 upload_finish_time = timezone.now()
274 document_scheme = random.choice(consts.DOC_SCHEME_LIST) 277 document_scheme = random.choice(consts.DOC_SCHEME_LIST)
...@@ -305,8 +308,8 @@ class DocView(GenericView, DocHandler): ...@@ -305,8 +308,8 @@ class DocView(GenericView, DocHandler):
305 ) 308 )
306 # 3. 选择队列进入 309 # 3. 选择队列进入
307 is_priority = False 310 is_priority = False
308 value = ['{0}_{1}'.format(prefix, doc.id)] 311 tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
309 redis_res = rh.enqueue(value, is_priority) 312 enqueue_res = rh.enqueue(tasks, is_priority)
310 313
311 pdf_file = args.get('pdf_file') 314 pdf_file = args.get('pdf_file')
312 save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id)) 315 save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id))
...@@ -314,7 +317,7 @@ class DocView(GenericView, DocHandler): ...@@ -314,7 +317,7 @@ class DocView(GenericView, DocHandler):
314 os.makedirs(save_dir_path, exist_ok=True) 317 os.makedirs(save_dir_path, exist_ok=True)
315 file_write(pdf_file, save_file_path) 318 file_write(pdf_file, save_file_path)
316 319
317 self.running_log.info('[mock doc upload success] [args={0}] [record_id={1}] [prefix={2}] [doc_id={3}] ' 320 self.running_log.info('[mock doc upload success] [args={0}] [record_id={1}] [business_type={2}] [doc_id={3}] '
318 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id, 321 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id,
319 is_priority, redis_res)) 322 is_priority, enqueue_res))
320 return response.ok() 323 return response.ok()
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!