97994674 by 周伟奇

ocr excel upload eDMS

1 parent 7aa0284c
......@@ -12,13 +12,16 @@ Django==2.1
django-oauth-toolkit==1.3.2
djangorestframework==3.9.0
djangorestframework-jwt==1.11.0
et-xmlfile==1.0.1
idna==2.9
idna-ssl==1.1.0
isodate==0.6.0
jdcal==1.4.1
lxml==4.5.1
marshmallow==3.6.1
multidict==4.7.6
oauthlib==3.1.0
openpyxl==3.0.4
pdfminer3k==1.3.4
Pillow==7.1.2
ply==3.11
......
PAGE_DEFAULT = 1
PAGE_SIZE_DEFAULT = 10
DOC_SCHEME_LIST = ['Acceptance', 'Settlement', 'Contract Management']
DATA_SOURCE_LIST = ['POS', 'EAPP', 'Econtract']
BUSINESS_TYPE_LIST = ['HIL', 'AFC']
HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
FIXED_APPLICATION_ID = '手工单'
DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']
HIL_PREFIX = 'HIL'
AFC_PREFIX = 'AFC'
SPLIT_STR = '_'
BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
SESSION_PREFIX = 'FHLSID'
CUSTOM_CLIENT = 'CustomClient'
......@@ -15,12 +19,22 @@ FIXED_FILE_SIZE = 0
DOWNLOAD_ACTION_TYPE = 'Downloaded'
DOC_SCHEMA_ID_FILL = {
'Acceptance': (1, 'DFE-AutoFilingScript'),
'Settlement': (20, 'DFE-AutoFilingScript'),
'Contract Management': (86, 'Schema-Based')
'ACCEPTANCE': (1, 'DFE-AutoFilingScript'),
'SETTLEMENT': (20, 'DFE-AutoFilingScript'),
'CONTRACT MANAGEMENT': (86, 'Schema-Based')
}
BUSINESS_TYPE_DICT = {
HIL_PREFIX: 'CO00002',
AFC_PREFIX: 'CO00001'
}
DOC_SCHEMA_TYPE = 'ElectronicRecord'
APPLICATION_ID_META_FIELD_id = 1
DEALER_CODE_META_FIELD_id = 13
BUSINESS_TYPE_META_FIELD_id = 93
DEALER_CODE = 'ocr_situ_group'
AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
PROOF_COL_TITLE = '核对结果'
PROOF_RES = ('对', '错')
META_SHEET_TITLE = '关键信息提取和展示'
......
import os
import requests
from zeep import Client, xsd
from settings import conf
......@@ -65,9 +66,9 @@ class EDMS:
params = {'token': token}
self.download_handler(params, headers, save_path)
def create_upload_token(self, headers):
    """Request an upload token through ``self.rc_client``.

    The token is always requested with ``consts.FIXED_FILE_SIZE`` as the
    declared file size.

    :param headers: extra HTTP headers applied to the client for this call
    :return: whatever ``CreateUploadToken`` returns
    """
    rc_client = self.rc_client
    with rc_client.settings(extra_http_headers=headers):
        upload_token = rc_client.service.CreateUploadToken(fileSize=consts.FIXED_FILE_SIZE)
    return upload_token
def upload_handler(self, file_path, params, headers):
......@@ -80,11 +81,19 @@ class EDMS:
else:
raise Exception
def get_doc_info(self, token, doc_info):
doc_schema_id, auto_filing = consts.DOC_SCHEMA_ID_FILL.get(doc_info.get('document_scheme'))
application_id = doc_info.get('application_id')
doc_file_name = doc_info.get('doc_file_name')
business_type = doc_info.get('business_type')
@staticmethod
def get_doc_file_name(doc_name):
    """Return ``doc_name`` without a trailing ``.pdf`` extension.

    The extension is matched case-insensitively and must be a real
    extension (introduced by a dot): ``report.PDF`` becomes ``report``,
    while a name that merely ends in the letters ``pdf`` is left alone.

    :param doc_name: document name, with or without an extension
    :return: the name with its PDF extension removed, or ``doc_name``
        unchanged when it does not carry a PDF extension
    """
    stem, extension = os.path.splitext(doc_name)
    # Compare the parsed extension rather than the raw suffix so that
    # upper-case extensions are stripped and look-alike suffixes are not.
    if extension.lower() == '.pdf':
        return stem
    return doc_name
def get_doc_info(self, token, doc, business_type, file_path):
business_type = consts.BUSINESS_TYPE_DICT.get(business_type)
doc_schema_id, auto_filing = consts.DOC_SCHEMA_ID_FILL.get(doc.document_scheme)
application_id = doc.application_id
doc_file_name = self.get_doc_file_name(doc.document_name)
origin_file_name = os.path.basename(file_path)
fields_with_value = [
{'FieldId': consts.APPLICATION_ID_META_FIELD_id,
'FieldValue': xsd.AnyObject(xsd.String(), application_id)},
......@@ -99,20 +108,20 @@ class EDMS:
'DocumentName': doc_file_name,
'FieldsWithValues': fields_with_values,
'UploadToken': token,
'OriginalFileName': doc_file_name,
'OriginalFileName': origin_file_name,
'SendEmailToMembers': False,
'AutoFilingScriptToUse': auto_filing,
'DocumentSchemaType': consts.DOC_SCHEMA_TYPE,
}
return info
def add_doc_info(self, headers, token, doc, business_type, file_path):
    """Submit the metadata of an uploaded file via ``AddDocumentInfo``.

    The payload is built by ``get_doc_info`` and sent through
    ``self.dm_client`` with the given headers applied.

    :param headers: extra HTTP headers applied to the client for this call
    :param token: upload token forwarded to ``get_doc_info``
    :param doc: document record forwarded to ``get_doc_info``
    :param business_type: business type forwarded to ``get_doc_info``
    :param file_path: path of the uploaded file, forwarded to ``get_doc_info``
    :return: whatever ``AddDocumentInfo`` returns
    """
    payload = self.get_doc_info(token, doc, business_type, file_path)
    dm_client = self.dm_client
    with dm_client.settings(extra_http_headers=headers):
        version_id = dm_client.service.AddDocumentInfo(info=payload)
    return version_id
def upload(self, file_path, file_size, doc_info):
def upload(self, file_path, doc, business_type):
# file_path = '/Users/clay/Postman/files/OCRuploadTest4.txt'
# file_size = 16
# doc_info = {
......@@ -122,12 +131,12 @@ class EDMS:
# 'business_type': 'CO00001',
# }
headers = self.get_headers()
token = self.create_upload_token(headers, file_size)
token = self.create_upload_token(headers)
headers.update({'Content-Type': 'application/octet-stream'})
params = {'token': token}
self.upload_handler(file_path, params, headers)
headers.pop('Content-Type')
metadata_version_id = self.add_doc_info(headers, token, doc_info)
metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path)
return metadata_version_id
......
......@@ -26,7 +26,21 @@ class DocHandler:
@staticmethod
def get_doc_class(business_type):
    """Pick the document model and task prefix for a business type.

    :param business_type: value tested for membership in ``consts.HIL_SET``
    :return: ``(HILDoc, consts.HIL_PREFIX)`` when the value is in
        ``consts.HIL_SET``, otherwise ``(AFCDoc, consts.AFC_PREFIX)``
    """
    if business_type in consts.HIL_SET:
        return HILDoc, consts.HIL_PREFIX
    return AFCDoc, consts.AFC_PREFIX
def fix_scheme(self, scheme):
    """Normalize a document scheme to an entry of ``consts.DOC_SCHEME_LIST``.

    :param scheme: raw scheme value; may be any case, or not a string at all
    :return: ``scheme`` itself when it is already listed, its upper-cased
        form when that is listed, otherwise the first entry of
        ``consts.DOC_SCHEME_LIST`` as the default
    """
    allowed = consts.DOC_SCHEME_LIST
    if scheme in allowed:
        return scheme
    # Only strings can be upper-cased; anything else (e.g. None from a
    # missing field) falls through to the default instead of raising.
    if isinstance(scheme, str):
        upper_scheme = scheme.upper()
        if upper_scheme in allowed:
            return upper_scheme
    return allowed[0]
def fix_data_source(self, data_source):
    """Normalize a data source to an entry of ``consts.DATA_SOURCE_LIST``.

    :param data_source: raw data-source value; may be any case, or not a
        string at all
    :return: ``data_source`` itself when it is already listed, its
        upper-cased form when that is listed, otherwise the first entry of
        ``consts.DATA_SOURCE_LIST`` as the default
    """
    allowed = consts.DATA_SOURCE_LIST
    if data_source in allowed:
        return data_source
    # Only strings can be upper-cased; anything else (e.g. None from a
    # missing field) falls through to the default instead of raising.
    if isinstance(data_source, str):
        upper_source = data_source.upper()
        if upper_source in allowed:
            return upper_source
    return allowed[0]
......
......@@ -7,20 +7,3 @@ class DocStatus(NamedEnum):
PROCESS_FAILED = (2, '识别失败')
UPLOAD_FAILED = (3, '同步失败')
COMPLETE = (4, '已完成')
class DocScheme(NamedEnum):
ACCEPTANCE = (0, "Acceptance")
SETTLEMENT = (1, 'Settlement')
CONTRACT_MANAGEMENT = (2, 'Contract Management')
class BusinessType(NamedEnum):
AFC = (0, "CO00001")
HIL = (1, 'CO00002')
class DataSource(NamedEnum):
POS = (0, "POS")
EAPP = (1, 'EAPP')
ECONTRACT = (2, 'Econtract')
......
......@@ -60,7 +60,7 @@ doc_list_args = {
'status': fields.Int(required=False,
validate=validate.OneOf(DocStatus.get_value_lst())),
'application_id': fields.Str(required=False, validate=validate.Length(max=64)),
'data_source': fields.Str(required=False, validate=validate.Length(max=64)),
'data_source': fields.Str(required=False, validate=validate.OneOf(consts.DATA_SOURCE_LIST)),
'business_type': fields.Str(required=True, validate=validate.OneOf(consts.BUSINESS_TYPE_LIST)),
'upload_time_start': fields.Date(required=False),
'upload_time_end': fields.Date(required=False),
......@@ -100,6 +100,8 @@ class UploadDocView(GenericView, DocHandler):
document = args.get('document')
business_type = document.get('businessType')
application_id = application_data.get('applicationId')
document_scheme = document.get('documentScheme')
data_source = document.get('dataSource')
try:
# 1. 上传信息记录
record = UploadDocRecords.objects.create(
......@@ -110,9 +112,9 @@ class UploadDocView(GenericView, DocHandler):
guarantor_1=applicant_data.get('guarantor1Name'),
guarantor_2=applicant_data.get('guarantor2Name'),
document_name=document.get('documentName'),
document_scheme=document.get('documentScheme'),
document_scheme=document_scheme,
business_type=business_type,
data_source=document.get('dataSource'),
data_source=data_source,
upload_finish_time=document.get('uploadFinishTime'),
)
except IntegrityError as e:
......@@ -130,17 +132,17 @@ class UploadDocView(GenericView, DocHandler):
guarantor_1=applicant_data.get('guarantor1Name'),
guarantor_2=applicant_data.get('guarantor2Name'),
document_name=document.get('documentName'),
document_scheme=document.get('documentScheme'),
data_source=document.get('dataSource'),
document_scheme=self.fix_scheme(document_scheme),
data_source=self.fix_data_source(data_source),
upload_finish_time=document.get('uploadFinishTime'),
)
# 3. 选择队列进入
is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
value = ['{0}_{1}'.format(prefix, doc.id)]
redis_res = rh.enqueue(value, is_priority)
self.running_log.info('[doc upload success] [args={0}] [record_id={1}] [prefix={2}] [doc_id={3}] '
tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
enqueue_res = rh.enqueue(tasks, is_priority)
self.running_log.info('[doc upload success] [args={0}] [record_id={1}] [business_type={2}] [doc_id={3}] '
'[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id,
is_priority, redis_res))
is_priority, enqueue_res))
return response.ok()
post.openapi_doc = '''
......@@ -174,7 +176,8 @@ class PriorityDocView(GenericView, DocHandler):
application_id = application_info.get('APPLICATION_ID')
submit_datetime = application_info.get('SUBMIT_DATETIME')
entity = application_info.get('ENTITY')
submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone())
if submit_datetime.utcoffset() is not None:
submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone())
GCAPRecords.objects.create(
entity=entity,
status=application_info.get('STATUS'),
......@@ -190,14 +193,14 @@ class PriorityDocView(GenericView, DocHandler):
doc_class, prefix = self.get_doc_class(entity)
doc_ids = doc_class.objects.filter(application_id=application_id,
status=DocStatus.INIT.value).values_list('id', flat=True)
task_str_list = ['{0}_{1}'.format(prefix, doc_id) for doc_id in doc_ids]
if not task_str_list:
tasks_list = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc_id) for doc_id in doc_ids]
if not tasks_list:
self.running_log.info(
'[priority doc success] [args={0}] [task_str_list={1}]'.format(args, task_str_list))
'[priority doc success] [args={0}]'.format(args))
else:
enqueue_res = rh.enqueue(task_str_list, is_priority=True)
self.running_log.info('[priority doc success] [args={0}] [task_str_list={1}] [enqueue_res={2}]'.format(
args, task_str_list, enqueue_res))
enqueue_res = rh.enqueue(tasks_list, is_priority=True)
self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format(
args, tasks_list, enqueue_res))
return response.ok()
post.openapi_doc = '''
......@@ -268,7 +271,7 @@ class DocView(GenericView, DocHandler):
@use_args(upload_pdf_args, location='files')
def post(self, request, args):
# 1. 上传信息记录
const_str = '手工单'
const_str = consts.FIXED_APPLICATION_ID
metadata_version_id = str(int(time.time()))
upload_finish_time = timezone.now()
document_scheme = random.choice(consts.DOC_SCHEME_LIST)
......@@ -305,8 +308,8 @@ class DocView(GenericView, DocHandler):
)
# 3. 选择队列进入
is_priority = False
value = ['{0}_{1}'.format(prefix, doc.id)]
redis_res = rh.enqueue(value, is_priority)
tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
enqueue_res = rh.enqueue(tasks, is_priority)
pdf_file = args.get('pdf_file')
save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id))
......@@ -314,7 +317,7 @@ class DocView(GenericView, DocHandler):
os.makedirs(save_dir_path, exist_ok=True)
file_write(pdf_file, save_file_path)
self.running_log.info('[mock doc upload success] [args={0}] [record_id={1}] [prefix={2}] [doc_id={3}] '
self.running_log.info('[mock doc upload success] [args={0}] [record_id={1}] [business_type={2}] [doc_id={3}] '
'[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id,
is_priority, redis_res))
is_priority, enqueue_res))
return response.ok()
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!