37ca9589 by 周伟奇

change data folder

1 parent fe7d3a71
......@@ -29,8 +29,10 @@ sftp-config.json
*.sqlite3
conf/*
data/*
ocr/*
# 脚本
src/*.sh
test*
folder_ocr_process.py
\ No newline at end of file
......
......@@ -16,6 +16,9 @@ SPLIT_STR = '_'
BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
PRIORITY_WORDS = {'muw', 'MUW'}
TMP_DIR_NAME = 'tmp'
# -------EDMS相关---------------------------------------------------------------------------------------------------
SESSION_PREFIX = 'FHLSID'
......
......@@ -373,7 +373,7 @@ class Command(BaseCommand, LoggerMixin):
try:
# 2. 从EDMS获取PDF文件
doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id))
doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id))
os.makedirs(doc_data_path, exist_ok=True)
pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
self.pdf_download(doc, pdf_path)
......@@ -579,7 +579,7 @@ class Command(BaseCommand, LoggerMixin):
# 4.2 重构Excel文件
doc, business_type = self.get_doc_object(task_str)
doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id))
doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id))
excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id))
img_save_path = os.path.join(doc_data_path, 'img')
# wb.save(src_excel_path)
......
......@@ -303,8 +303,8 @@ class BSWorkbook(Workbook):
period_idx = len(res_str) - 3
if res_str[period_idx] == '.' and res_str[period_idx - 1] in {',', '.'}: # 364,.92 364..92
res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:])
elif res_str[period_idx] == ',':
if res_str[period_idx - 1] in {',', '.'}: # 364.,92 364,,92
elif res_str[period_idx] in {',', ':', ':'}:
if res_str[period_idx - 1] in {',', '.', ':', ':'}: # 364.,92 364,,92
pre_idx = period_idx - 1
else: # 364,92
pre_idx = period_idx
......
......@@ -178,6 +178,7 @@ class PriorityDocView(GenericView, DocHandler):
application_info = args.get('APPLICATION_INFORMATION')
application_id = application_info.get('APPLICATION_ID')
submit_datetime = application_info.get('SUBMIT_DATETIME')
intermediate_decision = application_info.get('INTERMEDIATE_DECISION')
entity = application_info.get('ENTITY')
if submit_datetime.utcoffset() is not None:
submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone())
......@@ -187,9 +188,14 @@ class PriorityDocView(GenericView, DocHandler):
rating=application_info.get('RATING'),
application_id=application_id,
application_version=application_info.get('APPLICATION_VERSION'),
intermediate_decision=application_info.get('INTERMEDIATE_DECISION'),
intermediate_decision=intermediate_decision,
submit_datetime=submit_datetime,
)
if intermediate_decision not in consts.PRIORITY_WORDS:
self.running_log.info('[priority doc skip] [args={0}]'.format(args))
return response.ok()
_, created = PriorityApplication.objects.update_or_create(application_id=application_id,
defaults={'on_off': True})
if created:
......@@ -282,7 +288,8 @@ class DocView(GenericView, DocHandler):
if not pdf_file.name.endswith('pdf'):
self.invalid_params(msg='invalid params: not a PDF file')
tmp_save_path = os.path.join(conf.DATA_DIR, '{0}.pdf'.format(metadata_version_id))
business_type = random.choice(consts.BUSINESS_TYPE_LIST)
tmp_save_path = os.path.join(conf.DATA_DIR, business_type, '{0}.pdf'.format(metadata_version_id))
file_write(pdf_file, tmp_save_path)
try:
......@@ -302,7 +309,6 @@ class DocView(GenericView, DocHandler):
upload_finish_time = timezone.now()
document_scheme = random.choice(consts.DOC_SCHEME_LIST)
data_source = random.choice(consts.DATA_SOURCE_LIST)
business_type = random.choice(consts.BUSINESS_TYPE_LIST)
UploadDocRecords.objects.create(
metadata_version_id=metadata_version_id,
application_id=application_id,
......@@ -333,7 +339,7 @@ class DocView(GenericView, DocHandler):
)
# 3.pdf文件移动
save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id))
save_dir_path = os.path.join(conf.DATA_DIR, business_type, consts.TMP_DIR_NAME, str(doc.id))
save_file_path = os.path.join(save_dir_path, '{0}.pdf'.format(doc.id))
os.makedirs(save_dir_path, exist_ok=True)
# file_write(pdf_file, save_file_path)
......
......@@ -455,6 +455,30 @@ afc_sql = """
on afc_doc (start_time, end_time);
"""
# Seed data for the afc.dbo.keywords table: a single multi-row INSERT that
# supplies (keyword, type, update_time, create_time) for 20 Chinese keywords.
# The `type` column takes the values 0-3 in these rows; what each value means
# is not visible in this chunk.
# NOTE(review): presumably `type` is a keyword category -- confirm against the
# code that reads this table.
# Every row carries the same fixed update_time / create_time literals.
# This is a plain INSERT, so executing it again would add the same rows again
# unless the table has a constraint that rejects duplicates (not visible here).
keywords_sql = """
INSERT INTO afc.dbo.keywords (keyword, type, update_time, create_time) VALUES
(N'利息', 0, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'结息', 0, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'工资', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'代发', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'养老保险', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'奖金', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'理财', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'赎回', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'微信', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'支付宝', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'财付通', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'放款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'还款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'贷款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'银证转账', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'银行卡户名(姓名)', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'转账/转账', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'商品/线下', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'转账', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
(N'二维码收款', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000');
"""
hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
hil_cursor = hil_cnxn.cursor()
......@@ -468,6 +492,7 @@ afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=
afc_cursor = afc_cnxn.cursor()
afc_cursor.execute(afc_sql)
afc_cursor.execute(keywords_sql)
afc_cursor.close()
afc_cnxn.close()
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!