37ca9589 by 周伟奇

Change data folder (per-document files now live under a `tmp` subdirectory of each business-type folder)

1 parent fe7d3a71
...@@ -29,8 +29,10 @@ sftp-config.json ...@@ -29,8 +29,10 @@ sftp-config.json
29 *.sqlite3 29 *.sqlite3
30 conf/* 30 conf/*
31 data/* 31 data/*
32 ocr/*
32 33
33 # 脚本 34 # 脚本
34 src/*.sh 35 src/*.sh
35 36
36 test*
...\ No newline at end of file ...\ No newline at end of file
37 test*
38 folder_ocr_process.py
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -16,6 +16,9 @@ SPLIT_STR = '_' ...@@ -16,6 +16,9 @@ SPLIT_STR = '_'
16 BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX] 16 BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
17 HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'} 17 HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
18 18
19 PRIORITY_WORDS = {'muw', 'MUW'}
20 TMP_DIR_NAME = 'tmp'
21
19 # -------EDMS相关--------------------------------------------------------------------------------------------------- 22 # -------EDMS相关---------------------------------------------------------------------------------------------------
20 23
21 SESSION_PREFIX = 'FHLSID' 24 SESSION_PREFIX = 'FHLSID'
......
...@@ -373,7 +373,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -373,7 +373,7 @@ class Command(BaseCommand, LoggerMixin):
373 373
374 try: 374 try:
375 # 2. 从EDMS获取PDF文件 375 # 2. 从EDMS获取PDF文件
376 doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id)) 376 doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id))
377 os.makedirs(doc_data_path, exist_ok=True) 377 os.makedirs(doc_data_path, exist_ok=True)
378 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) 378 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
379 self.pdf_download(doc, pdf_path) 379 self.pdf_download(doc, pdf_path)
...@@ -579,7 +579,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -579,7 +579,7 @@ class Command(BaseCommand, LoggerMixin):
579 579
580 # 4.2 重构Excel文件 580 # 4.2 重构Excel文件
581 doc, business_type = self.get_doc_object(task_str) 581 doc, business_type = self.get_doc_object(task_str)
582 doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id)) 582 doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id))
583 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) 583 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id))
584 img_save_path = os.path.join(doc_data_path, 'img') 584 img_save_path = os.path.join(doc_data_path, 'img')
585 # wb.save(src_excel_path) 585 # wb.save(src_excel_path)
......
...@@ -303,8 +303,8 @@ class BSWorkbook(Workbook): ...@@ -303,8 +303,8 @@ class BSWorkbook(Workbook):
303 period_idx = len(res_str) - 3 303 period_idx = len(res_str) - 3
304 if res_str[period_idx] == '.' and res_str[period_idx - 1] in {',', '.'}: # 364,.92 364..92 304 if res_str[period_idx] == '.' and res_str[period_idx - 1] in {',', '.'}: # 364,.92 364..92
305 res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:]) 305 res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:])
306 elif res_str[period_idx] == ',': 306 elif res_str[period_idx] in {',', ':', ':'}:
307 if res_str[period_idx - 1] in {',', '.'}: # 364.,92 364,,92 307 if res_str[period_idx - 1] in {',', '.', ':', ':'}: # 364.,92 364,,92
308 pre_idx = period_idx - 1 308 pre_idx = period_idx - 1
309 else: # 364,92 309 else: # 364,92
310 pre_idx = period_idx 310 pre_idx = period_idx
......
...@@ -178,6 +178,7 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -178,6 +178,7 @@ class PriorityDocView(GenericView, DocHandler):
178 application_info = args.get('APPLICATION_INFORMATION') 178 application_info = args.get('APPLICATION_INFORMATION')
179 application_id = application_info.get('APPLICATION_ID') 179 application_id = application_info.get('APPLICATION_ID')
180 submit_datetime = application_info.get('SUBMIT_DATETIME') 180 submit_datetime = application_info.get('SUBMIT_DATETIME')
181 intermediate_decision = application_info.get('INTERMEDIATE_DECISION')
181 entity = application_info.get('ENTITY') 182 entity = application_info.get('ENTITY')
182 if submit_datetime.utcoffset() is not None: 183 if submit_datetime.utcoffset() is not None:
183 submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone()) 184 submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone())
...@@ -187,9 +188,14 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -187,9 +188,14 @@ class PriorityDocView(GenericView, DocHandler):
187 rating=application_info.get('RATING'), 188 rating=application_info.get('RATING'),
188 application_id=application_id, 189 application_id=application_id,
189 application_version=application_info.get('APPLICATION_VERSION'), 190 application_version=application_info.get('APPLICATION_VERSION'),
190 intermediate_decision=application_info.get('INTERMEDIATE_DECISION'), 191 intermediate_decision=intermediate_decision,
191 submit_datetime=submit_datetime, 192 submit_datetime=submit_datetime,
192 ) 193 )
194
195 if intermediate_decision not in consts.PRIORITY_WORDS:
196 self.running_log.info('[priority doc skip] [args={0}]'.format(args))
197 return response.ok()
198
193 _, created = PriorityApplication.objects.update_or_create(application_id=application_id, 199 _, created = PriorityApplication.objects.update_or_create(application_id=application_id,
194 defaults={'on_off': True}) 200 defaults={'on_off': True})
195 if created: 201 if created:
...@@ -282,7 +288,8 @@ class DocView(GenericView, DocHandler): ...@@ -282,7 +288,8 @@ class DocView(GenericView, DocHandler):
282 if not pdf_file.name.endswith('pdf'): 288 if not pdf_file.name.endswith('pdf'):
283 self.invalid_params(msg='invalid params: not a PDF file') 289 self.invalid_params(msg='invalid params: not a PDF file')
284 290
285 tmp_save_path = os.path.join(conf.DATA_DIR, '{0}.pdf'.format(metadata_version_id)) 291 business_type = random.choice(consts.BUSINESS_TYPE_LIST)
292 tmp_save_path = os.path.join(conf.DATA_DIR, business_type, '{0}.pdf'.format(metadata_version_id))
286 file_write(pdf_file, tmp_save_path) 293 file_write(pdf_file, tmp_save_path)
287 294
288 try: 295 try:
...@@ -302,7 +309,6 @@ class DocView(GenericView, DocHandler): ...@@ -302,7 +309,6 @@ class DocView(GenericView, DocHandler):
302 upload_finish_time = timezone.now() 309 upload_finish_time = timezone.now()
303 document_scheme = random.choice(consts.DOC_SCHEME_LIST) 310 document_scheme = random.choice(consts.DOC_SCHEME_LIST)
304 data_source = random.choice(consts.DATA_SOURCE_LIST) 311 data_source = random.choice(consts.DATA_SOURCE_LIST)
305 business_type = random.choice(consts.BUSINESS_TYPE_LIST)
306 UploadDocRecords.objects.create( 312 UploadDocRecords.objects.create(
307 metadata_version_id=metadata_version_id, 313 metadata_version_id=metadata_version_id,
308 application_id=application_id, 314 application_id=application_id,
...@@ -333,7 +339,7 @@ class DocView(GenericView, DocHandler): ...@@ -333,7 +339,7 @@ class DocView(GenericView, DocHandler):
333 ) 339 )
334 340
335 # 3.pdf文件移动 341 # 3.pdf文件移动
336 save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id)) 342 save_dir_path = os.path.join(conf.DATA_DIR, business_type, consts.TMP_DIR_NAME, str(doc.id))
337 save_file_path = os.path.join(save_dir_path, '{0}.pdf'.format(doc.id)) 343 save_file_path = os.path.join(save_dir_path, '{0}.pdf'.format(doc.id))
338 os.makedirs(save_dir_path, exist_ok=True) 344 os.makedirs(save_dir_path, exist_ok=True)
339 # file_write(pdf_file, save_file_path) 345 # file_write(pdf_file, save_file_path)
......
...@@ -455,6 +455,30 @@ afc_sql = """ ...@@ -455,6 +455,30 @@ afc_sql = """
455 on afc_doc (start_time, end_time); 455 on afc_doc (start_time, end_time);
456 """ 456 """
457 457
458 keywords_sql = """
459 INSERT INTO afc.dbo.keywords (keyword, type, update_time, create_time) VALUES
460 (N'利息', 0, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
461 (N'结息', 0, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
462 (N'工资', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
463 (N'代发', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
464 (N'养老保险', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
465 (N'奖金', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
466 (N'理财', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
467 (N'赎回', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
468 (N'微信', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
469 (N'支付宝', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
470 (N'财付通', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
471 (N'放款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
472 (N'还款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
473 (N'贷款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
474 (N'银证转账', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
475 (N'银行卡户名(姓名)', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
476 (N'转账/转账', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
477 (N'商品/线下', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
478 (N'转账', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'),
479 (N'二维码收款', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000');
480 """
481
458 hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True) 482 hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
459 483
460 hil_cursor = hil_cnxn.cursor() 484 hil_cursor = hil_cnxn.cursor()
...@@ -468,6 +492,7 @@ afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit= ...@@ -468,6 +492,7 @@ afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=
468 492
469 afc_cursor = afc_cnxn.cursor() 493 afc_cursor = afc_cnxn.cursor()
470 afc_cursor.execute(afc_sql) 494 afc_cursor.execute(afc_sql)
495 afc_cursor.execute(keywords_sql)
471 496
472 afc_cursor.close() 497 afc_cursor.close()
473 afc_cnxn.close() 498 afc_cnxn.close()
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!