change data folder
Showing 6 changed files with 45 additions and 9 deletions
... | @@ -29,8 +29,10 @@ sftp-config.json | ... | @@ -29,8 +29,10 @@ sftp-config.json |
29 | *.sqlite3 | 29 | *.sqlite3 |
30 | conf/* | 30 | conf/* |
31 | data/* | 31 | data/* |
32 | ocr/* | ||
32 | 33 | ||
33 | # 脚本 | 34 | # 脚本 |
34 | src/*.sh | 35 | src/*.sh |
35 | 36 | ||
36 | test* | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
37 | test* | ||
38 | folder_ocr_process.py | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -16,6 +16,9 @@ SPLIT_STR = '_' | ... | @@ -16,6 +16,9 @@ SPLIT_STR = '_' |
16 | BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX] | 16 | BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX] |
17 | HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'} | 17 | HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'} |
18 | 18 | ||
19 | PRIORITY_WORDS = {'muw', 'MUW'} | ||
20 | TMP_DIR_NAME = 'tmp' | ||
21 | |||
19 | # -------EDMS相关--------------------------------------------------------------------------------------------------- | 22 | # -------EDMS相关--------------------------------------------------------------------------------------------------- |
20 | 23 | ||
21 | SESSION_PREFIX = 'FHLSID' | 24 | SESSION_PREFIX = 'FHLSID' | ... | ... |
... | @@ -373,7 +373,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -373,7 +373,7 @@ class Command(BaseCommand, LoggerMixin): |
373 | 373 | ||
374 | try: | 374 | try: |
375 | # 2. 从EDMS获取PDF文件 | 375 | # 2. 从EDMS获取PDF文件 |
376 | doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id)) | 376 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) |
377 | os.makedirs(doc_data_path, exist_ok=True) | 377 | os.makedirs(doc_data_path, exist_ok=True) |
378 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) | 378 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) |
379 | self.pdf_download(doc, pdf_path) | 379 | self.pdf_download(doc, pdf_path) |
... | @@ -579,7 +579,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -579,7 +579,7 @@ class Command(BaseCommand, LoggerMixin): |
579 | 579 | ||
580 | # 4.2 重构Excel文件 | 580 | # 4.2 重构Excel文件 |
581 | doc, business_type = self.get_doc_object(task_str) | 581 | doc, business_type = self.get_doc_object(task_str) |
582 | doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id)) | 582 | doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id)) |
583 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) | 583 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) |
584 | img_save_path = os.path.join(doc_data_path, 'img') | 584 | img_save_path = os.path.join(doc_data_path, 'img') |
585 | # wb.save(src_excel_path) | 585 | # wb.save(src_excel_path) | ... | ... |
... | @@ -303,8 +303,8 @@ class BSWorkbook(Workbook): | ... | @@ -303,8 +303,8 @@ class BSWorkbook(Workbook): |
303 | period_idx = len(res_str) - 3 | 303 | period_idx = len(res_str) - 3 |
304 | if res_str[period_idx] == '.' and res_str[period_idx - 1] in {',', '.'}: # 364,.92 364..92 | 304 | if res_str[period_idx] == '.' and res_str[period_idx - 1] in {',', '.'}: # 364,.92 364..92 |
305 | res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:]) | 305 | res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:]) |
306 | elif res_str[period_idx] == ',': | 306 | elif res_str[period_idx] in {',', ':', ':'}: |
307 | if res_str[period_idx - 1] in {',', '.'}: # 364.,92 364,,92 | 307 | if res_str[period_idx - 1] in {',', '.', ':', ':'}: # 364.,92 364,,92 |
308 | pre_idx = period_idx - 1 | 308 | pre_idx = period_idx - 1 |
309 | else: # 364,92 | 309 | else: # 364,92 |
310 | pre_idx = period_idx | 310 | pre_idx = period_idx | ... | ... |
... | @@ -178,6 +178,7 @@ class PriorityDocView(GenericView, DocHandler): | ... | @@ -178,6 +178,7 @@ class PriorityDocView(GenericView, DocHandler): |
178 | application_info = args.get('APPLICATION_INFORMATION') | 178 | application_info = args.get('APPLICATION_INFORMATION') |
179 | application_id = application_info.get('APPLICATION_ID') | 179 | application_id = application_info.get('APPLICATION_ID') |
180 | submit_datetime = application_info.get('SUBMIT_DATETIME') | 180 | submit_datetime = application_info.get('SUBMIT_DATETIME') |
181 | intermediate_decision = application_info.get('INTERMEDIATE_DECISION') | ||
181 | entity = application_info.get('ENTITY') | 182 | entity = application_info.get('ENTITY') |
182 | if submit_datetime.utcoffset() is not None: | 183 | if submit_datetime.utcoffset() is not None: |
183 | submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone()) | 184 | submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone()) |
... | @@ -187,9 +188,14 @@ class PriorityDocView(GenericView, DocHandler): | ... | @@ -187,9 +188,14 @@ class PriorityDocView(GenericView, DocHandler): |
187 | rating=application_info.get('RATING'), | 188 | rating=application_info.get('RATING'), |
188 | application_id=application_id, | 189 | application_id=application_id, |
189 | application_version=application_info.get('APPLICATION_VERSION'), | 190 | application_version=application_info.get('APPLICATION_VERSION'), |
190 | intermediate_decision=application_info.get('INTERMEDIATE_DECISION'), | 191 | intermediate_decision=intermediate_decision, |
191 | submit_datetime=submit_datetime, | 192 | submit_datetime=submit_datetime, |
192 | ) | 193 | ) |
194 | |||
195 | if intermediate_decision not in consts.PRIORITY_WORDS: | ||
196 | self.running_log.info('[priority doc skip] [args={0}]'.format(args)) | ||
197 | return response.ok() | ||
198 | |||
193 | _, created = PriorityApplication.objects.update_or_create(application_id=application_id, | 199 | _, created = PriorityApplication.objects.update_or_create(application_id=application_id, |
194 | defaults={'on_off': True}) | 200 | defaults={'on_off': True}) |
195 | if created: | 201 | if created: |
... | @@ -282,7 +288,8 @@ class DocView(GenericView, DocHandler): | ... | @@ -282,7 +288,8 @@ class DocView(GenericView, DocHandler): |
282 | if not pdf_file.name.endswith('pdf'): | 288 | if not pdf_file.name.endswith('pdf'): |
283 | self.invalid_params(msg='invalid params: not a PDF file') | 289 | self.invalid_params(msg='invalid params: not a PDF file') |
284 | 290 | ||
285 | tmp_save_path = os.path.join(conf.DATA_DIR, '{0}.pdf'.format(metadata_version_id)) | 291 | business_type = random.choice(consts.BUSINESS_TYPE_LIST) |
292 | tmp_save_path = os.path.join(conf.DATA_DIR, business_type, '{0}.pdf'.format(metadata_version_id)) | ||
286 | file_write(pdf_file, tmp_save_path) | 293 | file_write(pdf_file, tmp_save_path) |
287 | 294 | ||
288 | try: | 295 | try: |
... | @@ -302,7 +309,6 @@ class DocView(GenericView, DocHandler): | ... | @@ -302,7 +309,6 @@ class DocView(GenericView, DocHandler): |
302 | upload_finish_time = timezone.now() | 309 | upload_finish_time = timezone.now() |
303 | document_scheme = random.choice(consts.DOC_SCHEME_LIST) | 310 | document_scheme = random.choice(consts.DOC_SCHEME_LIST) |
304 | data_source = random.choice(consts.DATA_SOURCE_LIST) | 311 | data_source = random.choice(consts.DATA_SOURCE_LIST) |
305 | business_type = random.choice(consts.BUSINESS_TYPE_LIST) | ||
306 | UploadDocRecords.objects.create( | 312 | UploadDocRecords.objects.create( |
307 | metadata_version_id=metadata_version_id, | 313 | metadata_version_id=metadata_version_id, |
308 | application_id=application_id, | 314 | application_id=application_id, |
... | @@ -333,7 +339,7 @@ class DocView(GenericView, DocHandler): | ... | @@ -333,7 +339,7 @@ class DocView(GenericView, DocHandler): |
333 | ) | 339 | ) |
334 | 340 | ||
335 | # 3.pdf文件移动 | 341 | # 3.pdf文件移动 |
336 | save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id)) | 342 | save_dir_path = os.path.join(conf.DATA_DIR, business_type, consts.TMP_DIR_NAME, str(doc.id)) |
337 | save_file_path = os.path.join(save_dir_path, '{0}.pdf'.format(doc.id)) | 343 | save_file_path = os.path.join(save_dir_path, '{0}.pdf'.format(doc.id)) |
338 | os.makedirs(save_dir_path, exist_ok=True) | 344 | os.makedirs(save_dir_path, exist_ok=True) |
339 | # file_write(pdf_file, save_file_path) | 345 | # file_write(pdf_file, save_file_path) | ... | ... |
... | @@ -455,6 +455,30 @@ afc_sql = """ | ... | @@ -455,6 +455,30 @@ afc_sql = """ |
455 | on afc_doc (start_time, end_time); | 455 | on afc_doc (start_time, end_time); |
456 | """ | 456 | """ |
457 | 457 | ||
458 | keywords_sql = """ | ||
459 | INSERT INTO afc.dbo.keywords (keyword, type, update_time, create_time) VALUES | ||
460 | (N'利息', 0, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
461 | (N'结息', 0, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
462 | (N'工资', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
463 | (N'代发', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
464 | (N'养老保险', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
465 | (N'奖金', 1, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
466 | (N'理财', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
467 | (N'赎回', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
468 | (N'微信', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
469 | (N'支付宝', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
470 | (N'财付通', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
471 | (N'放款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
472 | (N'还款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
473 | (N'贷款', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
474 | (N'银证转账', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
475 | (N'银行卡户名(姓名)', 2, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
476 | (N'转账/转账', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
477 | (N'商品/线下', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
478 | (N'转账', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'), | ||
479 | (N'二维码收款', 3, N'2020-11-09 16:14:58.000', N'2020-11-09 16:14:59.000'); | ||
480 | """ | ||
481 | |||
458 | hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True) | 482 | hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True) |
459 | 483 | ||
460 | hil_cursor = hil_cnxn.cursor() | 484 | hil_cursor = hil_cnxn.cursor() |
... | @@ -468,6 +492,7 @@ afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit= | ... | @@ -468,6 +492,7 @@ afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit= |
468 | 492 | ||
469 | afc_cursor = afc_cnxn.cursor() | 493 | afc_cursor = afc_cnxn.cursor() |
470 | afc_cursor.execute(afc_sql) | 494 | afc_cursor.execute(afc_sql) |
495 | afc_cursor.execute(keywords_sql) | ||
471 | 496 | ||
472 | afc_cursor.close() | 497 | afc_cursor.close() |
473 | afc_cnxn.close() | 498 | afc_cnxn.close() | ... | ... |
-
Please register or sign in to post a comment