554d2f4f by 周伟奇

Merge branch 'feature/bs_excel' into feature/0611

2 parents 174d2005 b17b3c65
...@@ -99,6 +99,7 @@ RES_FAILED = '识别失败' ...@@ -99,6 +99,7 @@ RES_FAILED = '识别失败'
99 RES_FAILED_1 = '识别失败(阶段1)' 99 RES_FAILED_1 = '识别失败(阶段1)'
100 RES_FAILED_2 = '识别失败(阶段2)' 100 RES_FAILED_2 = '识别失败(阶段2)'
101 RES_FAILED_3 = '识别失败(阶段1数据格式错误)' 101 RES_FAILED_3 = '识别失败(阶段1数据格式错误)'
102 RES_FAILED_SET = {RES_FAILED, RES_FAILED_1, RES_FAILED_2, RES_FAILED_3}
102 103
103 CARD_RATIO = 0.9 104 CARD_RATIO = 0.9
104 UNKNOWN_CARD = '未知卡号' 105 UNKNOWN_CARD = '未知卡号'
......
...@@ -768,7 +768,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -768,7 +768,9 @@ class Command(BaseCommand, LoggerMixin):
768 type=KeywordsType.LOAN.value, on_off=True).values_list('keyword', flat=True) 768 type=KeywordsType.LOAN.value, on_off=True).values_list('keyword', flat=True)
769 wechat_keyword = Keywords.objects.filter( 769 wechat_keyword = Keywords.objects.filter(
770 type=KeywordsType.ALI_WECHART.value, on_off=True).values_list('keyword', flat=True) 770 type=KeywordsType.ALI_WECHART.value, on_off=True).values_list('keyword', flat=True)
771 wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword, wechat_keyword) 771 repayments_keyword = Keywords.objects.filter(
772 type=KeywordsType.REPAYMENTS.value, on_off=True).values_list('keyword', flat=True)
773 wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword, wechat_keyword, repayments_keyword)
772 for img_path, res in ocr_1_res.items(): 774 for img_path, res in ocr_1_res.items():
773 pno, ino = self.parse_img_path(img_path) 775 pno, ino = self.parse_img_path(img_path)
774 part_idx = 1 776 part_idx = 1
......
...@@ -19,6 +19,7 @@ class KeywordsType(NamedEnum): ...@@ -19,6 +19,7 @@ class KeywordsType(NamedEnum):
19 SALARY = (1, '薪资') 19 SALARY = (1, '薪资')
20 LOAN = (2, '贷款') 20 LOAN = (2, '贷款')
21 ALI_WECHART = (3, '微信/支付宝') 21 ALI_WECHART = (3, '微信/支付宝')
22 REPAYMENTS = (4, '还款')
22 23
23 24
24 class RequestTeam(NamedEnum): 25 class RequestTeam(NamedEnum):
......
...@@ -7,24 +7,27 @@ from pandas._libs import tslib ...@@ -7,24 +7,27 @@ from pandas._libs import tslib
7 from pandas._libs.tslibs.nattype import NaTType 7 from pandas._libs.tslibs.nattype import NaTType
8 from pandas.core.indexes.datetimes import DatetimeIndex 8 from pandas.core.indexes.datetimes import DatetimeIndex
9 from openpyxl import Workbook 9 from openpyxl import Workbook
10 from openpyxl.styles import Border, Side, PatternFill, numbers 10 from openpyxl.styles import PatternFill, numbers
11 from openpyxl.utils import get_column_letter 11 from openpyxl.utils import get_column_letter
12 from apps.doc import consts 12 from apps.doc import consts
13 13
14 14
15 class BSWorkbook(Workbook): 15 class BSWorkbook(Workbook):
16 16
17 def __init__(self, interest_keyword, salary_keyword, loan_keyword, wechat_keyword, *args, **kwargs): 17 def __init__(self, interest_keyword, salary_keyword, loan_keyword, wechat_keyword, repayments_keyword, *args, **kwargs):
18 super().__init__(*args, **kwargs) 18 super().__init__(*args, **kwargs)
19 locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') 19 locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
20 self.meta_sheet_title = '关键信息提取和展示' 20 self.meta_sheet_title = 'Key info'
21 self.blank_row = (None,) 21 self.blank_row = (None,)
22 self.code_header = ('页数', '电子回单验证码') 22 self.code_header = ('页数', '电子回单验证码')
23 self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果') 23 self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果')
24 self.keyword_header = ('关键词', '记账日期', '金额') 24 self.interest_keyword_header = ('结息关键词', '记账日期', '金额')
25 self.salary_keyword_header = ('收入关键词', '记账日期', '金额')
26 self.repayments_keyword_header = ('还款关键词', '记账日期', '金额')
25 self.interest_keyword = self.replace_newline(interest_keyword) 27 self.interest_keyword = self.replace_newline(interest_keyword)
26 self.salary_keyword = self.replace_newline(salary_keyword) 28 self.salary_keyword = self.replace_newline(salary_keyword)
27 self.loan_keyword = self.replace_newline(loan_keyword) 29 self.loan_keyword = self.replace_newline(loan_keyword)
30 self.repayments_keyword = self.replace_newline(repayments_keyword)
28 self.wechat_keyword = wechat_keyword 31 self.wechat_keyword = wechat_keyword
29 self.proof_res = ('对', '错') 32 self.proof_res = ('对', '错')
30 self.loan_fill = PatternFill("solid", fgColor="00FFCC00") 33 self.loan_fill = PatternFill("solid", fgColor="00FFCC00")
...@@ -45,7 +48,7 @@ class BSWorkbook(Workbook): ...@@ -45,7 +48,7 @@ class BSWorkbook(Workbook):
45 if not isinstance(card, str): 48 if not isinstance(card, str):
46 return consts.ERROR_CARD 49 return consts.ERROR_CARD
47 try: 50 try:
48 new_card = card.translate(consts.SHEET_TITLE_TRANS).strip()[-6:] 51 new_card = card.translate(consts.SHEET_TITLE_TRANS).strip()[-4:]
49 if len(new_card) == 0: 52 if len(new_card) == 0:
50 new_card = consts.ERROR_CARD 53 new_card = consts.ERROR_CARD
51 except Exception as e: 54 except Exception as e:
...@@ -307,13 +310,15 @@ class BSWorkbook(Workbook): ...@@ -307,13 +310,15 @@ class BSWorkbook(Workbook):
307 month_mapping.setdefault(item, []).append( 310 month_mapping.setdefault(item, []).append(
308 (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) 311 (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean))
309 312
310 def build_metadata_rows(self, confidence, code, print_time, start_date, end_date): 313 def build_metadata_rows(self, confidence, code, print_time, start_date, end_date, res_count_tuple):
311 if start_date is None or end_date is None: 314 if start_date is None or end_date is None:
312 timedelta = None 315 timedelta = None
313 else: 316 else:
314 timedelta = (end_date - start_date).days 317 timedelta = (end_date - start_date).days
315 metadata_rows = [ 318 metadata_rows = [
316 ('流水识别置信度', confidence), 319 ('流水识别置信度', confidence),
320 ('图片总数', res_count_tuple[0]),
321 ('识别成功', res_count_tuple[1]),
317 self.blank_row, 322 self.blank_row,
318 self.code_header, 323 self.code_header,
319 ] 324 ]
...@@ -323,13 +328,15 @@ class BSWorkbook(Workbook): ...@@ -323,13 +328,15 @@ class BSWorkbook(Workbook):
323 self.date_header, 328 self.date_header,
324 (print_time, start_date, end_date, timedelta), 329 (print_time, start_date, end_date, timedelta),
325 self.blank_row, 330 self.blank_row,
326 self.keyword_header] 331 self.interest_keyword_header]
327 ) 332 )
328 return metadata_rows 333 return metadata_rows
329 334
def build_meta_sheet(self, role_name, card, confidence, code, print_time, start_date, end_date, res_count_tuple):
    """Create the per-card metadata sheet and fill it with the summary rows.

    The sheet title is ``meta_sheet_title`` followed by the role name and the
    card in parentheses.

    Args:
        role_name: Name placed in the sheet title; any non-string value is
            replaced by ``consts.UNKNOWN_ROLE``.
        card: Card identifier placed in the sheet title.
        confidence, code, print_time, start_date, end_date, res_count_tuple:
            Forwarded unchanged to ``build_metadata_rows``.

    Returns:
        The newly created worksheet, with every metadata row appended.
    """
    import re  # local import so this block does not depend on file-level imports

    metadata_rows = self.build_metadata_rows(confidence, code, print_time, start_date, end_date, res_count_tuple)
    if isinstance(role_name, str):
        # openpyxl refuses sheet titles containing \ / ? * [ ] : and raises
        # ValueError; strip them so an unusual name cannot abort the export.
        role_name = re.sub(r'[\\*?:/\[\]]', '', role_name)
    else:
        role_name = consts.UNKNOWN_ROLE
    ms = self.create_sheet('{0}{1}({2})'.format(self.meta_sheet_title, role_name, card))
    for row in metadata_rows:
        ms.append(row)
    return ms
...@@ -398,7 +405,7 @@ class BSWorkbook(Workbook): ...@@ -398,7 +405,7 @@ class BSWorkbook(Workbook):
398 row_value[1] = '\n'.join(append_list) 405 row_value[1] = '\n'.join(append_list)
399 return row_value 406 return row_value
400 407
401 def build_month_sheet(self, ms, card, month_mapping, is_reverse, statistics_header_info, max_column, classify): 408 def build_month_sheet(self, ms, role_name, card, month_mapping, is_reverse, statistics_header_info, max_column, classify):
402 summary_cell_idx = statistics_header_info.get(consts.SUMMARY_KEY) 409 summary_cell_idx = statistics_header_info.get(consts.SUMMARY_KEY)
403 date_cell_idx = statistics_header_info.get(consts.DATE_KEY) 410 date_cell_idx = statistics_header_info.get(consts.DATE_KEY)
404 amount_cell_idx = statistics_header_info.get(consts.AMOUNT_KEY) # None or src or append 411 amount_cell_idx = statistics_header_info.get(consts.AMOUNT_KEY) # None or src or append
...@@ -412,15 +419,17 @@ class BSWorkbook(Workbook): ...@@ -412,15 +419,17 @@ class BSWorkbook(Workbook):
412 for i in range(max_column - src_header_len): 419 for i in range(max_column - src_header_len):
413 header.append(None) 420 header.append(None)
414 421
415 add_col = ['核对结果'] 422 add_col = ['核对结果', '合计']
416 if amount_cell_idx is None: 423 if amount_cell_idx is None:
417 if income_cell_idx is not None or outlay_cell_idx is not None: 424 if income_cell_idx is not None or outlay_cell_idx is not None:
418 add_col = ['金额', '核对结果'] 425 add_col = ['金额', '核对结果', '合计']
419 amount_cell_idx = len(header) 426 amount_cell_idx = len(header)
420 header.extend(add_col) 427 header.extend(add_col)
421 result_idx = len(header) - 1 428 result_idx = len(header) - 2
429 amount_sum_idx = len(header) - 1
422 430
423 tmp_ws = self.create_sheet('tmp_ws') 431 tmp_ws = self.create_sheet('tmp_ws')
432 tmp2_ws = self.create_sheet('tmp2_ws')
424 if classify in consts.ALI_WECHART_CLASSIFY: 433 if classify in consts.ALI_WECHART_CLASSIFY:
425 high_light_keyword = self.wechat_keyword 434 high_light_keyword = self.wechat_keyword
426 else: 435 else:
...@@ -444,7 +453,10 @@ class BSWorkbook(Workbook): ...@@ -444,7 +453,10 @@ class BSWorkbook(Workbook):
444 453
445 amount_mapping = {} 454 amount_mapping = {}
446 amount_fill_row = set() 455 amount_fill_row = set()
447 loan_fill_row = set() 456 fill_row = set()
457
458 # 添加筛选
459 new_ws.auto_filter.ref = 'A1:{0}{1}'.format(get_column_letter(new_ws.max_column), new_ws.max_row)
448 460
449 for rows in new_ws.iter_rows(min_row=2): 461 for rows in new_ws.iter_rows(min_row=2):
450 length = len(rows) 462 length = len(rows)
...@@ -466,7 +478,15 @@ class BSWorkbook(Workbook): ...@@ -466,7 +478,15 @@ class BSWorkbook(Workbook):
466 478
467 # 贷款关键词高亮 479 # 贷款关键词高亮
468 if summary_cell is not None and summary_cell_value in high_light_keyword: 480 if summary_cell is not None and summary_cell_value in high_light_keyword:
469 loan_fill_row.add(summary_cell.row) 481 fill_row.add(summary_cell.row)
482
483 # 户名高亮
484 row_num = 2
485 for cell in rows:
486 row_num = cell.row
487 if cell.value == role_name:
488 fill_row.add(summary_cell.row)
489 break
470 490
471 # 3.3.余额转数值 491 # 3.3.余额转数值
472 over_success = False 492 over_success = False
...@@ -505,16 +525,17 @@ class BSWorkbook(Workbook): ...@@ -505,16 +525,17 @@ class BSWorkbook(Workbook):
505 amount_cell.number_format = numbers.FORMAT_NUMBER_00 525 amount_cell.number_format = numbers.FORMAT_NUMBER_00
506 if date_cell is not None and isinstance(date_cell_value, str): 526 if date_cell is not None and isinstance(date_cell_value, str):
507 same_amount_mapping = amount_mapping.get(date_cell_value[:10], {}) 527 same_amount_mapping = amount_mapping.get(date_cell_value[:10], {})
508 fill_rows = same_amount_mapping.get(-amount_cell.value) 528 fill_rows_set = same_amount_mapping.get(-amount_cell.value, set())
509 if fill_rows: 529 if len(fill_rows_set) > 0:
510 amount_fill_row.add(amount_cell.row) 530 amount_fill_row.add(amount_cell.row)
511 amount_fill_row.update(fill_rows) 531 amount_fill_row.add(fill_rows_set.pop())
532 else:
512 amount_mapping.setdefault(date_cell_value[:10], {}).setdefault( 533 amount_mapping.setdefault(date_cell_value[:10], {}).setdefault(
513 amount_cell.value, []).append(amount_cell.row) 534 amount_cell.value, set()).add(amount_cell.row)
514 535
515 # 3.5.核对结果 536 # 3.5.核对结果
516 if amount_success and over_success and amount_cell.row > 2:
517 amount_col_letter = get_column_letter(amount_cell_idx + 1) 537 amount_col_letter = get_column_letter(amount_cell_idx + 1)
538 if amount_success and over_success and amount_cell.row > 2:
518 over_col_letter = get_column_letter(over_cell_idx + 1) 539 over_col_letter = get_column_letter(over_cell_idx + 1)
519 if is_reverse: 540 if is_reverse:
520 rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format( 541 rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
...@@ -523,6 +544,11 @@ class BSWorkbook(Workbook): ...@@ -523,6 +544,11 @@ class BSWorkbook(Workbook):
523 rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format( 544 rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
524 amount_cell.row, amount_cell.row - 1, over_col_letter, amount_col_letter, *self.proof_res) 545 amount_cell.row, amount_cell.row - 1, over_col_letter, amount_col_letter, *self.proof_res)
525 546
547 # 3.6 金额合计列
548 amount_sum_letter = get_column_letter(amount_sum_idx + 1)
549 rows[amount_sum_idx].value = '=SUM({0}{1},{2}{3})'.format(
550 amount_sum_letter, row_num - 1, amount_col_letter, row_num)
551
526 # 3.2.提取信息、高亮 552 # 3.2.提取信息、高亮
527 # row = summary_cell.row 553 # row = summary_cell.row
528 if summary_cell is not None: 554 if summary_cell is not None:
...@@ -534,13 +560,17 @@ class BSWorkbook(Workbook): ...@@ -534,13 +560,17 @@ class BSWorkbook(Workbook):
534 elif summary_cell_value in self.salary_keyword: 560 elif summary_cell_value in self.salary_keyword:
535 new_amount_cell_value = None if amount_cell is None else amount_cell.value 561 new_amount_cell_value = None if amount_cell is None else amount_cell.value
536 tmp_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value)) 562 tmp_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value))
563 # 关键词3提取至临时表
564 elif summary_cell_value in self.repayments_keyword:
565 new_amount_cell_value = None if amount_cell is None else amount_cell.value
566 tmp2_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value))
537 # 贷款关键词高亮 567 # 贷款关键词高亮
538 # elif summary_cell_value in high_light_keyword: 568 # elif summary_cell_value in high_light_keyword:
539 # summary_cell.fill = self.amount_fill 569 # summary_cell.fill = self.amount_fill
540 # if amount_cell is not None: 570 # if amount_cell is not None:
541 # amount_cell.fill = self.amount_fill 571 # amount_cell.fill = self.amount_fill
542 572
543 for row in loan_fill_row: 573 for row in fill_row:
544 for cell in new_ws[row]: 574 for cell in new_ws[row]:
545 cell.fill = self.amount_fill 575 cell.fill = self.amount_fill
546 576
...@@ -555,12 +585,19 @@ class BSWorkbook(Workbook): ...@@ -555,12 +585,19 @@ class BSWorkbook(Workbook):
555 585
556 # 关键词2信息提取 586 # 关键词2信息提取
557 ms.append(self.blank_row) 587 ms.append(self.blank_row)
558 ms.append(self.keyword_header) 588 ms.append(self.salary_keyword_header)
559 for row in tmp_ws.iter_rows(values_only=True): 589 for row in tmp_ws.iter_rows(values_only=True):
560 ms.append(row) 590 ms.append(row)
561 self.remove(tmp_ws) 591 self.remove(tmp_ws)
562 592
563 def bs_rebuild(self, bs_summary): 593 # 关键词3信息提取
594 ms.append(self.blank_row)
595 ms.append(self.repayments_keyword_header)
596 for row in tmp2_ws.iter_rows(values_only=True):
597 ms.append(row)
598 self.remove(tmp2_ws)
599
600 def bs_rebuild(self, bs_summary, res_count_tuple):
564 # bs_summary = { 601 # bs_summary = {
565 # '卡号': { 602 # '卡号': {
566 # 'classify': 0, 603 # 'classify': 0,
...@@ -578,6 +615,7 @@ class BSWorkbook(Workbook): ...@@ -578,6 +615,7 @@ class BSWorkbook(Workbook):
578 new_card = self.get_new_card(card) 615 new_card = self.get_new_card(card)
579 # 1.原表表头收集、按照月份分割 616 # 1.原表表头收集、按照月份分割
580 # 1.1 总结首行信息 617 # 1.1 总结首行信息
618 role_name = summary.get('role', consts.UNKNOWN_ROLE)
581 classify = summary.get('classify', 0) 619 classify = summary.get('classify', 0)
582 sheet_header_info = {} 620 sheet_header_info = {}
583 header_info = {} 621 header_info = {}
...@@ -614,12 +652,14 @@ class BSWorkbook(Workbook): ...@@ -614,12 +652,14 @@ class BSWorkbook(Workbook):
614 652
615 # 2.元信息提取表 653 # 2.元信息提取表
616 confidence = self.get_confidence(max_find_count) 654 confidence = self.get_confidence(max_find_count)
617 ms = self.build_meta_sheet(new_card, 655 ms = self.build_meta_sheet(role_name,
656 new_card,
618 confidence, 657 confidence,
619 summary.get('code'), 658 summary.get('code'),
620 summary.get('print_time'), 659 summary.get('print_time'),
621 start_date, 660 start_date,
622 end_date) 661 end_date,
662 res_count_tuple)
623 663
624 # 3.创建月份表、提取/高亮关键行 664 # 3.创建月份表、提取/高亮关键行
625 # 倒序处理 665 # 倒序处理
...@@ -627,7 +667,7 @@ class BSWorkbook(Workbook): ...@@ -627,7 +667,7 @@ class BSWorkbook(Workbook):
627 for month_list in month_mapping.values(): 667 for month_list in month_mapping.values():
628 month_list.sort(key=lambda x: x[-1], reverse=is_reverse) 668 month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
629 669
630 self.build_month_sheet(ms, new_card, month_mapping, is_reverse, statistics_header_info, max_column, classify) 670 self.build_month_sheet(ms, role_name, new_card, month_mapping, is_reverse, statistics_header_info, max_column, classify)
631 671
632 # 4.删除原表 672 # 4.删除原表
633 for sheet in sheets_list: 673 for sheet in sheets_list:
...@@ -701,21 +741,35 @@ class BSWorkbook(Workbook): ...@@ -701,21 +741,35 @@ class BSWorkbook(Workbook):
701 res_list.sort(key=lambda x: (x[0], x[1], x[2])) 741 res_list.sort(key=lambda x: (x[0], x[1], x[2]))
702 ws = self.create_sheet(consts.RES_SHEET_NAME) 742 ws = self.create_sheet(consts.RES_SHEET_NAME)
703 ws.append(consts.RES_SHEET_HEADER) 743 ws.append(consts.RES_SHEET_HEADER)
744 success_count = 0
704 for res_tuple in res_list: 745 for res_tuple in res_list:
746 if res_tuple[-1] not in consts.RES_FAILED_SET:
747 success_count += 1
705 ws.append(res_tuple) 748 ws.append(res_tuple)
749 return len(res_list), success_count
750 else:
751 return 0, 0
752
def move_res_sheet(self):
    """Move the result sheet to the last position in the workbook.

    Does nothing when the workbook has no sheet named
    ``consts.RES_SHEET_NAME``, so it is safe to call unconditionally.
    """
    if consts.RES_SHEET_NAME not in self.sheetnames:
        # NOTE(review): res_sheet appears to skip sheet creation for an empty
        # result list; without this guard the lookup below raises KeyError.
        return
    # Item access replaces the deprecated ``get_sheet_by_name``.
    sheet = self[consts.RES_SHEET_NAME]
    # Reorder in place: take the sheet out of the list and re-append it.
    self._sheets.remove(sheet)
    self._sheets.append(sheet)
706 758
def remove_base_sheet(self):
    """Remove the default ``'Sheet'`` worksheet when other sheets exist.

    The sheet is kept when it is the only one in the workbook. Calling this
    when ``'Sheet'`` is already absent is a no-op rather than a KeyError.
    """
    names = self.sheetnames
    if len(names) > 1 and 'Sheet' in names:
        # Item access replaces the deprecated ``get_sheet_by_name``.
        self.remove(self['Sheet'])
710 762
def rebuild(self, bs_summary, license_summary, res_list, document_scheme):
    """Assemble the workbook and return the per-section count list.

    The result sheet is written first so its counts can be handed to
    ``bs_rebuild``. The license and bank-statement sections are then built in
    an order chosen by ``document_scheme``. Finally the result sheet is moved
    to the end and the default sheet is removed.

    Returns:
        ``count_list``, seeded with ``(consts.MODEL_FIELD_BS, len(bs_summary))``
        and passed to ``license_rebuild``.
    """
    counts = self.res_sheet(res_list)
    count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))]

    # Under the second document scheme the license section goes first.
    license_first = document_scheme == consts.DOC_SCHEME_LIST[1]
    if license_first:
        self.license_rebuild(license_summary, document_scheme, count_list)
    self.bs_rebuild(bs_summary, counts)
    if not license_first:
        self.license_rebuild(license_summary, document_scheme, count_list)

    self.move_res_sheet()
    self.remove_base_sheet()
    return count_list
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!