554d2f4f by 周伟奇

Merge branch 'feature/bs_excel' into feature/0611

2 parents 174d2005 b17b3c65
......@@ -99,6 +99,7 @@ RES_FAILED = '识别失败'
# Result labels for failed recognition: stage 1, stage 2, and a stage-1 data-format error.
RES_FAILED_1 = '识别失败(阶段1)'
RES_FAILED_2 = '识别失败(阶段2)'
RES_FAILED_3 = '识别失败(阶段1数据格式错误)'
# Every failure label; a result row whose last field is in this set is not counted as a success.
RES_FAILED_SET = {RES_FAILED, RES_FAILED_1, RES_FAILED_2, RES_FAILED_3}
# NOTE(review): ratio threshold related to card numbers; its exact use is not visible here - confirm.
CARD_RATIO = 0.9
# Placeholder label meaning "unknown card number".
UNKNOWN_CARD = '未知卡号'
......
......@@ -768,7 +768,9 @@ class Command(BaseCommand, LoggerMixin):
type=KeywordsType.LOAN.value, on_off=True).values_list('keyword', flat=True)
wechat_keyword = Keywords.objects.filter(
type=KeywordsType.ALI_WECHART.value, on_off=True).values_list('keyword', flat=True)
wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword, wechat_keyword)
repayments_keyword = Keywords.objects.filter(
type=KeywordsType.REPAYMENTS.value, on_off=True).values_list('keyword', flat=True)
wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword, wechat_keyword, repayments_keyword)
for img_path, res in ocr_1_res.items():
pno, ino = self.parse_img_path(img_path)
part_idx = 1
......
......@@ -19,6 +19,7 @@ class KeywordsType(NamedEnum):
SALARY = (1, '薪资')
LOAN = (2, '贷款')
ALI_WECHART = (3, '微信/支付宝')
REPAYMENTS = (4, '还款')
class RequestTeam(NamedEnum):
......
......@@ -7,24 +7,27 @@ from pandas._libs import tslib
from pandas._libs.tslibs.nattype import NaTType
from pandas.core.indexes.datetimes import DatetimeIndex
from openpyxl import Workbook
from openpyxl.styles import Border, Side, PatternFill, numbers
from openpyxl.styles import PatternFill, numbers
from openpyxl.utils import get_column_letter
from apps.doc import consts
class BSWorkbook(Workbook):
def __init__(self, interest_keyword, salary_keyword, loan_keyword, wechat_keyword, *args, **kwargs):
def __init__(self, interest_keyword, salary_keyword, loan_keyword, wechat_keyword, repayments_keyword, *args, **kwargs):
super().__init__(*args, **kwargs)
locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
self.meta_sheet_title = '关键信息提取和展示'
self.meta_sheet_title = 'Key info'
self.blank_row = (None,)
self.code_header = ('页数', '电子回单验证码')
self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果')
self.keyword_header = ('关键词', '记账日期', '金额')
self.interest_keyword_header = ('结息关键词', '记账日期', '金额')
self.salary_keyword_header = ('收入关键词', '记账日期', '金额')
self.repayments_keyword_header = ('还款关键词', '记账日期', '金额')
self.interest_keyword = self.replace_newline(interest_keyword)
self.salary_keyword = self.replace_newline(salary_keyword)
self.loan_keyword = self.replace_newline(loan_keyword)
self.repayments_keyword = self.replace_newline(repayments_keyword)
self.wechat_keyword = wechat_keyword
self.proof_res = ('对', '错')
self.loan_fill = PatternFill("solid", fgColor="00FFCC00")
......@@ -45,7 +48,7 @@ class BSWorkbook(Workbook):
if not isinstance(card, str):
return consts.ERROR_CARD
try:
new_card = card.translate(consts.SHEET_TITLE_TRANS).strip()[-6:]
new_card = card.translate(consts.SHEET_TITLE_TRANS).strip()[-4:]
if len(new_card) == 0:
new_card = consts.ERROR_CARD
except Exception as e:
......@@ -307,13 +310,15 @@ class BSWorkbook(Workbook):
month_mapping.setdefault(item, []).append(
(ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean))
def build_metadata_rows(self, confidence, code, print_time, start_date, end_date):
def build_metadata_rows(self, confidence, code, print_time, start_date, end_date, res_count_tuple):
if start_date is None or end_date is None:
timedelta = None
else:
timedelta = (end_date - start_date).days
metadata_rows = [
('流水识别置信度', confidence),
('图片总数', res_count_tuple[0]),
('识别成功', res_count_tuple[1]),
self.blank_row,
self.code_header,
]
......@@ -323,13 +328,15 @@ class BSWorkbook(Workbook):
self.date_header,
(print_time, start_date, end_date, timedelta),
self.blank_row,
self.keyword_header]
self.interest_keyword_header]
)
return metadata_rows
def build_meta_sheet(self, role_name, card, confidence, code, print_time, start_date, end_date, res_count_tuple):
    """Create the per-card metadata sheet and fill it with the summary rows.

    The rows are produced by ``build_metadata_rows``; the sheet title is
    ``meta_sheet_title`` followed by the role name and the card in parentheses.

    Args:
        role_name: Account holder name; any non-string value is replaced by
            ``consts.UNKNOWN_ROLE``.
        card: Card identifier placed in the sheet title.
        confidence: Recognition confidence forwarded to the metadata rows.
        code: Verification code data forwarded to the metadata rows.
        print_time: Print time forwarded to the metadata rows.
        start_date: Statement start date, or None.
        end_date: Statement end date, or None.
        res_count_tuple: ``(total image count, successfully recognised count)``.

    Returns:
        The newly created worksheet.
    """
    metadata_rows = self.build_metadata_rows(
        confidence, code, print_time, start_date, end_date, res_count_tuple)
    # A missing or non-string role falls back to the placeholder label.
    if not isinstance(role_name, str):
        role_name = consts.UNKNOWN_ROLE
    # NOTE(review): role_name goes into the sheet title unmodified; confirm it
    # cannot contain characters that are invalid in a worksheet title.
    ms = self.create_sheet('{0}{1}({2})'.format(self.meta_sheet_title, role_name, card))
    for row in metadata_rows:
        ms.append(row)
    return ms
......@@ -398,7 +405,7 @@ class BSWorkbook(Workbook):
row_value[1] = '\n'.join(append_list)
return row_value
def build_month_sheet(self, ms, card, month_mapping, is_reverse, statistics_header_info, max_column, classify):
def build_month_sheet(self, ms, role_name, card, month_mapping, is_reverse, statistics_header_info, max_column, classify):
summary_cell_idx = statistics_header_info.get(consts.SUMMARY_KEY)
date_cell_idx = statistics_header_info.get(consts.DATE_KEY)
amount_cell_idx = statistics_header_info.get(consts.AMOUNT_KEY) # None or src or append
......@@ -412,15 +419,17 @@ class BSWorkbook(Workbook):
for i in range(max_column - src_header_len):
header.append(None)
add_col = ['核对结果']
add_col = ['核对结果', '合计']
if amount_cell_idx is None:
if income_cell_idx is not None or outlay_cell_idx is not None:
add_col = ['金额', '核对结果']
add_col = ['金额', '核对结果', '合计']
amount_cell_idx = len(header)
header.extend(add_col)
result_idx = len(header) - 1
result_idx = len(header) - 2
amount_sum_idx = len(header) - 1
tmp_ws = self.create_sheet('tmp_ws')
tmp2_ws = self.create_sheet('tmp2_ws')
if classify in consts.ALI_WECHART_CLASSIFY:
high_light_keyword = self.wechat_keyword
else:
......@@ -444,7 +453,10 @@ class BSWorkbook(Workbook):
amount_mapping = {}
amount_fill_row = set()
loan_fill_row = set()
fill_row = set()
# 添加筛选
new_ws.auto_filter.ref = 'A1:{0}{1}'.format(get_column_letter(new_ws.max_column), new_ws.max_row)
for rows in new_ws.iter_rows(min_row=2):
length = len(rows)
......@@ -466,7 +478,15 @@ class BSWorkbook(Workbook):
# 贷款关键词高亮
if summary_cell is not None and summary_cell_value in high_light_keyword:
loan_fill_row.add(summary_cell.row)
fill_row.add(summary_cell.row)
# 户名高亮
row_num = 2
for cell in rows:
row_num = cell.row
if cell.value == role_name:
fill_row.add(summary_cell.row)
break
# 3.3.余额转数值
over_success = False
......@@ -505,16 +525,17 @@ class BSWorkbook(Workbook):
amount_cell.number_format = numbers.FORMAT_NUMBER_00
if date_cell is not None and isinstance(date_cell_value, str):
same_amount_mapping = amount_mapping.get(date_cell_value[:10], {})
fill_rows = same_amount_mapping.get(-amount_cell.value)
if fill_rows:
fill_rows_set = same_amount_mapping.get(-amount_cell.value, set())
if len(fill_rows_set) > 0:
amount_fill_row.add(amount_cell.row)
amount_fill_row.update(fill_rows)
amount_mapping.setdefault(date_cell_value[:10], {}).setdefault(
amount_cell.value, []).append(amount_cell.row)
amount_fill_row.add(fill_rows_set.pop())
else:
amount_mapping.setdefault(date_cell_value[:10], {}).setdefault(
amount_cell.value, set()).add(amount_cell.row)
# 3.5.核对结果
amount_col_letter = get_column_letter(amount_cell_idx + 1)
if amount_success and over_success and amount_cell.row > 2:
amount_col_letter = get_column_letter(amount_cell_idx + 1)
over_col_letter = get_column_letter(over_cell_idx + 1)
if is_reverse:
rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
......@@ -523,6 +544,11 @@ class BSWorkbook(Workbook):
rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
amount_cell.row, amount_cell.row - 1, over_col_letter, amount_col_letter, *self.proof_res)
# 3.6 金额合计列
amount_sum_letter = get_column_letter(amount_sum_idx + 1)
rows[amount_sum_idx].value = '=SUM({0}{1},{2}{3})'.format(
amount_sum_letter, row_num - 1, amount_col_letter, row_num)
# 3.2.提取信息、高亮
# row = summary_cell.row
if summary_cell is not None:
......@@ -534,13 +560,17 @@ class BSWorkbook(Workbook):
elif summary_cell_value in self.salary_keyword:
new_amount_cell_value = None if amount_cell is None else amount_cell.value
tmp_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value))
# 关键词3提取至临时表
elif summary_cell_value in self.repayments_keyword:
new_amount_cell_value = None if amount_cell is None else amount_cell.value
tmp2_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value))
# 贷款关键词高亮
# elif summary_cell_value in high_light_keyword:
# summary_cell.fill = self.amount_fill
# if amount_cell is not None:
# amount_cell.fill = self.amount_fill
for row in loan_fill_row:
for row in fill_row:
for cell in new_ws[row]:
cell.fill = self.amount_fill
......@@ -555,12 +585,19 @@ class BSWorkbook(Workbook):
# 关键词2信息提取
ms.append(self.blank_row)
ms.append(self.keyword_header)
ms.append(self.salary_keyword_header)
for row in tmp_ws.iter_rows(values_only=True):
ms.append(row)
self.remove(tmp_ws)
def bs_rebuild(self, bs_summary):
# 关键词3信息提取
ms.append(self.blank_row)
ms.append(self.repayments_keyword_header)
for row in tmp2_ws.iter_rows(values_only=True):
ms.append(row)
self.remove(tmp2_ws)
def bs_rebuild(self, bs_summary, res_count_tuple):
# bs_summary = {
# '卡号': {
# 'classify': 0,
......@@ -578,6 +615,7 @@ class BSWorkbook(Workbook):
new_card = self.get_new_card(card)
# 1.原表表头收集、按照月份分割
# 1.1 总结首行信息
role_name = summary.get('role', consts.UNKNOWN_ROLE)
classify = summary.get('classify', 0)
sheet_header_info = {}
header_info = {}
......@@ -614,12 +652,14 @@ class BSWorkbook(Workbook):
# 2.元信息提取表
confidence = self.get_confidence(max_find_count)
ms = self.build_meta_sheet(new_card,
ms = self.build_meta_sheet(role_name,
new_card,
confidence,
summary.get('code'),
summary.get('print_time'),
start_date,
end_date)
end_date,
res_count_tuple)
# 3.创建月份表、提取/高亮关键行
# 倒序处理
......@@ -627,7 +667,7 @@ class BSWorkbook(Workbook):
for month_list in month_mapping.values():
month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
self.build_month_sheet(ms, new_card, month_mapping, is_reverse, statistics_header_info, max_column, classify)
self.build_month_sheet(ms, role_name, new_card, month_mapping, is_reverse, statistics_header_info, max_column, classify)
# 4.删除原表
for sheet in sheets_list:
......@@ -701,21 +741,35 @@ class BSWorkbook(Workbook):
res_list.sort(key=lambda x: (x[0], x[1], x[2]))
ws = self.create_sheet(consts.RES_SHEET_NAME)
ws.append(consts.RES_SHEET_HEADER)
success_count = 0
for res_tuple in res_list:
if res_tuple[-1] not in consts.RES_FAILED_SET:
success_count += 1
ws.append(res_tuple)
return len(res_list), success_count
else:
return 0, 0
def move_res_sheet(self):
    """Move the recognition-result sheet to the last tab position.

    Does nothing when the workbook has no sheet named
    ``consts.RES_SHEET_NAME``, so callers may invoke it unconditionally even
    if the result sheet was never created.
    """
    if consts.RES_SHEET_NAME not in self.sheetnames:
        return
    # Index access replaces the deprecated get_sheet_by_name().
    sheet = self[consts.RES_SHEET_NAME]
    # Re-append the same worksheet object so it becomes the final tab.
    self._sheets.remove(sheet)
    self._sheets.append(sheet)
def remove_base_sheet(self):
    """Remove the worksheet titled 'Sheet' once other sheets exist.

    Nothing is removed when the workbook holds a single sheet, or when no
    sheet titled 'Sheet' is present (for example on a repeated call).
    """
    names = self.sheetnames
    # Only drop the sheet when at least one other sheet remains
    # (presumably so the workbook is never left empty).
    if len(names) <= 1 or 'Sheet' not in names:
        return
    # Index access replaces the deprecated get_sheet_by_name().
    self.remove(self['Sheet'])
def rebuild(self, bs_summary, license_summary, res_list, document_scheme):
    """Assemble the final workbook from the recognition results.

    The result sheet is built first because its counts are passed on to
    ``bs_rebuild``; it is moved to the end once all other sheets exist.

    Args:
        bs_summary: Bank-statement summary; its length is reported in the
            returned count list.
        license_summary: License summary forwarded to ``license_rebuild``.
        res_list: Per-image recognition results forwarded to ``res_sheet``.
        document_scheme: Scheme identifier; when it equals
            ``consts.DOC_SCHEME_LIST[1]`` the license sheets are built before
            the bank-statement sheets, otherwise after them.

    Returns:
        ``count_list``, which starts with ``(consts.MODEL_FIELD_BS,
        len(bs_summary))`` and is also handed to ``license_rebuild``.
    """
    # (total images, successfully recognised) as returned by res_sheet.
    res_count_tuple = self.res_sheet(res_list)
    count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))]
    if document_scheme == consts.DOC_SCHEME_LIST[1]:
        self.license_rebuild(license_summary, document_scheme, count_list)
        self.bs_rebuild(bs_summary, res_count_tuple)
    else:
        self.bs_rebuild(bs_summary, res_count_tuple)
        self.license_rebuild(license_summary, document_scheme, count_list)
    # The result sheet was created first; put it back at the end.
    self.move_res_sheet()
    self.remove_base_sheet()
    return count_list
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!