cbe57bd2 by chenyao

add jira-4562 content, add income_keywords

1 parent a883d3ea
......@@ -2533,3 +2533,16 @@ FSM_ACTIVITED_STATUS = {
"APIPP": "Activated-Invoice Passed-PT Doc Required",
"APARD": "Activated-Review done",
}
# Jira-4562: keywords extracted from the first page of a bank statement,
# each paired with the pinyin identifier that is reported for it.
# The pairs are declared once, in order, so that the list and the dict
# derived below can never drift apart when a keyword is added or removed.
_INCOME_KEYWORD_PAIRS = (
    ("养老金", "yanglaojin"),
    ("社保", "shebao"),
    ("代发工资", "daifagongzi"),
    ("工资入账", "gongziruzhang"),
    ("奖金", "jiangjin"),
    ("养老保险", "yanglaobaoxian"),
    ("代发", "daifa"),
    ("工资", "gongzi"),
)
# Keywords only, in declaration order (kept as a list for existing callers).
INCOME_KEYWORDS_LIST = [keyword for keyword, _ in _INCOME_KEYWORD_PAIRS]
# Keyword -> pinyin identifier.
INCOME_KEYWORDS_DICT = dict(_INCOME_KEYWORD_PAIRS)
\ No newline at end of file
......
......@@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin):
# raise EDMSException(edms_exc)
# self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path))
def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx):
def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary):
sheets = ocr_data.get('data', [])
if not sheets:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
......@@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin):
c1 = cell.get('start_column')
r1 = cell.get('start_row')
words = cell.get('words')
if words is not None:
if words in consts.INCOME_KEYWORDS_LIST:
if consts.INCOME_KEYWORDS_DICT.get(words) not in income_keywords_dictionary["income_keywords"]:
income_keywords_dictionary["income_keywords"].append(consts.INCOME_KEYWORDS_DICT.setdefault(words, ""))
ws.cell(row=r1 + 1, column=c1 + 1, value=words)
# 真伪
......@@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin):
summary['role'] = self.get_most(summary['role'])
return bs_summary
def rebuild_bs(self, bs_summary):
def rebuild_bs(self, bs_summary, income_keywords_dictionary):
# bs_summary = {
# '卡号': {
# 'classify': 0,
......@@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin):
# 'sheet': ['sheet_name']
# }
# }
# income_keywords_dictionary = {
# 'income_keywords': [ # 其中 0-8 个
# 'yanglaojin',
# "shebao",
# "daifagongzi",
# "gongziruzhang",
# "jiangjin",
# "yanglaobaoxian",
# "daifa",
# "gongzi"
# ]
# }
res = []
income_keywords_list = income_keywords_dictionary.get('income_keywords', [])
income_filtered_keywords = [keyword_str for keyword_str in income_keywords_list if keyword_str]
income_keywords_str = ",".join(income_filtered_keywords)
for bs_info in bs_summary.values():
try:
print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d")
......@@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin):
'print_time': print_date,
'timedelta': bs_info.get('timedelta', ''),
'verify': bs_info.get('verify_res_ebank', True),
'e_bank': bs_info.get('e_bank', False)
'e_bank': bs_info.get('e_bank', False),
'income_keywords': income_keywords_str
}
)
return res
......@@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin):
license_summary = {}
contract_result = {}
contract_result_compare = {}
income_keywords_dictionary = {"income_keywords": []}
res_list = []
interest_keyword = Keywords.objects.filter(
type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True)
......@@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin):
ino, part_idx, img_path, contract_result_compare)
else: # 流水处理
bs_classify_set.add(classify)
self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx)
self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary)
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED_1))
self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
......@@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin):
license_summary, contract_result_compare))
self.rebuild_contract(license_summary, contract_result_compare)
bs_rebuild = self.rebuild_bs(merged_bs_summary)
bs_rebuild = self.rebuild_bs(merged_bs_summary, income_keywords_dictionary)
if len(bs_rebuild) > 0:
license_summary[consts.BS_CLASSIFY] = bs_rebuild
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!