cbe57bd2 by chenyao

Add Jira-4562 content: add income_keywords

1 parent a883d3ea
...@@ -2532,4 +2532,17 @@ FSM_ACTIVITED_STATUS = { ...@@ -2532,4 +2532,17 @@ FSM_ACTIVITED_STATUS = {
2532 "APIPN": "Activated-Invoice Passed-Non PT", 2532 "APIPN": "Activated-Invoice Passed-Non PT",
2533 "APIPP": "Activated-Invoice Passed-PT Doc Required", 2533 "APIPP": "Activated-Invoice Passed-PT Doc Required",
2534 "APARD": "Activated-Review done", 2534 "APARD": "Activated-Review done",
2535 }
2536
2537 # Jira-4562 - 银行流水首页提取关键词
2538 INCOME_KEYWORDS_LIST = ["养老金", "社保", "代发工资", "工资入账", "奖金", "养老保险", "代发", "工资"]
2539 INCOME_KEYWORDS_DICT = {
2540 "养老金": "yanglaojin",
2541 "社保": "shebao",
2542 "代发工资": "daifagongzi",
2543 "工资入账": "gongziruzhang",
2544 "奖金": "jiangjin",
2545 "养老保险": "yanglaobaoxian",
2546 "代发": "daifa",
2547 "工资": "gongzi"
2535 } 2548 }
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin):
177 # raise EDMSException(edms_exc) 177 # raise EDMSException(edms_exc)
178 # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) 178 # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path))
179 179
180 def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx): 180 def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary):
181 sheets = ocr_data.get('data', []) 181 sheets = ocr_data.get('data', [])
182 if not sheets: 182 if not sheets:
183 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) 183 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
...@@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin):
196 c1 = cell.get('start_column') 196 c1 = cell.get('start_column')
197 r1 = cell.get('start_row') 197 r1 = cell.get('start_row')
198 words = cell.get('words') 198 words = cell.get('words')
199 if words is not None:
200 if words in consts.INCOME_KEYWORDS_LIST:
201 if consts.INCOME_KEYWORDS_DICT.get(words) not in income_keywords_dictionary["income_keywords"]:
202 income_keywords_dictionary["income_keywords"].append(consts.INCOME_KEYWORDS_DICT.setdefault(words, ""))
199 ws.cell(row=r1 + 1, column=c1 + 1, value=words) 203 ws.cell(row=r1 + 1, column=c1 + 1, value=words)
200 204
201 # 真伪 205 # 真伪
...@@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin):
921 summary['role'] = self.get_most(summary['role']) 925 summary['role'] = self.get_most(summary['role'])
922 return bs_summary 926 return bs_summary
923 927
924 def rebuild_bs(self, bs_summary): 928 def rebuild_bs(self, bs_summary, income_keywords_dictionary):
925 # bs_summary = { 929 # bs_summary = {
926 # '卡号': { 930 # '卡号': {
927 # 'classify': 0, 931 # 'classify': 0,
...@@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin): ...@@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin):
935 # 'sheet': ['sheet_name'] 939 # 'sheet': ['sheet_name']
936 # } 940 # }
937 # } 941 # }
942
943 # income_keywords_dictionary = {
944 # 'income_keywords': [ # 其中 0-8 个
945 # 'yanglaojin',
946 # "shebao",
947 # "daifagongzi",
948 # "gongziruzhang",
949 # "jiangjin",
950 # "yanglaobaoxian",
951 # "daifa",
952 # "gongzi"
953 # ]
954 # }
938 res = [] 955 res = []
956 income_keywords_list = income_keywords_dictionary.get('income_keywords', [])
957 income_filtered_keywords = [keyword_str for keyword_str in income_keywords_list if keyword_str]
958 income_keywords_str = ",".join(income_filtered_keywords)
959
939 for bs_info in bs_summary.values(): 960 for bs_info in bs_summary.values():
940 try: 961 try:
941 print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") 962 print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d")
...@@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin):
950 'print_time': print_date, 971 'print_time': print_date,
951 'timedelta': bs_info.get('timedelta', ''), 972 'timedelta': bs_info.get('timedelta', ''),
952 'verify': bs_info.get('verify_res_ebank', True), 973 'verify': bs_info.get('verify_res_ebank', True),
953 'e_bank': bs_info.get('e_bank', False) 974 'e_bank': bs_info.get('e_bank', False),
975 'income_keywords': income_keywords_str
954 } 976 }
955 ) 977 )
956 return res 978 return res
...@@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin):
1724 license_summary = {} 1746 license_summary = {}
1725 contract_result = {} 1747 contract_result = {}
1726 contract_result_compare = {} 1748 contract_result_compare = {}
1749 income_keywords_dictionary = {"income_keywords": []}
1727 res_list = [] 1750 res_list = []
1728 interest_keyword = Keywords.objects.filter( 1751 interest_keyword = Keywords.objects.filter(
1729 type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) 1752 type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True)
...@@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin):
1879 ino, part_idx, img_path, contract_result_compare) 1902 ino, part_idx, img_path, contract_result_compare)
1880 else: # 流水处理 1903 else: # 流水处理
1881 bs_classify_set.add(classify) 1904 bs_classify_set.add(classify)
1882 self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) 1905 self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary)
1883 else: 1906 else:
1884 res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) 1907 res_list.append((pno, ino, part_idx, consts.RES_FAILED_1))
1885 self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) 1908 self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
...@@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin):
2035 license_summary, contract_result_compare)) 2058 license_summary, contract_result_compare))
2036 self.rebuild_contract(license_summary, contract_result_compare) 2059 self.rebuild_contract(license_summary, contract_result_compare)
2037 2060
2038 bs_rebuild = self.rebuild_bs(merged_bs_summary) 2061 bs_rebuild = self.rebuild_bs(merged_bs_summary, income_keywords_dictionary)
2039 if len(bs_rebuild) > 0: 2062 if len(bs_rebuild) > 0:
2040 license_summary[consts.BS_CLASSIFY] = bs_rebuild 2063 license_summary[consts.BS_CLASSIFY] = bs_rebuild
2041 2064
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!