add jira-4562 content, add income_keywords
Showing 2 changed files with 41 additions and 5 deletions
| ... | @@ -2533,3 +2533,16 @@ FSM_ACTIVITED_STATUS = { | ... | @@ -2533,3 +2533,16 @@ FSM_ACTIVITED_STATUS = { |
| 2533 | "APIPP": "Activated-Invoice Passed-PT Doc Required", | 2533 | "APIPP": "Activated-Invoice Passed-PT Doc Required", |
| 2534 | "APARD": "Activated-Review done", | 2534 | "APARD": "Activated-Review done", |
| 2535 | } | 2535 | } |
| 2536 | |||
| 2537 | # Jira-4562 - 银行流水首页提取关键词 | ||
| 2538 | INCOME_KEYWORDS_LIST = ["养老金", "社保", "代发工资", "工资入账", "奖金", "养老保险", "代发", "工资"] | ||
| 2539 | INCOME_KEYWORDS_DICT = { | ||
| 2540 | "养老金": "yanglaojin", | ||
| 2541 | "社保": "shebao", | ||
| 2542 | "代发工资": "daifagongzi", | ||
| 2543 | "工资入账": "gongziruzhang", | ||
| 2544 | "奖金": "jiangjin", | ||
| 2545 | "养老保险": "yanglaobaoxian", | ||
| 2546 | "代发": "daifa", | ||
| 2547 | "工资": "gongzi" | ||
| 2548 | } | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
| ... | @@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): |
| 177 | # raise EDMSException(edms_exc) | 177 | # raise EDMSException(edms_exc) |
| 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) | 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) |
| 179 | 179 | ||
| 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx): | 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary): |
| 181 | sheets = ocr_data.get('data', []) | 181 | sheets = ocr_data.get('data', []) |
| 182 | if not sheets: | 182 | if not sheets: |
| 183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
| ... | @@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): |
| 196 | c1 = cell.get('start_column') | 196 | c1 = cell.get('start_column') |
| 197 | r1 = cell.get('start_row') | 197 | r1 = cell.get('start_row') |
| 198 | words = cell.get('words') | 198 | words = cell.get('words') |
| 199 | if words is not None: | ||
| 200 | if words in consts.INCOME_KEYWORDS_LIST: | ||
| 201 | if consts.INCOME_KEYWORDS_DICT.get(words) not in income_keywords_dictionary["income_keywords"]: | ||
| 202 | income_keywords_dictionary["income_keywords"].append(consts.INCOME_KEYWORDS_DICT.setdefault(words, "")) | ||
| 199 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) | 203 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) |
| 200 | 204 | ||
| 201 | # 真伪 | 205 | # 真伪 |
| ... | @@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): |
| 921 | summary['role'] = self.get_most(summary['role']) | 925 | summary['role'] = self.get_most(summary['role']) |
| 922 | return bs_summary | 926 | return bs_summary |
| 923 | 927 | ||
| 924 | def rebuild_bs(self, bs_summary): | 928 | def rebuild_bs(self, bs_summary, income_keywords_dictionary): |
| 925 | # bs_summary = { | 929 | # bs_summary = { |
| 926 | # '卡号': { | 930 | # '卡号': { |
| 927 | # 'classify': 0, | 931 | # 'classify': 0, |
| ... | @@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin): |
| 935 | # 'sheet': ['sheet_name'] | 939 | # 'sheet': ['sheet_name'] |
| 936 | # } | 940 | # } |
| 937 | # } | 941 | # } |
| 942 | |||
| 943 | # income_keywords_dictionary = { | ||
| 944 | # 'income_keywords': [ # 其中 0-8 个 | ||
| 945 | # 'yanglaojin', | ||
| 946 | # "shebao", | ||
| 947 | # "daifagongzi", | ||
| 948 | # "gongziruzhang", | ||
| 949 | # "jiangjin", | ||
| 950 | # "yanglaobaoxian", | ||
| 951 | # "daifa", | ||
| 952 | # "gongzi" | ||
| 953 | # ] | ||
| 954 | # } | ||
| 938 | res = [] | 955 | res = [] |
| 956 | income_keywords_list = income_keywords_dictionary.get('income_keywords', []) | ||
| 957 | income_filtered_keywords = [keyword_str for keyword_str in income_keywords_list if keyword_str] | ||
| 958 | income_keywords_str = ",".join(income_filtered_keywords) | ||
| 959 | |||
| 939 | for bs_info in bs_summary.values(): | 960 | for bs_info in bs_summary.values(): |
| 940 | try: | 961 | try: |
| 941 | print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") | 962 | print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") |
| ... | @@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): |
| 950 | 'print_time': print_date, | 971 | 'print_time': print_date, |
| 951 | 'timedelta': bs_info.get('timedelta', ''), | 972 | 'timedelta': bs_info.get('timedelta', ''), |
| 952 | 'verify': bs_info.get('verify_res_ebank', True), | 973 | 'verify': bs_info.get('verify_res_ebank', True), |
| 953 | 'e_bank': bs_info.get('e_bank', False) | 974 | 'e_bank': bs_info.get('e_bank', False), |
| 975 | 'income_keywords': income_keywords_str | ||
| 954 | } | 976 | } |
| 955 | ) | 977 | ) |
| 956 | return res | 978 | return res |
| ... | @@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1724 | license_summary = {} | 1746 | license_summary = {} |
| 1725 | contract_result = {} | 1747 | contract_result = {} |
| 1726 | contract_result_compare = {} | 1748 | contract_result_compare = {} |
| 1749 | income_keywords_dictionary = {"income_keywords": []} | ||
| 1727 | res_list = [] | 1750 | res_list = [] |
| 1728 | interest_keyword = Keywords.objects.filter( | 1751 | interest_keyword = Keywords.objects.filter( |
| 1729 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) | 1752 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) |
| ... | @@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1879 | ino, part_idx, img_path, contract_result_compare) | 1902 | ino, part_idx, img_path, contract_result_compare) |
| 1880 | else: # 流水处理 | 1903 | else: # 流水处理 |
| 1881 | bs_classify_set.add(classify) | 1904 | bs_classify_set.add(classify) |
| 1882 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) | 1905 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary) |
| 1883 | else: | 1906 | else: |
| 1884 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) | 1907 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) |
| 1885 | self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) | 1908 | self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
| ... | @@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin): |
| 2035 | license_summary, contract_result_compare)) | 2058 | license_summary, contract_result_compare)) |
| 2036 | self.rebuild_contract(license_summary, contract_result_compare) | 2059 | self.rebuild_contract(license_summary, contract_result_compare) |
| 2037 | 2060 | ||
| 2038 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | 2061 | bs_rebuild = self.rebuild_bs(merged_bs_summary, income_keywords_dictionary) |
| 2039 | if len(bs_rebuild) > 0: | 2062 | if len(bs_rebuild) > 0: |
| 2040 | license_summary[consts.BS_CLASSIFY] = bs_rebuild | 2063 | license_summary[consts.BS_CLASSIFY] = bs_rebuild |
| 2041 | 2064 | ... | ... |
-
Please register or sign in to post a comment