add jira-4562 content, add income_keywords
Showing
2 changed files
with
41 additions
and
5 deletions
... | @@ -2532,4 +2532,17 @@ FSM_ACTIVITED_STATUS = { | ... | @@ -2532,4 +2532,17 @@ FSM_ACTIVITED_STATUS = { |
2532 | "APIPN": "Activated-Invoice Passed-Non PT", | 2532 | "APIPN": "Activated-Invoice Passed-Non PT", |
2533 | "APIPP": "Activated-Invoice Passed-PT Doc Required", | 2533 | "APIPP": "Activated-Invoice Passed-PT Doc Required", |
2534 | "APARD": "Activated-Review done", | 2534 | "APARD": "Activated-Review done", |
2535 | } | ||
2536 | |||
2537 | # Jira-4562 - 银行流水首页提取关键词 | ||
2538 | INCOME_KEYWORDS_LIST = ["养老金", "社保", "代发工资", "工资入账", "奖金", "养老保险", "代发", "工资"] | ||
2539 | INCOME_KEYWORDS_DICT = { | ||
2540 | "养老金": "yanglaojin", | ||
2541 | "社保": "shebao", | ||
2542 | "代发工资": "daifagongzi", | ||
2543 | "工资入账": "gongziruzhang", | ||
2544 | "奖金": "jiangjin", | ||
2545 | "养老保险": "yanglaobaoxian", | ||
2546 | "代发": "daifa", | ||
2547 | "工资": "gongzi" | ||
2535 | } | 2548 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): |
177 | # raise EDMSException(edms_exc) | 177 | # raise EDMSException(edms_exc) |
178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) | 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) |
179 | 179 | ||
180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx): | 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary): |
181 | sheets = ocr_data.get('data', []) | 181 | sheets = ocr_data.get('data', []) |
182 | if not sheets: | 182 | if not sheets: |
183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
... | @@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): |
196 | c1 = cell.get('start_column') | 196 | c1 = cell.get('start_column') |
197 | r1 = cell.get('start_row') | 197 | r1 = cell.get('start_row') |
198 | words = cell.get('words') | 198 | words = cell.get('words') |
199 | if words is not None: | ||
200 | if words in consts.INCOME_KEYWORDS_LIST: | ||
201 | if consts.INCOME_KEYWORDS_DICT.get(words) not in income_keywords_dictionary["income_keywords"]: | ||
202 | income_keywords_dictionary["income_keywords"].append(consts.INCOME_KEYWORDS_DICT.setdefault(words, "")) | ||
199 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) | 203 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) |
200 | 204 | ||
201 | # 真伪 | 205 | # 真伪 |
... | @@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): |
921 | summary['role'] = self.get_most(summary['role']) | 925 | summary['role'] = self.get_most(summary['role']) |
922 | return bs_summary | 926 | return bs_summary |
923 | 927 | ||
924 | def rebuild_bs(self, bs_summary): | 928 | def rebuild_bs(self, bs_summary, income_keywords_dictionary): |
925 | # bs_summary = { | 929 | # bs_summary = { |
926 | # '卡号': { | 930 | # '卡号': { |
927 | # 'classify': 0, | 931 | # 'classify': 0, |
... | @@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin): |
935 | # 'sheet': ['sheet_name'] | 939 | # 'sheet': ['sheet_name'] |
936 | # } | 940 | # } |
937 | # } | 941 | # } |
942 | |||
943 | # income_keywords_dictionary = { | ||
944 | # 'income_keywords': [ # 其中 0-8 个 | ||
945 | # 'yanglaojin', | ||
946 | # "shebao", | ||
947 | # "daifagongzi", | ||
948 | # "gongziruzhang", | ||
949 | # "jiangjin", | ||
950 | # "yanglaobaoxian", | ||
951 | # "daifa", | ||
952 | # "gongzi" | ||
953 | # ] | ||
954 | # } | ||
938 | res = [] | 955 | res = [] |
956 | income_keywords_list = income_keywords_dictionary.get('income_keywords', []) | ||
957 | income_filtered_keywords = [keyword_str for keyword_str in income_keywords_list if keyword_str] | ||
958 | income_keywords_str = ",".join(income_filtered_keywords) | ||
959 | |||
939 | for bs_info in bs_summary.values(): | 960 | for bs_info in bs_summary.values(): |
940 | try: | 961 | try: |
941 | print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") | 962 | print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") |
... | @@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): |
950 | 'print_time': print_date, | 971 | 'print_time': print_date, |
951 | 'timedelta': bs_info.get('timedelta', ''), | 972 | 'timedelta': bs_info.get('timedelta', ''), |
952 | 'verify': bs_info.get('verify_res_ebank', True), | 973 | 'verify': bs_info.get('verify_res_ebank', True), |
953 | 'e_bank': bs_info.get('e_bank', False) | 974 | 'e_bank': bs_info.get('e_bank', False), |
975 | 'income_keywords': income_keywords_str | ||
954 | } | 976 | } |
955 | ) | 977 | ) |
956 | return res | 978 | return res |
... | @@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin): |
1724 | license_summary = {} | 1746 | license_summary = {} |
1725 | contract_result = {} | 1747 | contract_result = {} |
1726 | contract_result_compare = {} | 1748 | contract_result_compare = {} |
1749 | income_keywords_dictionary = {"income_keywords": []} | ||
1727 | res_list = [] | 1750 | res_list = [] |
1728 | interest_keyword = Keywords.objects.filter( | 1751 | interest_keyword = Keywords.objects.filter( |
1729 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) | 1752 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) |
... | @@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin): |
1879 | ino, part_idx, img_path, contract_result_compare) | 1902 | ino, part_idx, img_path, contract_result_compare) |
1880 | else: # 流水处理 | 1903 | else: # 流水处理 |
1881 | bs_classify_set.add(classify) | 1904 | bs_classify_set.add(classify) |
1882 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) | 1905 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary) |
1883 | else: | 1906 | else: |
1884 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) | 1907 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) |
1885 | self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) | 1908 | self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
... | @@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin): |
2035 | license_summary, contract_result_compare)) | 2058 | license_summary, contract_result_compare)) |
2036 | self.rebuild_contract(license_summary, contract_result_compare) | 2059 | self.rebuild_contract(license_summary, contract_result_compare) |
2037 | 2060 | ||
2038 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | 2061 | bs_rebuild = self.rebuild_bs(merged_bs_summary, income_keywords_dictionary) |
2039 | if len(bs_rebuild) > 0: | 2062 | if len(bs_rebuild) > 0: |
2040 | license_summary[consts.BS_CLASSIFY] = bs_rebuild | 2063 | license_summary[consts.BS_CLASSIFY] = bs_rebuild |
2041 | 2064 | ... | ... |
-
Please register or sign in to post a comment