添加income_keywords的处理
Showing 1 changed file with 27 additions and 5 deletions
| ... | @@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin): |
| 177 | # raise EDMSException(edms_exc) | 177 | # raise EDMSException(edms_exc) |
| 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) | 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) |
| 179 | 179 | ||
| 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx): | 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary): |
| 181 | sheets = ocr_data.get('data', []) | 181 | sheets = ocr_data.get('data', []) |
| 182 | if not sheets: | 182 | if not sheets: |
| 183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
| ... | @@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin): |
| 196 | c1 = cell.get('start_column') | 196 | c1 = cell.get('start_column') |
| 197 | r1 = cell.get('start_row') | 197 | r1 = cell.get('start_row') |
| 198 | words = cell.get('words') | 198 | words = cell.get('words') |
| 199 | if words is not None: | ||
| 200 | if words in consts.INCOME_KEYWORDS_LIST: | ||
| 201 | if consts.INCOME_KEYWORDS_DICT.get(words) not in income_keywords_dictionary["income_keywords"]: | ||
| 202 | income_keywords_dictionary["income_keywords"].append(consts.INCOME_KEYWORDS_DICT.setdefault(words, "")) | ||
| 199 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) | 203 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) |
| 200 | 204 | ||
| 201 | # 真伪 | 205 | # 真伪 |
| ... | @@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin): |
| 921 | summary['role'] = self.get_most(summary['role']) | 925 | summary['role'] = self.get_most(summary['role']) |
| 922 | return bs_summary | 926 | return bs_summary |
| 923 | 927 | ||
| 924 | def rebuild_bs(self, bs_summary): | 928 | def rebuild_bs(self, bs_summary, income_keywords_dictionary): |
| 925 | # bs_summary = { | 929 | # bs_summary = { |
| 926 | # '卡号': { | 930 | # '卡号': { |
| 927 | # 'classify': 0, | 931 | # 'classify': 0, |
| ... | @@ -935,7 +939,23 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -935,7 +939,23 @@ class Command(BaseCommand, LoggerMixin): |
| 935 | # 'sheet': ['sheet_name'] | 939 | # 'sheet': ['sheet_name'] |
| 936 | # } | 940 | # } |
| 937 | # } | 941 | # } |
| 942 | |||
| 943 | # income_keywords_dictionary = { | ||
| 944 | # 'income_keywords': [ # 其中 0-8 个 | ||
| 945 | # 'yanglaojin', | ||
| 946 | # "shebao", | ||
| 947 | # "daifagongzi", | ||
| 948 | # "gongziruzhang", | ||
| 949 | # "jiangjin", | ||
| 950 | # "yanglaobaoxian", | ||
| 951 | # "daifa", | ||
| 952 | # "gongzi" | ||
| 953 | # ] | ||
| 954 | # } | ||
| 938 | res = [] | 955 | res = [] |
| 956 | income_keywords_list = income_keywords_dictionary.get('income_keywords', []) | ||
| 957 | income_filtered_keywords = [keyword_str for keyword_str in income_keywords_list if keyword_str] | ||
| 958 | income_keywords_str = ",".join(income_filtered_keywords) | ||
| 939 | for bs_info in bs_summary.values(): | 959 | for bs_info in bs_summary.values(): |
| 940 | try: | 960 | try: |
| 941 | print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") | 961 | print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d") |
| ... | @@ -951,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -951,7 +971,8 @@ class Command(BaseCommand, LoggerMixin): |
| 951 | 'print_time': print_date, | 971 | 'print_time': print_date, |
| 952 | 'timedelta': bs_info.get('timedelta', ''), | 972 | 'timedelta': bs_info.get('timedelta', ''), |
| 953 | 'verify': bs_info.get('verify_res_ebank', True), | 973 | 'verify': bs_info.get('verify_res_ebank', True), |
| 954 | 'e_bank': bs_info.get('e_bank', False) | 974 | 'e_bank': bs_info.get('e_bank', False), |
| 975 | 'income_keywords': income_keywords_str | ||
| 955 | } | 976 | } |
| 956 | ) | 977 | ) |
| 957 | return res | 978 | return res |
| ... | @@ -1742,6 +1763,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1742,6 +1763,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1742 | license_summary = {} | 1763 | license_summary = {} |
| 1743 | contract_result = {} | 1764 | contract_result = {} |
| 1744 | contract_result_compare = {} | 1765 | contract_result_compare = {} |
| 1766 | income_keywords_dictionary = {"income_keywords": []} | ||
| 1745 | # 添加财报三个报表的处理 | 1767 | # 添加财报三个报表的处理 |
| 1746 | financial_statement_dict = {} | 1768 | financial_statement_dict = {} |
| 1747 | # 添加财报情况说明的处理 | 1769 | # 添加财报情况说明的处理 |
| ... | @@ -1939,7 +1961,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1939,7 +1961,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1939 | ino, part_idx, img_path, contract_result_compare) | 1961 | ino, part_idx, img_path, contract_result_compare) |
| 1940 | else: # 流水处理 | 1962 | else: # 流水处理 |
| 1941 | bs_classify_set.add(classify) | 1963 | bs_classify_set.add(classify) |
| 1942 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) | 1964 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary) |
| 1943 | else: | 1965 | else: |
| 1944 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) | 1966 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) |
| 1945 | self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) | 1967 | self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path)) |
| ... | @@ -2095,7 +2117,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2095,7 +2117,7 @@ class Command(BaseCommand, LoggerMixin): |
| 2095 | license_summary, contract_result_compare)) | 2117 | license_summary, contract_result_compare)) |
| 2096 | self.rebuild_contract(license_summary, contract_result_compare) | 2118 | self.rebuild_contract(license_summary, contract_result_compare) |
| 2097 | 2119 | ||
| 2098 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | 2120 | bs_rebuild = self.rebuild_bs(merged_bs_summary, income_keywords_dictionary) |
| 2099 | if len(bs_rebuild) > 0: | 2121 | if len(bs_rebuild) > 0: |
| 2100 | license_summary[consts.BS_CLASSIFY] = bs_rebuild | 2122 | license_summary[consts.BS_CLASSIFY] = bs_rebuild |
| 2101 | 2123 | ... | ... |
-
Please register or sign in to post a comment