From b331bf72257f709acf3ebb1185448ab05708fd99 Mon Sep 17 00:00:00 2001
From: chenyao <chenyao@situdata.com>
Date: Tue, 3 Dec 2024 19:12:16 +0800
Subject: [PATCH] 添加income_keywords的处理

---
 src/apps/doc/management/commands/ocr_process.py | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/apps/doc/management/commands/ocr_process.py b/src/apps/doc/management/commands/ocr_process.py
index 8416348..df8d0f8 100644
--- a/src/apps/doc/management/commands/ocr_process.py
+++ b/src/apps/doc/management/commands/ocr_process.py
@@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin):
     #             raise EDMSException(edms_exc)
     #     self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path))
 
-    def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx):
+    def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary):
         sheets = ocr_data.get('data', [])
         if not sheets:
             res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
@@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin):
                 c1 = cell.get('start_column')
                 r1 = cell.get('start_row')
                 words = cell.get('words')
+                if words is not None:
+                    if words in consts.INCOME_KEYWORDS_LIST:
+                        if consts.INCOME_KEYWORDS_DICT.get(words) not in income_keywords_dictionary["income_keywords"]:
+                            income_keywords_dictionary["income_keywords"].append(consts.INCOME_KEYWORDS_DICT.setdefault(words, ""))
                 ws.cell(row=r1 + 1, column=c1 + 1, value=words)
 
             # 真伪
@@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin):
             summary['role'] = self.get_most(summary['role'])
         return bs_summary
 
-    def rebuild_bs(self, bs_summary):
+    def rebuild_bs(self, bs_summary, income_keywords_dictionary):
         # bs_summary = {
         #     '卡号': {
         #         'classify': 0,
@@ -935,7 +939,23 @@ class Command(BaseCommand, LoggerMixin):
         #         'sheet': ['sheet_name']
         #     }
         # }
+
+        # income_keywords_dictionary = {
+        #     'income_keywords': [  # holds zero to eight of the values listed below
+        #         'yanglaojin',
+        #         "shebao",
+        #         "daifagongzi",
+        #         "gongziruzhang",
+        #         "jiangjin",
+        #         "yanglaobaoxian",
+        #         "daifa",
+        #         "gongzi"
+        #     ]
+        # }
         res = []
+        income_keywords_list = income_keywords_dictionary.get('income_keywords', [])
+        income_filtered_keywords = [keyword_str for keyword_str in income_keywords_list if keyword_str]
+        income_keywords_str = ",".join(income_filtered_keywords)
         for bs_info in bs_summary.values():
             try:
                 print_date = bs_info.get('print_time', '').strftime("%Y-%m-%d")
@@ -951,7 +971,8 @@ class Command(BaseCommand, LoggerMixin):
                     'print_time': print_date,
                     'timedelta': bs_info.get('timedelta', ''),
                     'verify': bs_info.get('verify_res_ebank', True),
-                    'e_bank': bs_info.get('e_bank', False)
+                    'e_bank': bs_info.get('e_bank', False),
+                    'income_keywords': income_keywords_str
                 }
             )
         return res
@@ -1742,6 +1763,7 @@ class Command(BaseCommand, LoggerMixin):
                         license_summary = {}
                         contract_result = {}
                         contract_result_compare = {}
+                        income_keywords_dictionary = {"income_keywords": []}
                         # 添加财报三个报表的处理
                         financial_statement_dict = {}
                         # 添加财报情况说明的处理
@@ -1939,7 +1961,7 @@ class Command(BaseCommand, LoggerMixin):
                                                                   ino, part_idx, img_path, contract_result_compare)
                                         else:  # 流水处理
                                             bs_classify_set.add(classify)
-                                            self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx)
+                                            self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary)
                             else:
                                 res_list.append((pno, ino, part_idx, consts.RES_FAILED_1))
                                 self.online_log.info('{0} [ocr_1 res error] [img={1}]'.format(self.log_base, img_path))
@@ -2095,7 +2117,7 @@ class Command(BaseCommand, LoggerMixin):
                                                                      license_summary, contract_result_compare))
                             self.rebuild_contract(license_summary, contract_result_compare)
 
-                            bs_rebuild = self.rebuild_bs(merged_bs_summary)
+                            bs_rebuild = self.rebuild_bs(merged_bs_summary, income_keywords_dictionary)
                             if len(bs_rebuild) > 0:
                                 license_summary[consts.BS_CLASSIFY] = bs_rebuild
 
--
libgit2 0.24.0