update excel header

周伟奇
Showing 3 changed files with 63 additions and 26 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/ocr/wb.py
--- a/src/apps/doc/consts.py
View file @c40124d
+++ b/src/apps/doc/consts.py
View file @c40124d
@@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
 PROOF_COL_TITLE = '核对结果'
 PROOF_RES = ('对', '错')
 META_SHEET_TITLE = '关键信息提取和展示'
+FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果')
+FIXED_COL_AMOUNT = len(FIXED_HEADERS)
+BASE_HEADERS_MAPPING = {label: idx+1 for idx, label in enumerate(FIXED_HEADERS)}
+HEADERS_MAPPING = {}
+# 中国银行
+HEADERS_MAPPING.update(
+    {
+        '记账日期': BASE_HEADERS_MAPPING['记账日期'],
+        '记账时间': BASE_HEADERS_MAPPING['记账时间'],
+        '金额': BASE_HEADERS_MAPPING['金额'],
+        '余额': BASE_HEADERS_MAPPING['余额'],
+        '交易名称': BASE_HEADERS_MAPPING['交易名称'],
+        '附言': BASE_HEADERS_MAPPING['附言'],
+        '对方账户名': BASE_HEADERS_MAPPING['对方账户名'],
+        '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
+        '对方开户行': BASE_HEADERS_MAPPING['对方开户行'],
+    }
+)
+# 竖版-表格-建设银行
+HEADERS_MAPPING.update(
+    {
+        '交易日期': BASE_HEADERS_MAPPING['记账日期'],
+        '交易金额': BASE_HEADERS_MAPPING['金额'],
+        '账户余额': BASE_HEADERS_MAPPING['余额'],
+        '摘要': BASE_HEADERS_MAPPING['附言'],
+        '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
+    }
+)
+# 横版-表格-农业银行
+HEADERS_MAPPING.update(
+    {
+        '存入': BASE_HEADERS_MAPPING['金额'],
+        '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
+        '对方名称': BASE_HEADERS_MAPPING['对方账户名'],
+    }
+)
+# 横版-表格-工商银行
+HEADERS_MAPPING.update(
+    {
+        '对方户名': BASE_HEADERS_MAPPING['对方账户名'],
+        '收入/支出金额': BASE_HEADERS_MAPPING['金额'],
+        '工作日期': BASE_HEADERS_MAPPING['记账日期'],
+    }
+)
+# 横版-表格-北京银行
+HEADERS_MAPPING.update(
+    {
+        '业务摘要': BASE_HEADERS_MAPPING['附言'],
+        '发生额': BASE_HEADERS_MAPPING['金额'],
+    }
+)
--- a/src/apps/doc/management/commands/doc_ocr_process.py
View file @c40124d
+++ b/src/apps/doc/management/commands/doc_ocr_process.py
View file @c40124d
@@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin):
            self.edms.download(pdf_path, doc.metadata_version_id)
        excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id))
+        src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
        self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format(
            self.log_base, business_type, doc.id, pdf_path))
-        return doc_data_path, excel_path, pdf_path
+        return doc_data_path, excel_path, src_excel_path, pdf_path
    @staticmethod
    def append_sheet(wb, sheets_list, img_name, role_summary):
@@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin):
            doc, business_type = self.get_doc_info()
            try:
                # 2. 从EDMS获取PDF文件
-                doc_data_path, excel_path, pdf_path = self.pdf_download(doc, business_type)
+                doc_data_path, excel_path, src_excel_path, pdf_path = self.pdf_download(doc, business_type)
                # 队列为空时的处理
                if pdf_path is None:
                    time.sleep(sleep_second)
@@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin):
                # loop.close()
                # 整合excel文件
+                wb.save(src_excel_path)
                wb.rebuild(role_summary)
                wb.save(excel_path)
            except Exception as e:
--- a/src/apps/doc/ocr/wb.py
View file @c40124d
+++ b/src/apps/doc/ocr/wb.py
View file @c40124d
@@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex
 from openpyxl import Workbook
 from openpyxl.styles import Border, Side, PatternFill, numbers
 from openpyxl.utils import get_column_letter
+from apps.doc import consts
 class BSWorkbook(Workbook):
    def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        self.fixed_headers = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名',
-                              '对方卡号/账号', '对方开户行', '核对结果')
-        self.fixed_col_amount = len(self.fixed_headers)
-        self.headers_mapping = {
-            '记账日期': 1,
-            '交易日期': 1,
-            '记账时间': 2,
-            '金额': 3,
-            '交易金额': 3,
-            '余额': 4,
-            '账户余额': 4,
-            '交易名称': 5,
-            '附言': 6,
-            '摘要': 6,
-            '对方账户名': 7,
-            '对方卡号/账号': 8,
-            '对方账号与户名': 8,
-            '对方开户行': 9,
-        }
        self.meta_sheet_title = '关键信息提取和展示'
        self.blank_row = (None,)
        self.code_header = ('页数', '电子回单验证码')
@@ -47,16 +29,16 @@ class BSWorkbook(Workbook):
        self.MAX_MEAN = 31
    def sheet_prune(self, ws):
-        ws.insert_cols(1, amount=self.fixed_col_amount)
+        ws.insert_cols(1, amount=consts.FIXED_COL_AMOUNT)
-        for col in range(self.fixed_col_amount + 1, ws.max_column + 1):
+        for col in range(consts.FIXED_COL_AMOUNT + 1, ws.max_column + 1):
            header_value = ws.cell(1, col).value
-            header_idx = self.headers_mapping.get(header_value)
+            header_idx = consts.HEADERS_MAPPING.get(header_value)
            # TODO 关键字段再次查找
            if header_idx is None:
                continue
            letter = get_column_letter(col)
            ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col)
-        ws.delete_cols(self.fixed_col_amount + 1, amount=ws.max_column)
+        ws.delete_cols(consts.FIXED_COL_AMOUNT + 1, amount=ws.max_column)
    @staticmethod
    def month_split(dti, date_list):
@@ -151,7 +133,7 @@ class BSWorkbook(Workbook):
            # 3.1.拷贝数据
            parts = month_mapping.get(month)
            new_ws = self.create_sheet('{0}({1})'.format(month, role))
-            new_ws.append(self.fixed_headers)
+            new_ws.append(consts.FIXED_HEADERS)
            for part in parts:
                ws = self.get_sheet_by_name(part[0])
                for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):