c40124d4 by 周伟奇

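Update Excel header handling: move the fixed headers and header mapping into consts, add per-bank header aliases, and save the source workbook (src.xlsx) before rebuild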

1 parent e975baa4
...@@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"} ...@@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
38 PROOF_COL_TITLE = '核对结果' 38 PROOF_COL_TITLE = '核对结果'
39 PROOF_RES = ('对', '错') 39 PROOF_RES = ('对', '错')
40 META_SHEET_TITLE = '关键信息提取和展示' 40 META_SHEET_TITLE = '关键信息提取和展示'
41
42 FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果')
43 FIXED_COL_AMOUNT = len(FIXED_HEADERS)
44 BASE_HEADERS_MAPPING = {label: idx+1 for idx, label in enumerate(FIXED_HEADERS)}
45 HEADERS_MAPPING = {}
46 # 中国银行
47 HEADERS_MAPPING.update(
48 {
49 '记账日期': BASE_HEADERS_MAPPING['记账日期'],
50 '记账时间': BASE_HEADERS_MAPPING['记账时间'],
51 '金额': BASE_HEADERS_MAPPING['金额'],
52 '余额': BASE_HEADERS_MAPPING['余额'],
53 '交易名称': BASE_HEADERS_MAPPING['交易名称'],
54 '附言': BASE_HEADERS_MAPPING['附言'],
55 '对方账户名': BASE_HEADERS_MAPPING['对方账户名'],
56 '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
57 '对方开户行': BASE_HEADERS_MAPPING['对方开户行'],
58 }
59 )
60 # 竖版-表格-建设银行
61 HEADERS_MAPPING.update(
62 {
63 '交易日期': BASE_HEADERS_MAPPING['记账日期'],
64 '交易金额': BASE_HEADERS_MAPPING['金额'],
65 '账户余额': BASE_HEADERS_MAPPING['余额'],
66 '摘要': BASE_HEADERS_MAPPING['附言'],
67 '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
68 }
69 )
70 # 横版-表格-农业银行
71 HEADERS_MAPPING.update(
72 {
73 '存入': BASE_HEADERS_MAPPING['金额'],
74 '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
75 '对方名称': BASE_HEADERS_MAPPING['对方账户名'],
76 }
77 )
78 # 横版-表格-工商银行
79 HEADERS_MAPPING.update(
80 {
81 '对方户名': BASE_HEADERS_MAPPING['对方账户名'],
82 '收入/支出金额': BASE_HEADERS_MAPPING['金额'],
83 '工作日期': BASE_HEADERS_MAPPING['记账日期'],
84 }
85 )
86 # 横版-表格-北京银行
87 HEADERS_MAPPING.update(
88 {
89 '业务摘要': BASE_HEADERS_MAPPING['附言'],
90 '发生额': BASE_HEADERS_MAPPING['金额'],
91 }
92 )
93
......
...@@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin):
83 self.edms.download(pdf_path, doc.metadata_version_id) 83 self.edms.download(pdf_path, doc.metadata_version_id)
84 84
85 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) 85 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id))
86 src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
86 self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format( 87 self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format(
87 self.log_base, business_type, doc.id, pdf_path)) 88 self.log_base, business_type, doc.id, pdf_path))
88 return doc_data_path, excel_path, pdf_path 89 return doc_data_path, excel_path, src_excel_path, pdf_path
89 90
90 @staticmethod 91 @staticmethod
91 def append_sheet(wb, sheets_list, img_name, role_summary): 92 def append_sheet(wb, sheets_list, img_name, role_summary):
...@@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin):
134 doc, business_type = self.get_doc_info() 135 doc, business_type = self.get_doc_info()
135 try: 136 try:
136 # 2. 从EDMS获取PDF文件 137 # 2. 从EDMS获取PDF文件
137 doc_data_path, excel_path, pdf_path = self.pdf_download(doc, business_type) 138 doc_data_path, excel_path, src_excel_path, pdf_path = self.pdf_download(doc, business_type)
138 # 队列为空时的处理 139 # 队列为空时的处理
139 if pdf_path is None: 140 if pdf_path is None:
140 time.sleep(sleep_second) 141 time.sleep(sleep_second)
...@@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin):
167 # loop.close() 168 # loop.close()
168 169
169 # 整合excel文件 170 # 整合excel文件
171 wb.save(src_excel_path)
170 wb.rebuild(role_summary) 172 wb.rebuild(role_summary)
171 wb.save(excel_path) 173 wb.save(excel_path)
172 except Exception as e: 174 except Exception as e:
......
...@@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex ...@@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex
6 from openpyxl import Workbook 6 from openpyxl import Workbook
7 from openpyxl.styles import Border, Side, PatternFill, numbers 7 from openpyxl.styles import Border, Side, PatternFill, numbers
8 from openpyxl.utils import get_column_letter 8 from openpyxl.utils import get_column_letter
9 from apps.doc import consts
9 10
10 11
11 class BSWorkbook(Workbook): 12 class BSWorkbook(Workbook):
12 13
13 def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs): 14 def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs):
14 super().__init__(*args, **kwargs) 15 super().__init__(*args, **kwargs)
15 self.fixed_headers = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名',
16 '对方卡号/账号', '对方开户行', '核对结果')
17 self.fixed_col_amount = len(self.fixed_headers)
18 self.headers_mapping = {
19 '记账日期': 1,
20 '交易日期': 1,
21 '记账时间': 2,
22 '金额': 3,
23 '交易金额': 3,
24 '余额': 4,
25 '账户余额': 4,
26 '交易名称': 5,
27 '附言': 6,
28 '摘要': 6,
29 '对方账户名': 7,
30 '对方卡号/账号': 8,
31 '对方账号与户名': 8,
32 '对方开户行': 9,
33 }
34 self.meta_sheet_title = '关键信息提取和展示' 16 self.meta_sheet_title = '关键信息提取和展示'
35 self.blank_row = (None,) 17 self.blank_row = (None,)
36 self.code_header = ('页数', '电子回单验证码') 18 self.code_header = ('页数', '电子回单验证码')
...@@ -47,16 +29,16 @@ class BSWorkbook(Workbook): ...@@ -47,16 +29,16 @@ class BSWorkbook(Workbook):
47 self.MAX_MEAN = 31 29 self.MAX_MEAN = 31
48 30
49 def sheet_prune(self, ws): 31 def sheet_prune(self, ws):
50 ws.insert_cols(1, amount=self.fixed_col_amount) 32 ws.insert_cols(1, amount=consts.FIXED_COL_AMOUNT)
51 for col in range(self.fixed_col_amount + 1, ws.max_column + 1): 33 for col in range(consts.FIXED_COL_AMOUNT + 1, ws.max_column + 1):
52 header_value = ws.cell(1, col).value 34 header_value = ws.cell(1, col).value
53 header_idx = self.headers_mapping.get(header_value) 35 header_idx = consts.HEADERS_MAPPING.get(header_value)
54 # TODO 关键字段再次查找 36 # TODO 关键字段再次查找
55 if header_idx is None: 37 if header_idx is None:
56 continue 38 continue
57 letter = get_column_letter(col) 39 letter = get_column_letter(col)
58 ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col) 40 ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col)
59 ws.delete_cols(self.fixed_col_amount + 1, amount=ws.max_column) 41 ws.delete_cols(consts.FIXED_COL_AMOUNT + 1, amount=ws.max_column)
60 42
61 @staticmethod 43 @staticmethod
62 def month_split(dti, date_list): 44 def month_split(dti, date_list):
...@@ -151,7 +133,7 @@ class BSWorkbook(Workbook): ...@@ -151,7 +133,7 @@ class BSWorkbook(Workbook):
151 # 3.1.拷贝数据 133 # 3.1.拷贝数据
152 parts = month_mapping.get(month) 134 parts = month_mapping.get(month)
153 new_ws = self.create_sheet('{0}({1})'.format(month, role)) 135 new_ws = self.create_sheet('{0}({1})'.format(month, role))
154 new_ws.append(self.fixed_headers) 136 new_ws.append(consts.FIXED_HEADERS)
155 for part in parts: 137 for part in parts:
156 ws = self.get_sheet_by_name(part[0]) 138 ws = self.get_sheet_by_name(part[0])
157 for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): 139 for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!