update excel header
Showing 3 changed files with 63 additions and 26 deletions
... | @@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"} | ... | @@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"} |
38 | PROOF_COL_TITLE = '核对结果' | 38 | PROOF_COL_TITLE = '核对结果' |
39 | PROOF_RES = ('对', '错') | 39 | PROOF_RES = ('对', '错') |
40 | META_SHEET_TITLE = '关键信息提取和展示' | 40 | META_SHEET_TITLE = '关键信息提取和展示' |
41 | |||
42 | FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果') | ||
43 | FIXED_COL_AMOUNT = len(FIXED_HEADERS) | ||
44 | BASE_HEADERS_MAPPING = {label: idx+1 for idx, label in enumerate(FIXED_HEADERS)} | ||
45 | HEADERS_MAPPING = {} | ||
46 | # 中国银行 | ||
47 | HEADERS_MAPPING.update( | ||
48 | { | ||
49 | '记账日期': BASE_HEADERS_MAPPING['记账日期'], | ||
50 | '记账时间': BASE_HEADERS_MAPPING['记账时间'], | ||
51 | '金额': BASE_HEADERS_MAPPING['金额'], | ||
52 | '余额': BASE_HEADERS_MAPPING['余额'], | ||
53 | '交易名称': BASE_HEADERS_MAPPING['交易名称'], | ||
54 | '附言': BASE_HEADERS_MAPPING['附言'], | ||
55 | '对方账户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
56 | '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
57 | '对方开户行': BASE_HEADERS_MAPPING['对方开户行'], | ||
58 | } | ||
59 | ) | ||
60 | # 竖版-表格-建设银行 | ||
61 | HEADERS_MAPPING.update( | ||
62 | { | ||
63 | '交易日期': BASE_HEADERS_MAPPING['记账日期'], | ||
64 | '交易金额': BASE_HEADERS_MAPPING['金额'], | ||
65 | '账户余额': BASE_HEADERS_MAPPING['余额'], | ||
66 | '摘要': BASE_HEADERS_MAPPING['附言'], | ||
67 | '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
68 | } | ||
69 | ) | ||
70 | # 横版-表格-农业银行 | ||
71 | HEADERS_MAPPING.update( | ||
72 | { | ||
73 | '存入': BASE_HEADERS_MAPPING['金额'], | ||
74 | '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
75 | '对方名称': BASE_HEADERS_MAPPING['对方账户名'], | ||
76 | } | ||
77 | ) | ||
78 | # 横版-表格-工商银行 | ||
79 | HEADERS_MAPPING.update( | ||
80 | { | ||
81 | '对方户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
82 | '收入/支出金额': BASE_HEADERS_MAPPING['金额'], | ||
83 | '工作日期': BASE_HEADERS_MAPPING['记账日期'], | ||
84 | } | ||
85 | ) | ||
86 | # 横版-表格-北京银行 | ||
87 | HEADERS_MAPPING.update( | ||
88 | { | ||
89 | '业务摘要': BASE_HEADERS_MAPPING['附言'], | ||
90 | '发生额': BASE_HEADERS_MAPPING['金额'], | ||
91 | } | ||
92 | ) | ||
93 | ... | ... |
... | @@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin): |
83 | self.edms.download(pdf_path, doc.metadata_version_id) | 83 | self.edms.download(pdf_path, doc.metadata_version_id) |
84 | 84 | ||
85 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) | 85 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) |
86 | src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | ||
86 | self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format( | 87 | self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format( |
87 | self.log_base, business_type, doc.id, pdf_path)) | 88 | self.log_base, business_type, doc.id, pdf_path)) |
88 | return doc_data_path, excel_path, pdf_path | 89 | return doc_data_path, excel_path, src_excel_path, pdf_path |
89 | 90 | ||
90 | @staticmethod | 91 | @staticmethod |
91 | def append_sheet(wb, sheets_list, img_name, role_summary): | 92 | def append_sheet(wb, sheets_list, img_name, role_summary): |
... | @@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): |
134 | doc, business_type = self.get_doc_info() | 135 | doc, business_type = self.get_doc_info() |
135 | try: | 136 | try: |
136 | # 2. 从EDMS获取PDF文件 | 137 | # 2. 从EDMS获取PDF文件 |
137 | doc_data_path, excel_path, pdf_path = self.pdf_download(doc, business_type) | 138 | doc_data_path, excel_path, src_excel_path, pdf_path = self.pdf_download(doc, business_type) |
138 | # 队列为空时的处理 | 139 | # 队列为空时的处理 |
139 | if pdf_path is None: | 140 | if pdf_path is None: |
140 | time.sleep(sleep_second) | 141 | time.sleep(sleep_second) |
... | @@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin): |
167 | # loop.close() | 168 | # loop.close() |
168 | 169 | ||
169 | # 整合excel文件 | 170 | # 整合excel文件 |
171 | wb.save(src_excel_path) | ||
170 | wb.rebuild(role_summary) | 172 | wb.rebuild(role_summary) |
171 | wb.save(excel_path) | 173 | wb.save(excel_path) |
172 | except Exception as e: | 174 | except Exception as e: | ... | ... |
... | @@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex | ... | @@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex |
6 | from openpyxl import Workbook | 6 | from openpyxl import Workbook |
7 | from openpyxl.styles import Border, Side, PatternFill, numbers | 7 | from openpyxl.styles import Border, Side, PatternFill, numbers |
8 | from openpyxl.utils import get_column_letter | 8 | from openpyxl.utils import get_column_letter |
9 | from apps.doc import consts | ||
9 | 10 | ||
10 | 11 | ||
11 | class BSWorkbook(Workbook): | 12 | class BSWorkbook(Workbook): |
12 | 13 | ||
13 | def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs): | 14 | def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs): |
14 | super().__init__(*args, **kwargs) | 15 | super().__init__(*args, **kwargs) |
15 | self.fixed_headers = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', | ||
16 | '对方卡号/账号', '对方开户行', '核对结果') | ||
17 | self.fixed_col_amount = len(self.fixed_headers) | ||
18 | self.headers_mapping = { | ||
19 | '记账日期': 1, | ||
20 | '交易日期': 1, | ||
21 | '记账时间': 2, | ||
22 | '金额': 3, | ||
23 | '交易金额': 3, | ||
24 | '余额': 4, | ||
25 | '账户余额': 4, | ||
26 | '交易名称': 5, | ||
27 | '附言': 6, | ||
28 | '摘要': 6, | ||
29 | '对方账户名': 7, | ||
30 | '对方卡号/账号': 8, | ||
31 | '对方账号与户名': 8, | ||
32 | '对方开户行': 9, | ||
33 | } | ||
34 | self.meta_sheet_title = '关键信息提取和展示' | 16 | self.meta_sheet_title = '关键信息提取和展示' |
35 | self.blank_row = (None,) | 17 | self.blank_row = (None,) |
36 | self.code_header = ('页数', '电子回单验证码') | 18 | self.code_header = ('页数', '电子回单验证码') |
... | @@ -47,16 +29,16 @@ class BSWorkbook(Workbook): | ... | @@ -47,16 +29,16 @@ class BSWorkbook(Workbook): |
47 | self.MAX_MEAN = 31 | 29 | self.MAX_MEAN = 31 |
48 | 30 | ||
49 | def sheet_prune(self, ws): | 31 | def sheet_prune(self, ws): |
50 | ws.insert_cols(1, amount=self.fixed_col_amount) | 32 | ws.insert_cols(1, amount=consts.FIXED_COL_AMOUNT) |
51 | for col in range(self.fixed_col_amount + 1, ws.max_column + 1): | 33 | for col in range(consts.FIXED_COL_AMOUNT + 1, ws.max_column + 1): |
52 | header_value = ws.cell(1, col).value | 34 | header_value = ws.cell(1, col).value |
53 | header_idx = self.headers_mapping.get(header_value) | 35 | header_idx = consts.HEADERS_MAPPING.get(header_value) |
54 | # TODO 关键字段再次查找 | 36 | # TODO 关键字段再次查找 |
55 | if header_idx is None: | 37 | if header_idx is None: |
56 | continue | 38 | continue |
57 | letter = get_column_letter(col) | 39 | letter = get_column_letter(col) |
58 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col) | 40 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col) |
59 | ws.delete_cols(self.fixed_col_amount + 1, amount=ws.max_column) | 41 | ws.delete_cols(consts.FIXED_COL_AMOUNT + 1, amount=ws.max_column) |
60 | 42 | ||
61 | @staticmethod | 43 | @staticmethod |
62 | def month_split(dti, date_list): | 44 | def month_split(dti, date_list): |
... | @@ -151,7 +133,7 @@ class BSWorkbook(Workbook): | ... | @@ -151,7 +133,7 @@ class BSWorkbook(Workbook): |
151 | # 3.1.拷贝数据 | 133 | # 3.1.拷贝数据 |
152 | parts = month_mapping.get(month) | 134 | parts = month_mapping.get(month) |
153 | new_ws = self.create_sheet('{0}({1})'.format(month, role)) | 135 | new_ws = self.create_sheet('{0}({1})'.format(month, role)) |
154 | new_ws.append(self.fixed_headers) | 136 | new_ws.append(consts.FIXED_HEADERS) |
155 | for part in parts: | 137 | for part in parts: |
156 | ws = self.get_sheet_by_name(part[0]) | 138 | ws = self.get_sheet_by_name(part[0]) |
157 | for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): | 139 | for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): | ... | ... |
-
Please register or sign in to post a comment