update excel header
Showing 3 changed files with 63 additions and 26 deletions
| ... | @@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"} | ... | @@ -38,3 +38,56 @@ OVERAGE_COL_TITLE_SET = {"账户余额", "余额"} |
| 38 | PROOF_COL_TITLE = '核对结果' | 38 | PROOF_COL_TITLE = '核对结果' |
| 39 | PROOF_RES = ('对', '错') | 39 | PROOF_RES = ('对', '错') |
| 40 | META_SHEET_TITLE = '关键信息提取和展示' | 40 | META_SHEET_TITLE = '关键信息提取和展示' |
| 41 | |||
| 42 | FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果') | ||
| 43 | FIXED_COL_AMOUNT = len(FIXED_HEADERS) | ||
| 44 | BASE_HEADERS_MAPPING = {label: idx+1 for idx, label in enumerate(FIXED_HEADERS)} | ||
| 45 | HEADERS_MAPPING = {} | ||
| 46 | # Bank of China: each header maps to the canonical column of the same name | ||
| 47 | HEADERS_MAPPING.update( | ||
| 48 | { | ||
| 49 | '记账日期': BASE_HEADERS_MAPPING['记账日期'], | ||
| 50 | '记账时间': BASE_HEADERS_MAPPING['记账时间'], | ||
| 51 | '金额': BASE_HEADERS_MAPPING['金额'], | ||
| 52 | '余额': BASE_HEADERS_MAPPING['余额'], | ||
| 53 | '交易名称': BASE_HEADERS_MAPPING['交易名称'], | ||
| 54 | '附言': BASE_HEADERS_MAPPING['附言'], | ||
| 55 | '对方账户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
| 56 | '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
| 57 | '对方开户行': BASE_HEADERS_MAPPING['对方开户行'], | ||
| 58 | } | ||
| 59 | ) | ||
| 60 | # China Construction Bank, portrait-layout table: header aliases | ||
| 61 | HEADERS_MAPPING.update( | ||
| 62 | { | ||
| 63 | '交易日期': BASE_HEADERS_MAPPING['记账日期'], | ||
| 64 | '交易金额': BASE_HEADERS_MAPPING['金额'], | ||
| 65 | '账户余额': BASE_HEADERS_MAPPING['余额'], | ||
| 66 | '摘要': BASE_HEADERS_MAPPING['附言'], | ||
| 67 | '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
| 68 | } | ||
| 69 | ) | ||
| 70 | # Agricultural Bank of China, landscape-layout table: header aliases | ||
| 71 | HEADERS_MAPPING.update( | ||
| 72 | { | ||
| 73 | '存入': BASE_HEADERS_MAPPING['金额'], | ||
| 74 | '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
| 75 | '对方名称': BASE_HEADERS_MAPPING['对方账户名'], | ||
| 76 | } | ||
| 77 | ) | ||
| 78 | # Industrial and Commercial Bank of China (ICBC), landscape-layout table: header aliases | ||
| 79 | HEADERS_MAPPING.update( | ||
| 80 | { | ||
| 81 | '对方户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
| 82 | '收入/支出金额': BASE_HEADERS_MAPPING['金额'], | ||
| 83 | '工作日期': BASE_HEADERS_MAPPING['记账日期'], | ||
| 84 | } | ||
| 85 | ) | ||
| 86 | # Bank of Beijing, landscape-layout table: header aliases | ||
| 87 | HEADERS_MAPPING.update( | ||
| 88 | { | ||
| 89 | '业务摘要': BASE_HEADERS_MAPPING['附言'], | ||
| 90 | '发生额': BASE_HEADERS_MAPPING['金额'], | ||
| 91 | } | ||
| 92 | ) | ||
| 93 | ... | ... |
| ... | @@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -83,9 +83,10 @@ class Command(BaseCommand, LoggerMixin): |
| 83 | self.edms.download(pdf_path, doc.metadata_version_id) | 83 | self.edms.download(pdf_path, doc.metadata_version_id) |
| 84 | 84 | ||
| 85 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) | 85 | excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc.id)) |
| 86 | src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | ||
| 86 | self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format( | 87 | self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format( |
| 87 | self.log_base, business_type, doc.id, pdf_path)) | 88 | self.log_base, business_type, doc.id, pdf_path)) |
| 88 | return doc_data_path, excel_path, pdf_path | 89 | return doc_data_path, excel_path, src_excel_path, pdf_path |
| 89 | 90 | ||
| 90 | @staticmethod | 91 | @staticmethod |
| 91 | def append_sheet(wb, sheets_list, img_name, role_summary): | 92 | def append_sheet(wb, sheets_list, img_name, role_summary): |
| ... | @@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -134,7 +135,7 @@ class Command(BaseCommand, LoggerMixin): |
| 134 | doc, business_type = self.get_doc_info() | 135 | doc, business_type = self.get_doc_info() |
| 135 | try: | 136 | try: |
| 136 | # 2. 从EDMS获取PDF文件 | 137 | # 2. 从EDMS获取PDF文件 |
| 137 | doc_data_path, excel_path, pdf_path = self.pdf_download(doc, business_type) | 138 | doc_data_path, excel_path, src_excel_path, pdf_path = self.pdf_download(doc, business_type) |
| 138 | # 队列为空时的处理 | 139 | # 队列为空时的处理 |
| 139 | if pdf_path is None: | 140 | if pdf_path is None: |
| 140 | time.sleep(sleep_second) | 141 | time.sleep(sleep_second) |
| ... | @@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -167,6 +168,7 @@ class Command(BaseCommand, LoggerMixin): |
| 167 | # loop.close() | 168 | # loop.close() |
| 168 | 169 | ||
| 169 | # 整合excel文件 | 170 | # 整合excel文件 |
| 171 | wb.save(src_excel_path) | ||
| 170 | wb.rebuild(role_summary) | 172 | wb.rebuild(role_summary) |
| 171 | wb.save(excel_path) | 173 | wb.save(excel_path) |
| 172 | except Exception as e: | 174 | except Exception as e: | ... | ... |
| ... | @@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex | ... | @@ -6,31 +6,13 @@ from pandas.core.indexes.datetimes import DatetimeIndex |
| 6 | from openpyxl import Workbook | 6 | from openpyxl import Workbook |
| 7 | from openpyxl.styles import Border, Side, PatternFill, numbers | 7 | from openpyxl.styles import Border, Side, PatternFill, numbers |
| 8 | from openpyxl.utils import get_column_letter | 8 | from openpyxl.utils import get_column_letter |
| 9 | from apps.doc import consts | ||
| 9 | 10 | ||
| 10 | 11 | ||
| 11 | class BSWorkbook(Workbook): | 12 | class BSWorkbook(Workbook): |
| 12 | 13 | ||
| 13 | def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs): | 14 | def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs): |
| 14 | super().__init__(*args, **kwargs) | 15 | super().__init__(*args, **kwargs) |
| 15 | self.fixed_headers = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', | ||
| 16 | '对方卡号/账号', '对方开户行', '核对结果') | ||
| 17 | self.fixed_col_amount = len(self.fixed_headers) | ||
| 18 | self.headers_mapping = { | ||
| 19 | '记账日期': 1, | ||
| 20 | '交易日期': 1, | ||
| 21 | '记账时间': 2, | ||
| 22 | '金额': 3, | ||
| 23 | '交易金额': 3, | ||
| 24 | '余额': 4, | ||
| 25 | '账户余额': 4, | ||
| 26 | '交易名称': 5, | ||
| 27 | '附言': 6, | ||
| 28 | '摘要': 6, | ||
| 29 | '对方账户名': 7, | ||
| 30 | '对方卡号/账号': 8, | ||
| 31 | '对方账号与户名': 8, | ||
| 32 | '对方开户行': 9, | ||
| 33 | } | ||
| 34 | self.meta_sheet_title = '关键信息提取和展示' | 16 | self.meta_sheet_title = '关键信息提取和展示' |
| 35 | self.blank_row = (None,) | 17 | self.blank_row = (None,) |
| 36 | self.code_header = ('页数', '电子回单验证码') | 18 | self.code_header = ('页数', '电子回单验证码') |
| ... | @@ -47,16 +29,16 @@ class BSWorkbook(Workbook): | ... | @@ -47,16 +29,16 @@ class BSWorkbook(Workbook): |
| 47 | self.MAX_MEAN = 31 | 29 | self.MAX_MEAN = 31 |
| 48 | 30 | ||
| 49 | def sheet_prune(self, ws): | 31 | def sheet_prune(self, ws): |
| 50 | ws.insert_cols(1, amount=self.fixed_col_amount) | 32 | ws.insert_cols(1, amount=consts.FIXED_COL_AMOUNT) |
| 51 | for col in range(self.fixed_col_amount + 1, ws.max_column + 1): | 33 | for col in range(consts.FIXED_COL_AMOUNT + 1, ws.max_column + 1): |
| 52 | header_value = ws.cell(1, col).value | 34 | header_value = ws.cell(1, col).value |
| 53 | header_idx = self.headers_mapping.get(header_value) | 35 | header_idx = consts.HEADERS_MAPPING.get(header_value) |
| 54 | # TODO: look up key fields a second time | 36 | # TODO: look up key fields a second time |
| 55 | if header_idx is None: | 37 | if header_idx is None: |
| 56 | continue | 38 | continue |
| 57 | letter = get_column_letter(col) | 39 | letter = get_column_letter(col) |
| 58 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col) | 40 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_idx - col) |
| 59 | ws.delete_cols(self.fixed_col_amount + 1, amount=ws.max_column) | 41 | ws.delete_cols(consts.FIXED_COL_AMOUNT + 1, amount=ws.max_column) |
| 60 | 42 | ||
| 61 | @staticmethod | 43 | @staticmethod |
| 62 | def month_split(dti, date_list): | 44 | def month_split(dti, date_list): |
| ... | @@ -151,7 +133,7 @@ class BSWorkbook(Workbook): | ... | @@ -151,7 +133,7 @@ class BSWorkbook(Workbook): |
| 151 | # 3.1.拷贝数据 | 133 | # 3.1.拷贝数据 |
| 152 | parts = month_mapping.get(month) | 134 | parts = month_mapping.get(month) |
| 153 | new_ws = self.create_sheet('{0}({1})'.format(month, role)) | 135 | new_ws = self.create_sheet('{0}({1})'.format(month, role)) |
| 154 | new_ws.append(self.fixed_headers) | 136 | new_ws.append(consts.FIXED_HEADERS) |
| 155 | for part in parts: | 137 | for part in parts: |
| 156 | ws = self.get_sheet_by_name(part[0]) | 138 | ws = self.get_sheet_by_name(part[0]) |
| 157 | for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): | 139 | for row in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): | ... | ... |
-
Please register or sign in to post a comment