c8a54f58 by 周伟奇

Merge branch 'feature/license' into feature/mssql

2 parents 540f8777 56281e38
......@@ -34,4 +34,5 @@ data/*
src/*.sh
test*
ocr_test.py
\ No newline at end of file
ocr_test.py
ocr_process.py
\ No newline at end of file
......
import copy
PAGE_DEFAULT = 1
PAGE_SIZE_DEFAULT = 10
......@@ -53,15 +55,19 @@ TRANS_MAP = {
'L': "1",
'A': "4",
's': "5",
'S': "5",
'b': "6",
'g': "9",
'E': "9",
'B': "13",
}
TRANS = str.maketrans(TRANS_MAP)
ERROR_CHARS = {'.', ':', ':', '•', '}
ERROR_CHARS = {'.', '。', ':', ':', '•', '·', ',', ','}
SKIP_IMG_SHEET_NAME = '未处理图片'
SKIP_IMG_SHEET_HEADER = ('页码', '序号')
......@@ -70,12 +76,34 @@ UNKNOWN_CARD = '未知卡号'
UNKNOWN_ROLE = '未知户名'
DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d']
AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
PROOF_COL_TITLE = '核对结果'
PROOF_RES = ('对', '错')
META_SHEET_TITLE = '关键信息提取和展示'
# Keys naming the role a recognised statement column plays.
SUMMARY_KEY = 'summary_col'
DATE_KEY = 'date_col'
AMOUNT_KEY = 'amount_col'
OVER_KEY = 'over_col'
IMCOME_KEY = 'income_col'  # identifier is spelled "IMCOME"; kept for existing callers
OUTLAY_KEY = 'outlay_col'
BORROW_KEY = 'borrow_col'

# Further lookup keys (their consumers are not visible in this module).
MIN_ROW_KEY = 'min_row'
FIND_COUNT_KEY = 'find_count'
FIND_COL_KEY = 'find_col'
HEADER_KEY = 'header'

# The column-role keys, in a fixed order.
KEY_LIST = [
    SUMMARY_KEY,
    DATE_KEY,
    OVER_KEY,
    BORROW_KEY,
    AMOUNT_KEY,
    IMCOME_KEY,
    OUTLAY_KEY,
]

# Column-role key -> zero-based slot. The numbers coincide with the positions
# of the summary, date, amount, balance, income, outlay and borrow/lend labels
# in FIXED_HEADERS.
CLASSIFY_MAP = dict((
    (SUMMARY_KEY, 5),
    (DATE_KEY, 0),
    (AMOUNT_KEY, 2),
    (OVER_KEY, 3),
    (IMCOME_KEY, 11),
    (OUTLAY_KEY, 12),
    (BORROW_KEY, 10),
))
FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号',
'对方开户行', '核对结果', '借贷', '收入', '支出')
FIXED_COL_AMOUNT = len(FIXED_HEADERS)
......@@ -103,36 +131,60 @@ OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取
# ------------------普通打印-全格线--------------------------------------------------------------------------------------
HEADERS_MAPPING = {}
# 借贷
HEADERS_MAPPING.update(
{
'借贷': BORROW_KEY,
'借贷状态': BORROW_KEY,
'收/支': BORROW_KEY,
}
)
# 收入
HEADERS_MAPPING.update(
{
'收入金额': IMCOME_KEY,
'收入': IMCOME_KEY,
'存入': IMCOME_KEY,
'存入金额(贷)': IMCOME_KEY,
'存入金额(贷)': IMCOME_KEY,
}
)
# 支出
HEADERS_MAPPING.update(
{
'支出金额': OUTLAY_KEY,
'支出': OUTLAY_KEY,
'支取金额(借)': OUTLAY_KEY,
'支取金额(借)': OUTLAY_KEY,
}
)
# 横版-表格-中国银行(不规则)
HEADERS_MAPPING.update(
{
'记账日期': BASE_HEADERS_MAPPING['记账日期'],
'记账时间': BASE_HEADERS_MAPPING['记账时间'],
'金额': BASE_HEADERS_MAPPING['金额'],
'余额': BASE_HEADERS_MAPPING['余额'],
'交易名称': BASE_HEADERS_MAPPING['交易名称'],
'附言': BASE_HEADERS_MAPPING['附言'],
'对方账户名': BASE_HEADERS_MAPPING['对方账户名'],
'对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'对方开户行': BASE_HEADERS_MAPPING['对方开户行'],
'记账日期': DATE_KEY,
'金额': AMOUNT_KEY,
'余额': OVER_KEY,
'附言': SUMMARY_KEY,
}
)
# 横版-表格-农业银行-中国农业银行个人账户明细
HEADERS_MAPPING.update(
{
'交易日期': BASE_HEADERS_MAPPING['记账日期'],
# '存入': BASE_HEADERS_MAPPING['金额'],
'对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'对方名称': BASE_HEADERS_MAPPING['对方账户名'],
'摘要': BASE_HEADERS_MAPPING['附言'],
'交易日期': DATE_KEY,
# '存入': AMOUNT_KEY,
'摘要': SUMMARY_KEY,
}
)
# 横版-表格-北京银行
HEADERS_MAPPING.update(
{
'业务摘要': BASE_HEADERS_MAPPING['附言'],
'发生额': BASE_HEADERS_MAPPING['金额'],
'对方户名': BASE_HEADERS_MAPPING['对方账户名'],
'业务摘要': SUMMARY_KEY,
'发生额': AMOUNT_KEY,
}
)
# 横版-表格-工商银行 借记卡账户历史明细清单
......@@ -142,8 +194,8 @@ HEADERS_MAPPING.update(
# 工商银行历史明细(申请单号:20042501303039397888)
HEADERS_MAPPING.update(
{
'收入/支出金额': BASE_HEADERS_MAPPING['金额'],
'工作日期': BASE_HEADERS_MAPPING['记账日期'],
'收入/支出金额': AMOUNT_KEY,
'工作日期': DATE_KEY,
}
)
......@@ -153,26 +205,23 @@ HEADERS_MAPPING.update(
# 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2)
HEADERS_MAPPING.update(
{
'交易金额': BASE_HEADERS_MAPPING['金额'],
'账户余额': BASE_HEADERS_MAPPING['余额'],
'对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
'交易金额': AMOUNT_KEY,
'账户余额': OVER_KEY,
}
)
# 微信
HEADERS_MAPPING.update(
{
'交易时间': BASE_HEADERS_MAPPING['记账时间'],
'交易类型': BASE_HEADERS_MAPPING['附言'],
'金额(元)': BASE_HEADERS_MAPPING['金额'],
'金额(元)': BASE_HEADERS_MAPPING['金额'],
'交易对方': BASE_HEADERS_MAPPING['对方账户名'],
'交易类型': SUMMARY_KEY,
'金额(元)': AMOUNT_KEY,
'金额(元)': AMOUNT_KEY,
}
)
# 支付宝
HEADERS_MAPPING.update(
{
'时间': BASE_HEADERS_MAPPING['记账日期'],
'名称/备注': BASE_HEADERS_MAPPING['附言'],
'时间': DATE_KEY,
'名称/备注': SUMMARY_KEY,
}
)
......@@ -182,33 +231,28 @@ HEADERS_MAPPING.update(
# 竖版-无表格-农业银行CH-B008805428
HEADERS_MAPPING.update(
{
'摘要/附言': BASE_HEADERS_MAPPING['附言'],
'交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
'摘要/附言': SUMMARY_KEY,
}
)
# 农业银行-窄页
HEADERS_MAPPING.update(
{
'交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 竖版-特殊-农商行
HEADERS_MAPPING.update(
{
'交易发生额': BASE_HEADERS_MAPPING['金额'],
'交易发生额': AMOUNT_KEY,
}
)
# 横版-特殊-中信银行-账户交易明细
HEADERS_MAPPING.update(
{
'对方银行': BASE_HEADERS_MAPPING['对方开户行'],
'交易摘要': BASE_HEADERS_MAPPING['附言'],
'交易摘要': SUMMARY_KEY,
}
)
# 平安电子账单
HEADERS_MAPPING.update(
{
'借贷发生额(借:-贷:+)': BASE_HEADERS_MAPPING['金额'],
'借贷发生额(借:-贷:+)': AMOUNT_KEY,
'借贷发生额(借:-贷:+)': AMOUNT_KEY,
}
)
......@@ -218,7 +262,7 @@ HEADERS_MAPPING.update(
# 竖版-无表格-招商银行账户历史交易明细表
HEADERS_MAPPING.update(
{
'联机余额': BASE_HEADERS_MAPPING['余额'],
'联机余额': OVER_KEY,
}
)
# 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
......@@ -226,28 +270,23 @@ HEADERS_MAPPING.update(
# 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单
HEADERS_MAPPING.update(
{
'交易金额(元)': BASE_HEADERS_MAPPING['金额'],
'交易金额(元)': BASE_HEADERS_MAPPING['金额'],
'账户余额(元)': BASE_HEADERS_MAPPING['余额'],
'账户余额(元)': BASE_HEADERS_MAPPING['余额'],
'对手方户名': BASE_HEADERS_MAPPING['对方账户名'],
'对手方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],
'交易金额(元)': AMOUNT_KEY,
'交易金额(元)': AMOUNT_KEY,
'账户余额(元)': OVER_KEY,
'账户余额(元)': OVER_KEY,
}
)
# 横版-无表格-广发银行-账户交易历史 --> 已废弃
# 竖版-无表格-广发银行-账户交易历史 --> 已废弃
HEADERS_MAPPING.update(
{
'会计日期': BASE_HEADERS_MAPPING['记账日期'],
'对手户名': BASE_HEADERS_MAPPING['对方账户名'],
'对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'会计日期': DATE_KEY,
}
)
# 招行电子账单 TODO 有英文,需测试
HEADERS_MAPPING.update(
{
'对手信息': BASE_HEADERS_MAPPING['对方账户名'],
'摘要代码': BASE_HEADERS_MAPPING['附言'],
'摘要代码': SUMMARY_KEY,
}
)
# 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
......@@ -255,46 +294,36 @@ HEADERS_MAPPING.update(
# 横版-无表格-民生银行
HEADERS_MAPPING.update(
{
'摘要信息': BASE_HEADERS_MAPPING['附言'],
'对方行名': BASE_HEADERS_MAPPING['对方开户行'],
'摘要信息': SUMMARY_KEY,
}
)
# 竖版-无表格-农业银行整数
# 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单
HEADERS_MAPPING.update(
{
'对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf
# 竖版-无表格-农业银行-扩张.pdf
# 竖版-无表格-农业银行-缩进.pdf
HEADERS_MAPPING.update(
{
'日期': BASE_HEADERS_MAPPING['记账日期'],
'短摘要': BASE_HEADERS_MAPPING['附言'],
'本次余额': BASE_HEADERS_MAPPING['余额'],
'日期': DATE_KEY,
'短摘要': SUMMARY_KEY,
'本次余额': OVER_KEY,
}
)
# 竖版-无表格-农业银行-无标题(对手帐号)
HEADERS_MAPPING.update(
{
'交易后余额': BASE_HEADERS_MAPPING['余额'],
'对手帐号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'交易后余额': OVER_KEY,
}
)
# 竖版-无表格-农商行(非常规)
HEADERS_MAPPING.update(
{
'交易说明': BASE_HEADERS_MAPPING['附言'],
'交易说明': SUMMARY_KEY,
}
)
# 竖版-无表格-工商银行 抬头三行 活期历史明细清单
HEADERS_MAPPING.update(
{
'对方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# -----------针式打印-全格线--------------------------------------------------------------------------------------------
# 竖版-表格-建设银行-中国建设银行活期账户交易明细
......@@ -302,25 +331,19 @@ HEADERS_MAPPING.update(
# 竖版-表格-建设银行-对私活期账户明细- (1).pdf
HEADERS_MAPPING.update(
{
'帐户余额': BASE_HEADERS_MAPPING['余额'],
'对方帐户名称': BASE_HEADERS_MAPPING['对方账户名'],
'帐户余额': OVER_KEY,
}
)
# 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录
HEADERS_MAPPING.update(
{
'交易日期 记账日期': BASE_HEADERS_MAPPING['记账日期'],
'交易日期 记账日期': DATE_KEY,
}
)
# ----------针式打印-部分格线------------------------------------------------------------------------------------------
# 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户)
# 竖版-特殊-邮储银行-账户交易明细(客户)
HEADERS_MAPPING.update(
{
'对方账号/卡号/汇票号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# --------------------------------------------------------------------------------------------------------------------
......@@ -432,63 +455,6 @@ HEADERS_MAPPING.update(
OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None, None, None)
# {
# "0":"其他",
# "1":"普通打印-全表格-中国农业银行",
# "2":"普通打印-全表格-中国银行",
# "3":"普通打印-全表格-北京银行",
# "4":"普通打印-全表格-工商银行",
# "5":"普通打印-全表格-建设银行",
# "6":"普通打印-全表格-微信账单",
# "7":"普通打印-全表格-支付宝账单",
# "8":"普通打印-无格线-中国邮政储蓄银行",
# "9":"普通打印-无格线-交通银行",
# "10":"普通打印-无格线-农业银行整数",
# "11":"普通打印-无格线-农业银行银行活期扩张缩进",
# "12":"普通打印-无格线-招商银行",
# "13":"普通打印-无格线-招行电子账单",
# "14":"普通打印-无格线-民生银行",
# "15":"普通打印-部分格线-横版-中信银行",
# "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页",
# "17":"普通打印-部分格线-竖版-农业银行",
# "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细",
# "19":"普通打印-部分格线-竖版-平安电子账单",
# "20":"针式打印-全格线-建设银行",
# "21":"针式打印-部分格线-竖版-邮储银行账户交易",
# "22":"针式打印-部分格线-邮储银行一本通绿卡"
# }
# CLASSIFY_LIST = [
# ('其他', OTHER_TUPLE),
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
# ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
# ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# "4":"普通打印-全表格-中国银行",
# "5":"普通打印-全表格-农业银行-10列",
# "6":"普通打印-全表格-农业银行-10列-1",
......@@ -563,6 +529,49 @@ CLASSIFY_LIST = [
('其他', OTHER_TUPLE),
]
CLASSIFY_HEADER_LIST = [
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
('记账日期', '记账时间', '币别', '金额', '余额', '交易名称', '渠道', '网点名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行'),
('交易日期', '交易网点', '存入', '支出', '余额', '对方账号', '对方名称', '摘要', '渠道', '附言'),
('序号', '日期', '摘要', '交易金额', '余额', '对方账号', '对方名称', '交易地点', '渠道', '附言'),
('交易日期', '摘要', '交易金额', '余额', '交易渠道', '交易网点', '对方账号', '对方名称', '附言'),
('交易日期', '业务摘要', '收/支', '发生额', '余额', '对方户名', '对方账号', '交易渠道'),
('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '渠道'),
('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '对方户名', '对方账号', '渠道'),
(None, '摘要', '交易日期', '交易金额', '账户余额', '商户/网点号及其名称', '对方账号与户名'),
('交易单号', '交易时间', '交易类型', '收/支/其他', '交易方式', '金额(元)', '交易对方', '商户单号'),
('流水号', '时间', '名称/备注', '收入', '支出', '账户余额', '资金渠道'),
('交易日期', '记账日期', '交易地点', '交易类型', '借贷状态', '交易金额', '余额'),
('交易日期', '交易类型', '交易金额(元)', '账户余额(元)', '操作柜员'),
('交易日期', '交易类型', '交易币种', '交易金额(元)', '账户余额(元)', '对手方户名', '对手方账户', '收支类型'),
('日期', '时间', '日志号', '短摘要', '交易金额', '本次余额', '交易网点', '渠道', '附言'),
('交易日期', '摘要/附言', '交易金额', '对方账号和户名'),
('记账日期', '货币', '交易金额', '联机余额', '冲补账', '交易摘要'),
('记账日期', '货币', '交易金额', '联机余额', '交易摘要', '对手信息'),
('凭证类型', '凭证号码', '交易时间', '摘要', '交易金额', '账户余额', '现转标志', '交易渠道', '交易机构', '对方户名', '对方行名'),
('交易日期', '交易摘要', '收入金额', '支出金额', '账户余额', '对方户名', '对方账号', '对方银行', '交易流水号'),
('交易日期', '摘要/附言', '交易金额', '余额', '交易地点/对方账号和户名'),
('日期', '地点', '摘要', '存入', '支出', '余额', '对方账号', '对方户名'),
('日期', '摘要', '交易金额', '余额', '地点', '交易对手账号', '对方户名'),
('序号', '交易日期', '交易网点', '摘要', '借贷发生额(借:-贷:+)', '账户余额'),
('序号', '摘要', '币别', '钞汇', '交易日期', '交易金额', '账户余额', '交易地点附言', '对方账号与户名'),
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
OTHER_TUPLE,
]
# ----------license相关------------------------------------------------------------------------------------------------
# "0":"AVT Invioce",
......@@ -603,9 +612,9 @@ RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1
VAT_CN_NAME = 'VAT普票'
VAT_CLASSIFY = 0
VAT_FIELD_ORDER = (('发票代码', '发票代码'),
('发票代码(开具)', '发票代码(开具)'),
('发票代码_开具', '发票代码(开具)'),
('发票号码', '发票号码'),
('发票号码(开具)', '发票号码(开具)'),
('发票号码_开具', '发票号码(开具)'),
('开票日期', '开票日期'),
('校验码', '校验码'),
('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'),
......@@ -622,7 +631,7 @@ VAT_FIELD_ORDER = (('发票代码', '发票代码'),
('销方纳税人识别号', '销售方纳税人识别号'),
('销方地址、电话', '销售方地址、电话'),
('销方开户行及账号', '销售方开户行及账号'),
('销售方:(章)', '销售方:(章)'),
('下盖章', '销售方:(章)'),
('备注', '备注'),)
# 机动车登记证书
MVC_CN_NAME = '机动车登记证书'
......@@ -856,3 +865,11 @@ LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY}
LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}
# Classification id used for the WeChat bill template.
WECHART_CLASSIFY = 12
# Independent copy of HEADERS_MAPPING in which '交易时间' resolves to the date
# column. The values are immutable, so a shallow copy is sufficient and
# HEADERS_MAPPING itself is left untouched.
WECHART_HEADERS_MAPPING = dict(HEADERS_MAPPING)
WECHART_HEADERS_MAPPING['交易时间'] = DATE_KEY
......
# Paging defaults.
PAGE_DEFAULT = 1
PAGE_SIZE_DEFAULT = 10

FIXED_APPLICATION_ID_PREFIX = 'CH-S'
DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']

# Business-type prefixes.
HIL_PREFIX = 'HIL'
AFC_PREFIX = 'AFC'
SPLIT_STR = '_'
BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
# Every upper/lower-case spelling of "hil", plus the code 'CO00002'.
HIL_SET = {h + i + l for h in 'Hh' for i in 'Ii' for l in 'Ll'} | {'CO00002'}

# ------- EDMS related -------------------------------------------------------------------------------------------
SESSION_PREFIX = 'FHLSID'
CUSTOM_CLIENT = 'CustomClient'
FIXED_TOKEN = '00000000-0000-0000-0000-000000000000'
FIXED_FILE_SIZE = 0
DOWNLOAD_ACTION_TYPE = 'Downloaded'
# Document scheme -> (numeric id, fill label), in DOC_SCHEME_LIST order.
DOC_SCHEMA_ID_FILL = dict(zip(
    DOC_SCHEME_LIST,
    (
        (1, 'DFE-AutoFilingScript'),
        (20, 'DFE-AutoFilingScript'),
        (86, 'Schema-Based'),
    ),
))
# Business-type prefix -> code, in BUSINESS_TYPE_LIST order.
BUSINESS_TYPE_DICT = dict(zip(BUSINESS_TYPE_LIST, ('CO00002', 'CO00001')))
DOC_SCHEMA_TYPE = 'ElectronicRecord'
APPLICATION_ID_META_FIELD_id = 1
DEALER_CODE_META_FIELD_id = 13
BUSINESS_TYPE_META_FIELD_id = 93
DEALER_CODE = 'ocr_situ_group'
RETRY_TIMES = 3
# ---------银行流水模板相关--------------------------------------------------------------------------------------------
# Character -> digit-string substitutions, grouped by the digit they produce
# (presumably look-alike characters misread for digits — confirm with the
# caller). Note that 'B' expands to the two characters "13".
TRANS_MAP = {
    **dict.fromkeys('Cc(oOD', "0"),
    **dict.fromkeys('[]lL', "1"),
    'A': "4",
    **dict.fromkeys('sS', "5"),
    'b': "6",
    **dict.fromkeys('gE', "9"),
    'B': "13",
}
# Translation table for str.translate built from TRANS_MAP.
TRANS = str.maketrans(TRANS_MAP)
ERROR_CHARS = {'.', '。', ':', ':', '•', '·', ',', ','}
SKIP_IMG_SHEET_NAME = '未处理图片'
SKIP_IMG_SHEET_HEADER = ('页码', '序号')
CARD_RATIO = 0.9
UNKNOWN_CARD = '未知卡号'
UNKNOWN_ROLE = '未知户名'
DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d']
PROOF_COL_TITLE = '核对结果'
PROOF_RES = ('对', '错')
META_SHEET_TITLE = '关键信息提取和展示'
# Header labels of the normalised output sheet, in column order.
FIXED_HEADERS = (
    '记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名',
    '对方卡号/账号', '对方开户行', '核对结果', '借贷', '收入', '支出',
)
FIXED_COL_AMOUNT = len(FIXED_HEADERS)
# Header label -> 1-based column number.
BASE_HEADERS_MAPPING = dict(zip(FIXED_HEADERS, range(1, FIXED_COL_AMOUNT + 1)))

# Zero-based positions of selected headers.
SUMMARY_IDX = FIXED_HEADERS.index('附言')
DATE_IDX = FIXED_HEADERS.index('记账日期')
AMOUNT_IDX = FIXED_HEADERS.index('金额')
OVER_IDX = FIXED_HEADERS.index('余额')
RESULT_IDX = FIXED_HEADERS.index('核对结果')
BORROW_IDX = FIXED_HEADERS.index('借贷')
INCOME_IDX = FIXED_HEADERS.index('收入')
OUTLAY_IDX = FIXED_HEADERS.index('支出')

# 1-based column numbers of the same headers.
BORROW_HEADER_COL = BORROW_IDX + 1
INCOME_HEADER_COL = INCOME_IDX + 1
OUTLAY_HEADER_COL = OUTLAY_IDX + 1
RESULT_HEADER_COL = RESULT_IDX + 1
# '借贷': ('贷', '借'), # 竖版-无表格-广发银行
# '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行
# '收/支': ('收入', '支出'), # 横版-表格-北京银行
BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支'}
BORROW_INCOME_SET = {'贷', '收入'}
BORROW_OUTLAY_SET = {'借', '支出'}
INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'}
OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'}
# ------------------普通打印-全格线--------------------------------------------------------------------------------------
HEADERS_MAPPING = {}
# 横版-表格-中国银行(不规则)
HEADERS_MAPPING.update(
{
'记账日期': BASE_HEADERS_MAPPING['记账日期'],
'记账时间': BASE_HEADERS_MAPPING['记账时间'],
'金额': BASE_HEADERS_MAPPING['金额'],
'余额': BASE_HEADERS_MAPPING['余额'],
'交易名称': BASE_HEADERS_MAPPING['交易名称'],
'附言': BASE_HEADERS_MAPPING['附言'],
'对方账户名': BASE_HEADERS_MAPPING['对方账户名'],
'对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'对方开户行': BASE_HEADERS_MAPPING['对方开户行'],
}
)
# 横版-表格-农业银行-中国农业银行个人账户明细
HEADERS_MAPPING.update(
{
'交易日期': BASE_HEADERS_MAPPING['记账日期'],
# '存入': BASE_HEADERS_MAPPING['金额'],
'对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'对方名称': BASE_HEADERS_MAPPING['对方账户名'],
'摘要': BASE_HEADERS_MAPPING['附言'],
}
)
# 横版-表格-北京银行
HEADERS_MAPPING.update(
{
'业务摘要': BASE_HEADERS_MAPPING['附言'],
'发生额': BASE_HEADERS_MAPPING['金额'],
'对方户名': BASE_HEADERS_MAPPING['对方账户名'],
}
)
# 横版-表格-工商银行 借记卡账户历史明细清单
# 横版-表格-工商银行-机打验证码 借记卡账户历史明细清单
# 横版-表格-工商银行CH-B008802400
# 横版-表格-工商银行 工资明细清单
# 工商银行历史明细(申请单号:20042501303039397888)
HEADERS_MAPPING.update(
{
'收入/支出金额': BASE_HEADERS_MAPPING['金额'],
'工作日期': BASE_HEADERS_MAPPING['记账日期'],
}
)
# 横版-表格-建设银行-个人活期账户交易明细
# 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604
# 竖版-表格-建设银行-工资账单CH-B008786812
# 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2)
HEADERS_MAPPING.update(
{
'交易金额': BASE_HEADERS_MAPPING['金额'],
'账户余额': BASE_HEADERS_MAPPING['余额'],
'对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# WeChat
# NOTE(review): the '金额(元)' key used to appear twice with the same value, so
# the second entry had no effect. It is taken to be the spelling with
# full-width parentheses, so both header variants are mapped — confirm.
HEADERS_MAPPING.update(
    {
        '交易时间': BASE_HEADERS_MAPPING['记账时间'],
        '交易类型': BASE_HEADERS_MAPPING['附言'],
        '金额(元)': BASE_HEADERS_MAPPING['金额'],
        '金额(元)': BASE_HEADERS_MAPPING['金额'],
        '交易对方': BASE_HEADERS_MAPPING['对方账户名'],
    }
)
# 支付宝
HEADERS_MAPPING.update(
{
'时间': BASE_HEADERS_MAPPING['记账日期'],
'名称/备注': BASE_HEADERS_MAPPING['附言'],
}
)
# ------------普通打印-部分格线-------------------------------------------------------------------------------------------
# 竖版-无表格-农业银行
# 竖版-无表格-农业银行CH-B008805428
HEADERS_MAPPING.update(
{
'摘要/附言': BASE_HEADERS_MAPPING['附言'],
'交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 农业银行-窄页
HEADERS_MAPPING.update(
{
'交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 竖版-特殊-农商行
HEADERS_MAPPING.update(
{
'交易发生额': BASE_HEADERS_MAPPING['金额'],
}
)
# 横版-特殊-中信银行-账户交易明细
HEADERS_MAPPING.update(
{
'对方银行': BASE_HEADERS_MAPPING['对方开户行'],
'交易摘要': BASE_HEADERS_MAPPING['附言'],
}
)
# 平安电子账单
HEADERS_MAPPING.update(
{
'借贷发生额(借:-贷:+)': BASE_HEADERS_MAPPING['金额'],
}
)
# ------------普通打印-无格线--------------------------------------------------------------------------------------------
# 竖版-无表格-招商银行(略歪)
# 竖版-无表格-招商银行账户历史交易明细表
HEADERS_MAPPING.update(
{
'联机余额': BASE_HEADERS_MAPPING['余额'],
}
)
# Portrait, no table - Postal Savings Bank - account statement with counterparty name and counterparty account
# Portrait, no table - Postal Savings Bank - account statement
# Portrait, no table - Postal Savings Bank - e-seal account statement
# NOTE(review): the '...(元)' keys used to appear twice each with the same
# value, so the second entry of each pair had no effect. It is taken to be the
# spelling with full-width parentheses, so both header variants are mapped —
# confirm.
HEADERS_MAPPING.update(
    {
        '交易金额(元)': BASE_HEADERS_MAPPING['金额'],
        '交易金额(元)': BASE_HEADERS_MAPPING['金额'],
        '账户余额(元)': BASE_HEADERS_MAPPING['余额'],
        '账户余额(元)': BASE_HEADERS_MAPPING['余额'],
        '对手方户名': BASE_HEADERS_MAPPING['对方账户名'],
        '对手方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],
    }
)
# 横版-无表格-广发银行-账户交易历史 --> 已废弃
# 竖版-无表格-广发银行-账户交易历史 --> 已废弃
HEADERS_MAPPING.update(
{
'会计日期': BASE_HEADERS_MAPPING['记账日期'],
'对手户名': BASE_HEADERS_MAPPING['对方账户名'],
'对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 招行电子账单 TODO 有英文,需测试
HEADERS_MAPPING.update(
{
'对手信息': BASE_HEADERS_MAPPING['对方账户名'],
'摘要代码': BASE_HEADERS_MAPPING['附言'],
}
)
# 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
# 横版-无表格-民生银行-无标题(客户账户)
# 横版-无表格-民生银行
HEADERS_MAPPING.update(
{
'摘要信息': BASE_HEADERS_MAPPING['附言'],
'对方行名': BASE_HEADERS_MAPPING['对方开户行'],
}
)
# 竖版-无表格-农业银行整数
# 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单
HEADERS_MAPPING.update(
{
'对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf
# 竖版-无表格-农业银行-扩张.pdf
# 竖版-无表格-农业银行-缩进.pdf
HEADERS_MAPPING.update(
{
'日期': BASE_HEADERS_MAPPING['记账日期'],
'短摘要': BASE_HEADERS_MAPPING['附言'],
'本次余额': BASE_HEADERS_MAPPING['余额'],
}
)
# 竖版-无表格-农业银行-无标题(对手帐号)
HEADERS_MAPPING.update(
{
'交易后余额': BASE_HEADERS_MAPPING['余额'],
'对手帐号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 竖版-无表格-农商行(非常规)
HEADERS_MAPPING.update(
{
'交易说明': BASE_HEADERS_MAPPING['附言'],
}
)
# 竖版-无表格-工商银行 抬头三行 活期历史明细清单
HEADERS_MAPPING.update(
{
'对方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# -----------针式打印-全格线--------------------------------------------------------------------------------------------
# 竖版-表格-建设银行-中国建设银行活期账户交易明细
# 竖版-表格-建设银行-中国建设银行活期账户明细清单
# 竖版-表格-建设银行-对私活期账户明细- (1).pdf
HEADERS_MAPPING.update(
{
'帐户余额': BASE_HEADERS_MAPPING['余额'],
'对方帐户名称': BASE_HEADERS_MAPPING['对方账户名'],
}
)
# 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录
HEADERS_MAPPING.update(
{
'交易日期 记账日期': BASE_HEADERS_MAPPING['记账日期'],
}
)
# ----------针式打印-部分格线------------------------------------------------------------------------------------------
# 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户)
# 竖版-特殊-邮储银行-账户交易明细(客户)
HEADERS_MAPPING.update(
{
'对方账号/卡号/汇票号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# --------------------------------------------------------------------------------------------------------------------
# ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果', '借贷', '收入', '支出')
# CLASSIFY_LIST = [
# # --------------普通打印:全格线---------------------------------
# # 中国银行:记账日期 记账时间 币别 金额 余额 交易名称 渠道 网点名称 附言 对方账户名 对方卡号/账号 对方开户行
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则)
#
# # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言
# ('农业银行-10', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), # 横版-表格-农业银行-中国农业银行个人账户明细
#
# # 农业银行:序号 日期 摘要 交易金额 余额 对方账号 对方名称 交易地点 渠道 附言
# ('农业银行-10-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
#
# # 农业银行:交易日期 摘要 交易金额 余额 交易渠道 交易网点 对方账号 对方名称 附言
# ('农业银行-9', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
#
# # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行
#
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 渠道
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
#
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 对方户名 对方账号 渠道
# ('工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
#
# # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行
# # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行
# ('建设银行-竖版', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
# ('建设银行-横版', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
#
# # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
#
# # 支付宝:流水号 时间 名称/备注 收入 支出 账户余额 资金渠道
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# # -----------------普通打印:部分格线--------------------------------
#
# # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
# ('农业银行-5', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
#
# # 农业银行:日期 地点 摘要 存入 支出 余额 对方账号 对方户名
# ('农业银行-8', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
# # 农业银行:日期 摘要 交易金额 余额 地点 交易对手账号 对方户名
# ('农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
#
# # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注
# ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)),
#
# # 中信银行:交易日期 交易摘要 收入金额 支出金额 账户余额 对方户名 对方账号 对方银行 交易流水号
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
#
# # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
# # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名
# ('建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
#
# # -----------------普通打印:无格线-------------------------------------
#
# # 招商银行:记账日期 货币 交易金额 联机余额 冲补账 交易摘要
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
#
# # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
# # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单
# ('邮储银行-8', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
# ('邮储银行-5', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
#
# # 工商银行电子版:交易日期 账号 储种 序号 币种 妙汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道
# ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
#
# # 招商银行电子版:记账日期 货币 交易金额 联机余额 交易摘要 对手信息
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
#
# # 民生银行:凭证类型 凭证号码 摘要信息 交易时间 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
# # 凭证类型 凭证号码 交易时间 摘要 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名
# ('农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
#
# # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
#
# # 农业银行:日期、时间、短摘要、交易金额、本次余额、交易网点、渠道、附言
# # 农业银行:日期、时间、日志号、短摘要、交易金额、本次余额、交易网点、渠道、附言
# ('农业银行', (1, 2, 4, 5, None, 3, None, None, None, None, None, None, None)),
# ('农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
#
#
# # ================针式打印:有格线===================
#
# # 建设银行: 摘要、交易日期、交易金额、账户余额、商户/网点号及其名称、对方账号、对方户名 --> 竖版-表格-建设银行-中国建设银行活期账户明细清单
# # 交易日期、摘要、 币种、 钞汇、 交易金额、 帐户余额、对方账号、 对方帐户名称 --> 竖版-表格-建设银行-对私活期账户明细- (1)
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
#
#
# # ================针式打印:无格线===================
#
# # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# Layout placeholder made of thirteen None slots (FIXED_HEADERS also has
# thirteen entries); used by the '其他' rows of CLASSIFY_LIST.
OTHER_TUPLE = (None,) * 13
# {
# "0":"其他",
# "1":"普通打印-全表格-中国农业银行",
# "2":"普通打印-全表格-中国银行",
# "3":"普通打印-全表格-北京银行",
# "4":"普通打印-全表格-工商银行",
# "5":"普通打印-全表格-建设银行",
# "6":"普通打印-全表格-微信账单",
# "7":"普通打印-全表格-支付宝账单",
# "8":"普通打印-无格线-中国邮政储蓄银行",
# "9":"普通打印-无格线-交通银行",
# "10":"普通打印-无格线-农业银行整数",
# "11":"普通打印-无格线-农业银行银行活期扩张缩进",
# "12":"普通打印-无格线-招商银行",
# "13":"普通打印-无格线-招行电子账单",
# "14":"普通打印-无格线-民生银行",
# "15":"普通打印-部分格线-横版-中信银行",
# "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页",
# "17":"普通打印-部分格线-竖版-农业银行",
# "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细",
# "19":"普通打印-部分格线-竖版-平安电子账单",
# "20":"针式打印-全格线-建设银行",
# "21":"针式打印-部分格线-竖版-邮储银行账户交易",
# "22":"针式打印-部分格线-邮储银行一本通绿卡"
# }
# CLASSIFY_LIST = [
# ('其他', OTHER_TUPLE),
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
# ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
# ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# "4":"普通打印-全表格-中国银行",
# "5":"普通打印-全表格-农业银行-10列",
# "6":"普通打印-全表格-农业银行-10列-1",
# "7":"普通打印-全表格-农业银行-9列",
# "8":"普通打印-全表格-北京银行",
# "9":"普通打印-全表格-工商银行",
# "10":"普通打印-全表格-工商银行-电子账单",
# "11":"普通打印-全表格-建设银行",
# "12":"普通打印-全表格-微信账单",
# "13":"普通打印-全表格-支付宝账单",
# "14":"普通打印-无格线-交通银行",
# "15":"普通打印-无格线-储蓄银行-5列",
# "16":"普通打印-无格线-储蓄银行-8列",
# "17":"普通打印-无格线-农业银行-扩张缩进",
# "18":"普通打印-无格线-农业银行-整数",
# "19":"普通打印-无格线-招商银行",
# "20":"普通打印-无格线-招商银行-电子账单",
# "21":"普通打印-无格线-民生银行",
# "22":"普通打印-部分格线-横版-中信银行",
# "23":"普通打印-部分格线-竖版-农业银行-5列",
# "24":"普通打印-部分格线-竖版-农业银行-8列",
# "25":"普通打印-部分格线-竖版-农业银行-窄页",
# "26":"普通打印-部分格线-竖版-平安电子账单",
# "27":"普通打印-部分格线-竖版-建设银行-电子账单",
# "34":"针式打印-全格线-建设银行",
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
# Template table: each entry is (template name, 13-slot tuple); entries without
# a known layout use OTHER_TUPLE.
# NOTE(review): judging by the commented-out notes earlier in this file, the
# list position appears to be the classification id, and each slot appears to
# hold the 1-based source column that feeds the FIXED_HEADERS field at the same
# position (None = not present) — confirm against the consumer before
# reordering or inserting entries.
CLASSIFY_LIST = [
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('普通打印-全表格-中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
('普通打印-全表格-农业银行-10列', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)),
('普通打印-全表格-农业银行-10列-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
('普通打印-全表格-农业银行-9列', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
('普通打印-全表格-北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
('普通打印-全表格-工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
('普通打印-全表格-工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
('普通打印-全表格-建设银行', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
('普通打印-全表格-微信账单', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
('普通打印-全表格-支付宝账单', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
('普通打印-无格线-交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
('普通打印-无格线-储蓄银行-5列', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
('普通打印-无格线-储蓄银行-8列', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
('普通打印-无格线-农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
('普通打印-无格线-农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
('普通打印-无格线-招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
('普通打印-无格线-招商银行-电子账单', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
('普通打印-无格线-民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
('普通打印-部分格线-横版-中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
('普通打印-部分格线-竖版-农业银行-5列', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
('普通打印-部分格线-竖版-农业银行-8列', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
('普通打印-部分格线-竖版-农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
('普通打印-部分格线-竖版-平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
('普通打印-部分格线-竖版-建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('针式打印-全格线-建设银行', OTHER_TUPLE),
('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('其他', OTHER_TUPLE),
]
# ----------license相关------------------------------------------------------------------------------------------------
# "0":"AVT Invioce",
# "1":"二手车发票",
# "2":"其他",
# "3":"护照",
# "28":"机动车登记证",
# "29":"机动车销售统一发票",
# "30":"港澳通行证",
# "31":"营业执照",
# "32":"行驶证",
# "33":"身份证",
# "37":"银行卡"
# "Other" classification id.
OTHER_CLASSIFY = 2

# ID card.
IC_CN_NAME = '身份证'
IC_CLASSIFY = 33
# (field, field) pairs; both members of each pair are the same label.
IC_FIELD_ORDER_0 = tuple(
    (label, label)
    for label in ('姓名', '公民身份号码', '出生年月', '住址', '性别', '民族')
)
IC_FIELD_ORDER_1 = tuple((label, label) for label in ('有效期限', '签发机关'))

# Residence permit.
RP_CN_NAME = '居住证'
RP_CLASSIFY = 10087
RP_FIELD_ORDER_0 = tuple(
    (label, label)
    for label in ('姓名', '公民身份号码', '出生年月', '住址', '性别')
)
# Same second field group as the ID card.
RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1
# Value-added-tax (VAT) invoice
VAT_CN_NAME = 'VAT普票'
VAT_CLASSIFY = 0
# Ordered pairs of field names for the VAT invoice: invoice code/number (printed and issued),
# issue date, check code, goods/services name, amounts and tax, buyer details, seller details,
# seller seal, remarks.
# NOTE(review): presumably (recognition-result key, output label) -- confirm against the consumer.
VAT_FIELD_ORDER = (('发票代码', '发票代码'),
('发票代码(开具)', '发票代码(开具)'),
('发票号码', '发票号码'),
('发票号码(开具)', '发票号码(开具)'),
('开票日期', '开票日期'),
('校验码', '校验码'),
('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'),
('金额合计', '开具金额合计(不含税)'),
('税率', '税率'),
('税额合计', '税额合计'),
('价税合计小写', '价税合计(小写)'),
('价税合计大写', '价税合计(大写)'),
('购方名称', '购买方名称'),
('购方纳税人识别号', '购买方纳税人识别号'),
('购方地址、电话', '购买方地址、电话'),
('购方开户行及账号', '购买方开户行及账号'),
('销方名称', '销售方名称'),
('销方纳税人识别号', '销售方纳税人识别号'),
('销方地址、电话', '销售方地址、电话'),
('销方开户行及账号', '销售方开户行及账号'),
('销售方:(章)', '销售方:(章)'),
('备注', '备注'),)
# Motor vehicle registration certificate (MVC)
MVC_CN_NAME = '机动车登记证书'
MVC_CLASSIFY = 28
# Second classification id for the same certificate, paired with the MVC_SE_* field orders below.
# NOTE(review): what "SE" stands for is not visible here -- confirm.
MVC_CLASSIFY_SE = 10086
# Field order "1_2" (presumably certificate pages 1-2 -- TODO confirm). The numeric prefixes in the
# first element of each pair follow the item numbers printed on the certificate.
# The ('空行占位', None) entry is a "blank row placeholder" with no second field name.
MVC_FIELD_ORDER_1_2 = (('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
('3.登记日期', '登记日期'),
('9.车辆识别代号/车架号', '车辆识别代号/车架号'),
('32.车辆出厂日期', '车辆出厂日期'),
('34.发证日期', '发证日期'),
('30.使用性质', '使用性质'),
('31.车辆获得方式', '车辆获得方式'),
('4.机动车登记编号', '机动车登记编号'),
('空行占位', None),
('5.车辆类型', '车辆类型'),
('6.车辆品牌', '车辆品牌'),
('7.车辆型号', '车辆型号'),
('8.车身颜色', '车身颜色'),
('10.国产/进口', '国产/进口'),
('11.发动机号', '发动机号'),
('12.发动机型号', '发动机型号'),
('15.制造厂名称', '制造厂名称'),
('2.登记机关', '登记机关'),
('编号', '机动车登记证书编号'),)
# Field order "3_4" (presumably pages 3-4 -- TODO confirm): name, identity document name/number,
# transfer registration date.
MVC_FIELD_ORDER_3_4 = (
('姓名/名称', '姓名/名称'),
('身份证明名称/号码', '身份证明名称/号码'),
('转移登记日期', '转移登记日期'),
)
# Field order "1_2" for MVC_CLASSIFY_SE: starts with the VIN and owner, and additionally lists
# items 13-14 and 16-29 that MVC_FIELD_ORDER_1_2 omits.
MVC_SE_FIELD_ORDER_1_2 = (('9.车辆识别代号/车架号', '车辆识别代号/车架号'),
('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
('空行占位', None),
('3.登记日期', '登记日期'),
('32.车辆出厂日期', '车辆出厂日期'),
('34.发证日期', '发证日期'),
('30.使用性质', '使用性质'),
('31.车辆获得方式', '车辆获得方式'),
('5.车辆类型', '车辆类型'),
('6.车辆品牌', '车辆品牌'),
('7.车辆型号', '车辆型号'),
('8.车身颜色', '车身颜色'),
('10.国产/进口', '国产/进口'),
('11.发动机号', '发动机号'),
('12.发动机型号', '发动机型号'),
('13.燃料种类', '燃料种类'),
('14.排量/功率', '排量/功率'),
('15.制造厂名称', '制造厂名称'),
('16.转向形式', '转向形式'),
('17.轮距', '轮距'),
('18.轮胎数', '轮胎数'),
('19.轮胎规格', '轮胎规格'),
('20.钢板弹簧片数', '钢板弹簧片数'),
('21.轴距', '轴距'),
('22.轴数', '轴数'),
('23.外廓尺寸', '外廓尺寸'),
('24.货厢内部尺寸', '货厢内部尺寸'),
('25.总质量', '总质量'),
('26.核定载质量', '核定载质量'),
('27.核定载客', '核定载客'),
('28.准牵引总质量', '准牵引总质量'),
('29.驾驶室载客', '驾驶室载客'),
('2.登记机关', '登记机关'),
('4.机动车登记编号', '机动车登记编号'),
('编号', '机动车登记证书编号'),)
# Field order "3_4" for MVC_CLASSIFY_SE; same entries as MVC_FIELD_ORDER_3_4.
MVC_SE_FIELD_ORDER_3_4 = (
('姓名/名称', '姓名/名称'),
('身份证明名称/号码', '身份证明名称/号码'),
('转移登记日期', '转移登记日期'),
)
# Unified motor vehicle sales invoice (MVI)
MVI_CN_NAME = '机动车销售统一发票'
MVI_CLASSIFY = 29
# Ordered pairs of field names for the sales invoice.
# NOTE(review): presumably (recognition-result key, output label) -- confirm against the consumer.
# NOTE(review): the trailing "# nodo" markers are kept verbatim; their meaning is not visible here
# (possibly "not done yet") -- confirm with the author.
# The ('', None) entry has an empty first name and no second name; the other field orders in this
# file use '空行占位' for such a placeholder row.
MVI_FIELD_ORDER = (('发票代码', '发票代码'),
('发票号码', '发票号码'),
('开票日期', '开票日期'),
('不含税价', '不含税价'),
('发票类型', '发票联'),
('购方名称', '购买方名称'),
('购买方身份证号或组织机构代码', '购买方证件号码'),
('纳税人识别号', '纳税人识别号'), # nodo
('车辆识别代码', '车架号'),
('价税合计小写', '价税合计小写'),
('销方名称', '销货单位名称'),
('增值税税额', '增值税税额'),
('增值税税率', '增值税税率'), # nodo
('发票章有无', '发票章有无'), # nodo: national unified invoice supervision seal / seller's seal
('价税合计大写', '价税合计大写'), # nodo
('', None),
('发动机号码', '发动机号'),
('车辆类型', '车辆类型'), # nodo
('厂牌型号', '厂牌型号'), # nodo
('产地', '产地'), # nodo
('合格证号', '合格证号'), # nodo
('进口证明书号', '进口证明书号'), # nodo
('商检单号', '商检单号'), # nodo
('电话', '电话'), # nodo
('销方纳税人识别号', '销货方纳税人识别号'),
('账号', '账号'), # nodo
('地址', '地址'), # nodo
('开户银行', '开户银行'), # nodo
('主管税务机关及代码', '主管税务机关及代码'), # nodo
('吨位', '吨位'), # nodo
('限乘人数', '限乘人数'),) # nodo
# ID card, VAT invoice, MVC and MVI have no pid (all four names are bound to None).
IC_PID = VAT_PID = MVC_PID = MVI_PID = None
# Business license (BL)
BL_CN_NAME = '营业执照'
BL_CLASSIFY = 31
BL_PID = 41
# Ordered pairs of field names: registration number / unified social credit code, company name,
# company type, legal representative, founding date, business term, registered capital, address,
# business scope.
# NOTE(review): presumably (recognition-result key, output label) -- confirm against the consumer.
BL_FIELD_ORDER = (('注册号', '统一社会信用代码'),
('企业名称', '名称'),
('企业类型', '类型'),
('经营者姓名', '法定代表人'),
('成立日期', '成立日期'),
('营业期限', '营业期限'),
('注册资本', '注册资本'),
('地址', '住所'),
('经营范围', '经营范围'),)
# Used-car invoice (UCI)
UCI_CN_NAME = '二手车发票'
UCI_CLASSIFY = 1
UCI_PID = 60
# Ordered pairs of field names for the used-car invoice.
# The ('空行占位', None) entry is a "blank row placeholder" with no second field name.
UCI_FIELD_ORDER = (('发票代码', '发票代码'),
('发票号码', '发票号码'),
('开票日期', '开票日期'),
('车价合计', '车价合计小写'),
('发票联', '发票联'),
('购方单位', '买方单位/个人'),
('购方号码', '买方单位代码/身份证号码'),
('车架号码', '车架号'),
('车价合计大写', '车价合计大写'),
('二手车市场', '二手车市场'),
('发票章有无', '发票章有无'),
('空行占位', None),
('车牌照号', '车牌照号'),
('登记证号', '登记证号'),
('购方地址', '买方单位/住址'),
('车辆类型', '车辆类型'),
('厂牌型号', '厂牌型号'),
('车管所名称', '转入地车辆管理所名称'),
('销方名称', '卖方单位/个人'),
('销方号码', '卖方单位代码/身份证号码'),
('销方地址', '卖方单位/个人住址'),)
# Hong Kong / Macao / Taiwan travel permit (EEP)
EEP_CN_NAME = '港澳台通行证'
EEP_CLASSIFY = 30
EEP_PID = 1018
# Ordered pairs of field names: name, document number, number of issues, validity period,
# date of birth, sex, issuing authority, place of issue.
# NOTE(review): presumably (recognition-result key, output label) -- confirm against the consumer.
EEP_FIELD_ORDER = (('中文名', '姓名'), # English name
('证件号码', '证件号码'),
('签发次数', '换证次数(签发次数)'),
('有效期限', '有效期限'),
('出生日期', '出生日期'),
('性别', '性别'),
('签发机关', '签发机关'),
('签发地点', '签发地点'),)
# Vehicle license (the DL_ prefix is the name used throughout this file)
DL_CN_NAME = '行驶证'
DL_CLASSIFY = 32
DL_PID = 5
# Field order 0: items 1-10 of the license (plate number, owner, usage, VIN, dates, type,
# address, model, engine number). The second element carries the printed item number.
DL_FIELD_ORDER_0 = (('号牌号码', '1 号牌号码'),
('所有人', '3 所有人'),
('使用性质', '5 使用性质'),
('车辆识别代码', '7 车辆识别代号'),
('注册日期', '9 注册日期'),
('发证日期', '10 发证日期'),
('车辆类型', '2 车辆类型'),
('地址', '4 住址'),
('品牌型号', '6 品牌型号'),
('发动机号', '8 发动机号码'),)
# Field order 1: plate number followed by items 11-17.
# NOTE(review): '15 核对载质量' differs from its key '核定载质量' by one character (核对 vs 核定);
# this looks like a typo in the label -- confirm before changing, since it is a runtime string.
DL_FIELD_ORDER_1 = (('号牌号码', '1 号牌号码'),
('档案编号', '11 档案编号'),
('核定载人数', '12 核定载人数'),
('总质量', '13 总质量'),
('整备质量', '14 整备质量'),
('核定载质量', '15 核对载质量'),
('外廓尺寸', '16 外廓尺寸'),
('准牵引总质量', '17 准牵引总质量'),)
# Passport (PP)
PP_CN_NAME = '护照'
PP_CLASSIFY = 3
PP_PID = 8
# Ordered pairs of field names; the second element is a bilingual "Chinese/English" label.
# NOTE(review): presumably (recognition-result key, output label) -- confirm against the consumer.
PP_FIELD_ORDER = (('类型', '类型/Type'),
('英文姓名', '姓名/Name'),
('护照号码', '护照号码/Passport No'),
('有效期至', '有效期至/Date of expiry'),
('签发日期', '签发日期/Date of issue'),
('国家码', '国家码/Country Code'),
('性别', '性别/Sex'),
('国籍', '国籍/Nationality'),
('出生日期', '出生日期/Date of birth'),
('出生地点', '出生地点/Place of birth'),
('签发地点', '签发地点/Place of issue'),)
# Bank card (BC)
BC_CN_NAME = '银行卡'
BC_CLASSIFY = 37
BC_PID = 4
# Earlier, longer field list, kept commented out for reference:
# BC_FIELD = (('CardNum', '银行卡号'),
# ('BankName', '发卡行名称'),
# ('CardName', '银行卡名称'),
# ('BankCode', '发卡行代号'),
# ('CardType', '银行卡类型'),
# ('Date', '日期'))
# Active field order: issuing bank name, card number, card type.
BC_FIELD_ORDER = (('BankName', '发卡行名称'),
('CardNum', '银行卡号'),
('CardType', '银行卡类型'),)
# Success code, accepted both as the string '0' and as the integer 0.
SUCCESS_CODE_SET = {'0', 0}
# Classification id -> (field name, field order, field order).
# NOTE(review): presumably the named field decides which of the two field orders applies to a
# recognition result (first one when the field is present, second otherwise) -- confirm.
FIELD_ORDER_MAP = {
IC_CLASSIFY: ('有效期限', IC_FIELD_ORDER_1, IC_FIELD_ORDER_0),
RP_CLASSIFY: ('有效期限', RP_FIELD_ORDER_1, RP_FIELD_ORDER_0),
DL_CLASSIFY: ('档案编号', DL_FIELD_ORDER_1, DL_FIELD_ORDER_0),
MVC_CLASSIFY: ('转移登记日期', MVC_FIELD_ORDER_3_4, MVC_FIELD_ORDER_1_2),
MVC_CLASSIFY_SE: ('转移登记日期', MVC_SE_FIELD_ORDER_3_4, MVC_SE_FIELD_ORDER_1_2)
}
# Ordered (classification id, (pid, Chinese name, field order, flag, flag)) entries.
# The entries whose field order is None and whose first flag is True are exactly the ids that are
# also keys of FIELD_ORDER_MAP; only MVC_CLASSIFY sets the second flag.
# NOTE(review): the meaning of the two flags is not visible here -- confirm with the consumer.
# NOTE(review): MVC_CLASSIFY_SE has no entry here although it is a key of FIELD_ORDER_MAP.
LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False)),
(IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False)),
(RP_CLASSIFY, (None, RP_CN_NAME, None, True, False)),
(BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False)),
(BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False)),
(UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False)),
(EEP_CLASSIFY, (EEP_PID, EEP_CN_NAME, EEP_FIELD_ORDER, False, False)),
(DL_CLASSIFY, (DL_PID, DL_CN_NAME, None, True, False)),
(PP_CLASSIFY, (PP_PID, PP_CN_NAME, PP_FIELD_ORDER, False, False)),
(MVC_CLASSIFY, (MVC_PID, MVC_CN_NAME, None, True, True)),
(VAT_CLASSIFY, (VAT_PID, VAT_CN_NAME, VAT_FIELD_ORDER, False, False)))
# Same data as LICENSE_ORDER, keyed by classification id for direct lookup.
LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
# Groupings of classification ids. Set 1 holds the ids whose *_PID is None, set 2 the ids that
# have a numeric *_PID. RP_CLASSIFY and MVC_CLASSIFY_SE are in neither set.
OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY}
LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}
......@@ -440,10 +440,23 @@ class Command(BaseCommand, LoggerMixin):
merged_bs_summary = {}
card_num = 1
for role_dict in unknown_summary.values():
for summary in role_dict.values():
if len(role_dict) == 2 and consts.UNKNOWN_ROLE in role_dict:
summary_dict = role_dict.pop(consts.UNKNOWN_ROLE, {})
for summary in role_dict.values():
summary_dict['confidence'].extend(summary['confidence'])
summary_dict['role'] = summary['role']
summary_dict['code'].extend(summary['code'])
summary_dict['print_time'].extend(summary['print_time'])
summary_dict['start_date'].extend(summary['start_date'])
summary_dict['end_date'].extend(summary['end_date'])
summary_dict['sheet'].extend(summary['sheet'])
card = '{0}_{1}'.format(consts.UNKNOWN_CARD, card_num)
card_num += 1
merged_bs_summary[card] = summary
merged_bs_summary[card] = summary_dict
else:
for summary in role_dict.values():
card = '{0}_{1}'.format(consts.UNKNOWN_CARD, card_num)
card_num += 1
merged_bs_summary[card] = summary
else:
# 1卡号
one_card = False
......@@ -460,6 +473,7 @@ class Command(BaseCommand, LoggerMixin):
for role, summary in classify_summary.items():
if one_card or role in card_summary['role_set']:
merge_role.append(role)
card_summary['confidence'].extend(summary['confidence'])
card_summary['sheet'].extend(summary['sheet'])
card_summary['code'].extend(summary['code'])
card_summary['print_time'].extend(summary['print_time'])
......
......@@ -18,7 +18,7 @@ class DocHandler:
def get_doc_list(self, doc_queryset, business_type):
for doc_dict in doc_queryset:
if doc_dict['status'] != DocStatus.COMPLETE.value:
if doc_dict['status'] not in [DocStatus.COMPLETE.value, DocStatus.UPLOAD_FAILED.value]:
continue
doc_id = doc_dict.get('id')
doc_dict['pdf_link'] = self.get_link(doc_id, business_type)
......
......@@ -30,51 +30,118 @@ class BSWorkbook(Workbook):
self.MAX_MEAN = 31
@staticmethod
def sheet_prune(ws, classify):
ws.insert_cols(1, amount=consts.FIXED_COL_AMOUNT)
moved_col_set = set()
header_col_set = set()
# 根据第一行关键词排列
for col in range(consts.FIXED_COL_AMOUNT + 1, ws.max_column + 1):
header_value = ws.cell(1, col).value
header_col = consts.HEADERS_MAPPING.get(header_value)
if header_col is not None and header_col not in header_col_set:
letter = get_column_letter(col)
ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_col - col)
moved_col_set.add(col)
header_col_set.add(header_col)
elif header_value in consts.BORROW_HEADERS_SET:
letter = get_column_letter(col)
ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.BORROW_HEADER_COL - col)
moved_col_set.add(col)
header_col_set.add(consts.BORROW_HEADER_COL)
elif header_value in consts.INCOME_HEADERS_SET:
letter = get_column_letter(col)
ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.INCOME_HEADER_COL - col)
moved_col_set.add(col)
header_col_set.add(consts.INCOME_HEADER_COL)
elif header_value in consts.OUTLAY_HEADERS_SET:
letter = get_column_letter(col)
ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.OUTLAY_HEADER_COL - col)
moved_col_set.add(col)
header_col_set.add(consts.OUTLAY_HEADER_COL)
# 缺失表头再次查找
for header_col in range(1, consts.FIXED_COL_AMOUNT + 1):
if header_col in header_col_set or header_col == consts.RESULT_HEADER_COL:
continue
fix_col = consts.CLASSIFY_LIST[classify][1][header_col - 1]
if fix_col is None:
continue
fix_col = fix_col + consts.FIXED_COL_AMOUNT
if fix_col in moved_col_set:
break
letter = get_column_letter(fix_col)
ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_col - fix_col)
def header_collect(ws, sheet_header_info, header_info, max_column_list, classify):
    """Record which known headers appear in the first row of ``ws``.

    Takes no ``self``; it is called as ``self.header_collect(ws, ...)`` with
    five arguments, so it relies on a ``@staticmethod`` decorator.

    Results are written into the caller's containers:

    * ``sheet_header_info[ws.title]`` receives, per sheet::

          {
              'summary_col': 1,      # 0-based index of each recognised header
              'date_col': 1,
              'amount_col': 1,
              'over_col': 1,
              'income_col': 1,
              'outlay_col': 1,
              'borrow_col': 1,
              'min_row': 2,          # 1 when no header was recognised, else 2
              'find_count': 3,       # number of recognised header cells
              'find_col': {1},       # indices of the recognised header cells
              'header': ('日期', '金额'),  # the raw first row
          }

    * ``header_info`` counts, per header key, how many sheets had that key at
      each index::

          {'summary_col': {5: 2, 3: 1}, 'date_col': {}, ...}

    * ``max_column_list`` gets ``ws.max_column`` appended.

    Existing entries for the sheet are never overwritten (``setdefault``), so
    the first index seen for a key wins.
    """
    # WeChat statements use their own header vocabulary.
    if classify == consts.WECHART_CLASSIFY:
        mapping = consts.WECHART_HEADERS_MAPPING
    else:
        mapping = consts.HEADERS_MAPPING

    sheet_info = sheet_header_info.setdefault(ws.title, {})
    hits = 0
    for first_row in ws.iter_rows(max_row=1, min_row=1, values_only=True):
        sheet_info.setdefault(consts.HEADER_KEY, first_row)
        for idx, title in enumerate(first_row):
            key = mapping.get(title)
            if key is None:
                continue
            hits += 1
            sheet_info.setdefault(key, idx)
            sheet_info.setdefault(consts.FIND_COL_KEY, set()).add(idx)
            votes = header_info.setdefault(key, {})
            votes[idx] = votes.get(idx, 0) + 1

    sheet_info.setdefault(consts.FIND_COUNT_KEY, hits)
    # Data starts on row 2 only when row 1 really was a header row.
    sheet_info.setdefault(consts.MIN_ROW_KEY, 1 if hits == 0 else 2)
    max_column_list.append(ws.max_column)
ws.delete_cols(consts.FIXED_COL_AMOUNT + 1, amount=ws.max_column)
min_row = 1 if len(moved_col_set) == 0 else 2
return min_row
@staticmethod
def header_statistics(sheet_header_info, header_info, classify):
    """Decide one column index per header key for a whole group of sheets.

    The sheet with the most recognised headers is taken as the reference.

    * If no sheet recognised any header (or there are no sheets at all), every
      column comes from the fixed layout ``consts.CLASSIFY_LIST[classify][1]``
      (1-based there, converted to 0-based here) and the header row comes from
      ``consts.CLASSIFY_HEADER_LIST[classify]``.
    * Otherwise each key in ``consts.KEY_LIST`` uses the reference sheet's
      index; a key the reference sheet lacks falls back to the most frequent
      index recorded in ``header_info`` that is not already taken, and finally
      to the fixed layout.

    Args:
        sheet_header_info: per-sheet header data built by ``header_collect``.
        header_info: ``{key: {index: count}}`` built by ``header_collect``.
        classify: index into ``consts.CLASSIFY_LIST``.

    Returns:
        dict mapping each header key to a 0-based column index or ``None``,
        plus ``consts.HEADER_KEY`` mapped to the header row, e.g.::

            {SUMMARY_KEY: 2, DATE_KEY: 3, AMOUNT_KEY: 4, OVER_KEY: 5,
             IMCOME_KEY: 6, OUTLAY_KEY: 7, BORROW_KEY: 8,
             'header': ('日期', '金额')}
    """
    # First sheet with the highest find count; ``{}`` when there are no sheets
    # so that the fixed-layout branch below is used instead of raising.
    best_sheet_info = max(
        sheet_header_info.values(),
        key=lambda info: info[consts.FIND_COUNT_KEY],
        default={},
    )

    statistics_header_info = {}
    if best_sheet_info.get(consts.FIND_COUNT_KEY, 0) == 0:
        for key, value in consts.CLASSIFY_MAP.items():
            col = consts.CLASSIFY_LIST[classify][1][value]
            statistics_header_info[key] = col - 1 if isinstance(col, int) else None
        statistics_header_info[consts.HEADER_KEY] = consts.CLASSIFY_HEADER_LIST[classify]
        return statistics_header_info

    # Work on a copy: the sheet's own set is read again later for that sheet,
    # and must not contain the columns guessed here.
    claimed = set(best_sheet_info.get(consts.FIND_COL_KEY, ()))
    for key in consts.KEY_LIST:
        col = best_sheet_info.get(key)
        if col is None:
            votes = header_info.get(key, {})
            for idx in sorted(votes, key=votes.get, reverse=True):
                if idx not in claimed:
                    col = idx
                    break
            else:
                fixed_col = consts.CLASSIFY_LIST[classify][1][consts.CLASSIFY_MAP[key]]
                # The layout is 1-based while ``claimed`` holds 0-based
                # indices, so convert before testing membership.
                if isinstance(fixed_col, int) and fixed_col - 1 not in claimed:
                    col = fixed_col - 1
            if col is not None:
                claimed.add(col)
        statistics_header_info[key] = col
    statistics_header_info[consts.HEADER_KEY] = best_sheet_info.get(consts.HEADER_KEY)
    return statistics_header_info
@staticmethod
def get_data_col_min_row(sheet, sheet_header_info, header_info, classify):
    """Return the date column and first data row for one sheet.

    The sheet's own date column is used when its header was recognised.
    Otherwise the most frequent date index recorded in ``header_info`` that
    this sheet has not already assigned to another header is used, and as a
    last resort the fixed layout ``consts.CLASSIFY_LIST[classify][1]``.

    Args:
        sheet: sheet title, the key into ``sheet_header_info``.
        sheet_header_info: per-sheet header data.
        header_info: ``{key: {index: count}}`` across the sheets.
        classify: index into ``consts.CLASSIFY_LIST``.

    Returns:
        ``(date_col, min_row)``; ``date_col`` is a 0-based index or ``None``
        when no column could be determined, ``min_row`` defaults to 2.
    """
    sheet_info = sheet_header_info.get(sheet, {})
    date_col = sheet_info.get(consts.DATE_KEY)
    if date_col is None:
        votes = header_info.get(consts.DATE_KEY, {})
        claimed = sheet_info.get(consts.FIND_COL_KEY, set())
        for idx in sorted(votes, key=votes.get, reverse=True):
            if idx not in claimed:
                date_col = idx
                break
        else:
            fixed_col = consts.CLASSIFY_LIST[classify][1][consts.CLASSIFY_MAP[consts.DATE_KEY]]
            # The layout is 1-based while ``claimed`` holds 0-based indices,
            # so convert before testing membership.
            if isinstance(fixed_col, int) and fixed_col - 1 not in claimed:
                date_col = fixed_col - 1
    return date_col, sheet_info.get(consts.MIN_ROW_KEY, 2)
@staticmethod
def month_split(dti, date_list, date_statistics):
......@@ -122,8 +189,14 @@ class BSWorkbook(Workbook):
reverse_trend = -1
return reverse_trend
def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics):
for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True):
def sheet_split(self, ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics):
if date_col is None:
# month_info process
month_info = month_mapping.setdefault('xxxx-xx', [])
month_info.append((ws.title, min_row, ws.max_row, 0))
return
date_col = date_col + 1
for date_tuple_src in ws.iter_cols(min_col=date_col, max_col=date_col, min_row=min_row, values_only=True):
date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src]
dt_array, tz_parsed = tslib.array_to_datetime(
np.array(date_tuple, copy=False, dtype=np.object_),
......@@ -199,11 +272,12 @@ class BSWorkbook(Workbook):
return amount_str
# 1.替换
res_str = amount_str.translate(consts.TRANS)
# 2.删除多余的-
res_str = res_str[0] + res_str[1:].replace('-', '')
# 3.首字符处理
if res_str[0] in consts.ERROR_CHARS:
res_str = '-{0}'.format(res_str[1:])
# 2.首字符处理
first_char = res_str[0]
if first_char in consts.ERROR_CHARS:
first_char = '-'
# 3.删除多余的-
res_str = first_char + res_str[1:].replace('-', '')
# 4.逗号与句号处理
if len(res_str) >= 4:
period_idx = len(res_str) - 3
......@@ -213,90 +287,132 @@ class BSWorkbook(Workbook):
res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:])
return res_str
def build_month_sheet(self, card, month_mapping, ms, is_reverse):
def build_month_sheet(self, ms, card, month_mapping, is_reverse, statistics_header_info, max_column):
summary_cell_idx = statistics_header_info.get(consts.SUMMARY_KEY)
date_cell_idx = statistics_header_info.get(consts.DATE_KEY)
amount_cell_idx = statistics_header_info.get(consts.AMOUNT_KEY) # None or src or append
over_cell_idx = statistics_header_info.get(consts.OVER_KEY)
income_cell_idx = statistics_header_info.get(consts.IMCOME_KEY)
outlay_cell_idx = statistics_header_info.get(consts.OUTLAY_KEY)
borrow_cell_idx = statistics_header_info.get(consts.BORROW_KEY)
header = list(statistics_header_info.get(consts.HEADER_KEY))
src_header_len = len(header)
if max_column > src_header_len:
for i in range(max_column - src_header_len):
header.append(None)
add_col = ['核对结果']
if amount_cell_idx is None:
if income_cell_idx is not None or outlay_cell_idx is not None:
add_col = ['金额', '核对结果']
amount_cell_idx = len(header)
header.extend(add_col)
result_idx = len(header) - 1
tmp_ws = self.create_sheet('tmp_ws')
for month in sorted(month_mapping.keys()):
# 3.1.拷贝数据
parts = month_mapping.get(month)
new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
new_ws.append(consts.FIXED_HEADERS)
new_ws.append(header)
for part in parts:
ws = self.get_sheet_by_name(part[0])
for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
new_ws.append(row_value)
if any(row_value):
new_ws.append(row_value)
# 3.2.提取信息、高亮
amount_mapping = {}
amount_fill_row = set()
for rows in new_ws.iter_rows(min_row=2):
summary_cell = rows[consts.SUMMARY_IDX]
date_cell = rows[consts.DATE_IDX]
amount_cell = rows[consts.AMOUNT_IDX]
row = summary_cell.row
# 关键词1提取
if summary_cell.value in self.interest_keyword:
ms.append((summary_cell.value, date_cell.value, amount_cell.value))
# 关键词2提取至临时表
elif summary_cell.value in self.salary_keyword:
tmp_ws.append((summary_cell.value, date_cell.value, amount_cell.value))
# 贷款关键词高亮
elif summary_cell.value in self.loan_keyword:
summary_cell.fill = self.loan_fill
amount_error = False
# TODO 删除空行
summary_cell = None if summary_cell_idx is None else rows[summary_cell_idx]
date_cell = None if date_cell_idx is None else rows[date_cell_idx]
amount_cell = None if amount_cell_idx is None else rows[amount_cell_idx]
over_cell = None if over_cell_idx is None else rows[over_cell_idx]
income_cell = None if income_cell_idx is None else rows[income_cell_idx]
outlay_cell = None if outlay_cell_idx is None else rows[outlay_cell_idx]
borrow_cell = None if borrow_cell_idx is None else rows[borrow_cell_idx]
summary_cell_value = None if summary_cell is None else summary_cell.value
date_cell_value = None if date_cell is None else date_cell.value
amount_cell_value = None if amount_cell is None else amount_cell.value
over_cell_value = None if over_cell is None else over_cell.value
income_cell_value = None if income_cell is None else income_cell.value
outlay_cell_value = None if outlay_cell is None else outlay_cell.value
borrow_cell_value = None if borrow_cell is None else borrow_cell.value
# row = summary_cell.row
if summary_cell is not None:
# 关键词1提取
if summary_cell_value in self.interest_keyword:
ms.append((summary_cell_value, date_cell_value, amount_cell_value))
# 关键词2提取至临时表
elif summary_cell_value in self.salary_keyword:
tmp_ws.append((summary_cell_value, date_cell_value, amount_cell_value))
# 贷款关键词高亮
elif summary_cell_value in self.loan_keyword:
summary_cell.fill = self.loan_fill
# 3.3.余额转数值
over_cell = rows[consts.OVER_IDX]
try:
over_cell.value = locale.atof(self.amount_format(over_cell.value))
except Exception as e:
amount_error = True
else:
over_cell.number_format = numbers.FORMAT_NUMBER_00
over_success = False
if over_cell is not None:
try:
over_cell.value = locale.atof(self.amount_format(over_cell_value))
except Exception as e:
pass
else:
over_success = True
over_cell.number_format = numbers.FORMAT_NUMBER_00
# 3.4.金额转数值
try:
amount_success = False
if amount_cell is not None:
try:
amount_cell.value = locale.atof(self.amount_format(amount_cell.value))
except Exception as e:
try:
amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value))
if amount_cell.value == 0:
raise
elif amount_cell.value < 0:
amount_cell.value = -amount_cell.value
amount_cell.value = locale.atof(self.amount_format(amount_cell_value))
except Exception as e:
amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value))
if amount_cell.value > 0:
amount_cell.value = -amount_cell.value
except Exception as e:
amount_error = True
else:
if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET:
amount_cell.value = -amount_cell.value
amount_cell.number_format = numbers.FORMAT_NUMBER_00
same_amount_mapping = amount_mapping.get(date_cell.value, {})
fill_rows = same_amount_mapping.get(-amount_cell.value)
if fill_rows:
amount_fill_row.add(row)
amount_fill_row.update(fill_rows)
amount_mapping.setdefault(date_cell.value, {}).setdefault(
amount_cell.value, []).append(row)
try:
amount_cell.value = locale.atof(self.amount_format(income_cell_value))
if amount_cell.value == 0:
raise
elif amount_cell.value < 0:
amount_cell.value = -amount_cell.value
except Exception as e:
amount_cell.value = locale.atof(self.amount_format(outlay_cell_value))
if amount_cell.value > 0:
amount_cell.value = -amount_cell.value
except Exception as e:
pass
else:
amount_success = True
if borrow_cell_value in consts.BORROW_OUTLAY_SET:
amount_cell.value = -amount_cell.value
amount_cell.number_format = numbers.FORMAT_NUMBER_00
if date_cell is not None:
same_amount_mapping = amount_mapping.get(date_cell.value, {})
fill_rows = same_amount_mapping.get(-amount_cell.value)
if fill_rows:
amount_fill_row.add(amount_cell.row)
amount_fill_row.update(fill_rows)
amount_mapping.setdefault(date_cell.value, {}).setdefault(
amount_cell.value, []).append(amount_cell.row)
# 3.5.核对结果
if row > 2 and not amount_error:
if amount_success and over_success and amount_cell.row > 2:
amount_col_letter = get_column_letter(amount_cell_idx + 1)
over_col_letter = get_column_letter(over_cell_idx + 1)
if is_reverse:
rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(
row - 1, row, *self.proof_res)
rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
amount_cell.row - 1, amount_cell.row, over_col_letter, amount_col_letter, *self.proof_res)
else:
rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(
row, row - 1, *self.proof_res)
# 删除金额辅助列
new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column)
rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
amount_cell.row, amount_cell.row - 1, over_col_letter, amount_col_letter, *self.proof_res)
# 3.6.同一天相同进出账高亮
del amount_mapping
for row in amount_fill_row:
new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill
new_ws[row][amount_cell_idx].fill = self.amount_fill
# 关键词2信息提取
ms.append(self.blank_row)
......@@ -319,21 +435,29 @@ class BSWorkbook(Workbook):
# }
# }
for card, summary in bs_summary.items():
# 1.原表修剪、排列、按照月份分割
# 1.原表表头收集、按照月份分割
# 1.1 总结首行信息
classify = summary.get('classify', 0)
sheet_header_info = {}
header_info = {}
max_column_list = []
for sheet in summary.get('sheet', []):
ws = self.get_sheet_by_name(sheet)
self.header_collect(ws, sheet_header_info, header_info, max_column_list, classify)
statistics_header_info = self.header_statistics(sheet_header_info, header_info, classify)
max_column = max(max_column_list)
# 1.2.按月份分割 min_row 正文第一行 date_col 日期行
start_date = summary.get('start_date')
end_date = summary.get('end_date')
date_statistics = False
if start_date is None or end_date is None:
date_statistics = True
date_list = []
month_mapping = {}
reverse_trend_list = []
date_statistics = True if start_date is None or end_date is None else False # 用于判断是否需要收集各表中日期
date_list = [] # 用于收集各表中日期
month_mapping = {} # 用于创建月份表
reverse_trend_list = [] # 用于判断倒序与正序
for sheet in summary.get('sheet', []):
ws = self.get_sheet_by_name(sheet)
# 1.1.删除多余列、排列
min_row = self.sheet_prune(ws, summary.get('classify', 0))
# 1.2.按月份分割
self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics)
date_col, min_row = self.get_data_col_min_row(sheet, sheet_header_info, header_info, classify)
self.sheet_split(ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics)
if date_statistics is True and len(date_list) > 1:
start_date = min(date_list) if start_date is None else start_date
......@@ -353,7 +477,7 @@ class BSWorkbook(Workbook):
for month_list in month_mapping.values():
month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
self.build_month_sheet(card, month_mapping, ms, is_reverse)
self.build_month_sheet(ms, card, month_mapping, is_reverse, statistics_header_info, max_column)
# 4.删除原表
for sheet in summary.get('sheet'):
......
import locale
import numpy as np
from pandas._libs import tslib
from pandas._libs.tslibs.nattype import NaTType
from pandas.core.indexes.datetimes import DatetimeIndex
from openpyxl import Workbook
from openpyxl.styles import Border, Side, PatternFill, numbers
from openpyxl.utils import get_column_letter
from apps.doc import consts
class BSWorkbook(Workbook):
def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs):
    """Set up the workbook plus the labels, keywords and fills used by the report.

    Args:
        interest_keyword: summaries whose rows are copied to the meta sheet.
        salary_keyword: summaries whose rows are collected in a temporary sheet.
        loan_keyword: summaries whose cells are highlighted with ``loan_fill``.
        *args, **kwargs: forwarded unchanged to the parent workbook.
    """
    super().__init__(*args, **kwargs)
    # Process-wide setting: amounts are parsed with locale.atof, which needs
    # the en_US number format.
    locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')

    # Keyword collections supplied by the caller.
    self.interest_keyword = interest_keyword
    self.salary_keyword = salary_keyword
    self.loan_keyword = loan_keyword

    # Fixed titles and header rows written to the meta sheet.
    self.meta_sheet_title = '关键信息提取和展示'
    self.code_header = ('页数', '电子回单验证码')
    self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果')
    self.keyword_header = ('关键词', '记账日期', '金额')
    self.blank_row = (None,)

    # Labels placed in the balance-check formula (correct / wrong).
    self.proof_res = ('对', '错')

    # Cell highlights: loan keywords and same-day matching amounts.
    self.loan_fill = PatternFill("solid", fgColor="00FFCC00")
    self.amount_fill = PatternFill("solid", fgColor="00FFFF00")

    self.MAX_MEAN = 31
@staticmethod
def sheet_prune(ws, classify):
    """Rearrange ``ws`` in place into the fixed column layout.

    ``consts.FIXED_COL_AMOUNT`` empty columns are inserted on the left, every
    source column whose first-row header is recognised is moved into its fixed
    slot, slots still empty are filled from the per-classification layout
    ``consts.CLASSIFY_LIST[classify][1]``, and all remaining source columns are
    deleted.

    NOTE(review): the arithmetic below assumes ``consts.HEADERS_MAPPING``
    values are 1-based column numbers -- confirm against consts.

    Args:
        ws: worksheet to modify.
        classify: index into ``consts.CLASSIFY_LIST``.

    Returns:
        1 when no header was recognised (row 1 is data), otherwise 2.
    """
    fixed_amount = consts.FIXED_COL_AMOUNT
    ws.insert_cols(1, amount=fixed_amount)
    moved_cols = set()
    placed_headers = set()

    def _shift(src_col, dst_col):
        # Move one whole column (row 1 to the current last row) sideways.
        letter = get_column_letter(src_col)
        ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=dst_col - src_col)

    # Pass 1: place columns according to the keyword found in the first row.
    for src_col in range(fixed_amount + 1, ws.max_column + 1):
        title = ws.cell(1, src_col).value
        dst_col = consts.HEADERS_MAPPING.get(title)
        if dst_col is None or dst_col in placed_headers:
            # Unknown header, or its slot is taken: try the auxiliary
            # borrow / income / outlay columns.
            if title in consts.BORROW_HEADERS_SET:
                dst_col = consts.BORROW_HEADER_COL
            elif title in consts.INCOME_HEADERS_SET:
                dst_col = consts.INCOME_HEADER_COL
            elif title in consts.OUTLAY_HEADERS_SET:
                dst_col = consts.OUTLAY_HEADER_COL
            else:
                continue
        _shift(src_col, dst_col)
        moved_cols.add(src_col)
        placed_headers.add(dst_col)

    # Pass 2: fill slots whose header was not found from the fixed layout.
    for dst_col in range(1, fixed_amount + 1):
        if dst_col == consts.RESULT_HEADER_COL or dst_col in placed_headers:
            continue
        template_col = consts.CLASSIFY_LIST[classify][1][dst_col - 1]
        if template_col is None:
            continue
        src_col = template_col + fixed_amount
        if src_col in moved_cols:
            # The layout points at a column already moved by pass 1; the
            # remaining slots are left untouched.
            break
        _shift(src_col, dst_col)

    ws.delete_cols(fixed_amount + 1, amount=ws.max_column)
    return 2 if moved_cols else 1
@staticmethod
def month_split(dti, date_list, date_statistics):
    """Find where each run of a month begins in a column of dates.

    Args:
        dti: DatetimeIndex of the date column; unparseable cells are NaT.
        date_list: list that receives the first and the last valid date of
            ``dti`` when ``date_statistics`` is true.
        date_statistics: whether to collect dates into ``date_list``.

    Returns:
        ``(month_list, idx_list)``: the ``'%Y-%m'`` label of every run of
        equal months, and the position in ``dti`` where each run starts. The
        first run always starts at 0 so that leading NaT rows belong to it.
    """
    months = []
    starts = []
    previous = None
    for pos, label in enumerate(dti.strftime('%Y-%m')):
        # NaT is formatted as a float NaN; such rows stay in the current run.
        if isinstance(label, float) or label == previous:
            continue
        months.append(label)
        if previous is None:
            if date_statistics:
                date_list.append(dti[pos].date())
            starts.append(0)
        else:
            starts.append(pos)
        previous = label

    if date_statistics:
        # Last valid date, scanning from the end.
        for pos in reversed(range(len(dti))):
            stamp = dti[pos]
            if not isinstance(stamp, NaTType):
                date_list.append(stamp.date())
                break
    return months, starts
@staticmethod
def get_reverse_trend(day_idx, idx_list):
    """Tell whether the days of a sheet run backwards, forwards, or neither.

    Consecutive valid days are compared; a decrease counts +1 and an increase
    counts -1. Positions listed in ``idx_list`` (starts of a new month) are not
    compared with their predecessor, and NaN days are skipped.

    Args:
        day_idx: iterable of day-of-month numbers, NaN for invalid dates.
        idx_list: positions at which a new month run begins.

    Returns:
        1 if decreases dominate, -1 if increases dominate, 0 otherwise.
    """
    score = 0
    last_day = None
    for pos, day in enumerate(day_idx):
        if np.isnan(day):
            continue
        if last_day is not None and pos not in idx_list and day != last_day:
            score += 1 if day < last_day else -1
        last_day = day
    # Collapse the tally to its sign.
    return (score > 0) - (score < 0)
def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics):
    """Split the rows of ``ws`` into per-month ranges using its first column.

    Args:
        ws: worksheet whose column 1 holds the dates.
        month_mapping: dict updated in place; each ``'%Y-%m'`` key gets
            ``(sheet title, first row, last row, mean day of month)`` tuples
            appended. Sheets without any parseable date go under ``'xxxx-xx'``
            as one range with a mean of 0.
        reverse_trend_list: list that receives this sheet's trend (see
            ``get_reverse_trend``) when at least one date was parsed.
        min_row: first data row of ``ws`` (1-based).
        date_list: passed through to ``month_split``.
        date_statistics: passed through to ``month_split``.
    """
    for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True):
        # Keep only the leading 'YYYY-MM-DD' part of text cells.
        date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src]
        # np.asarray instead of np.array(..., copy=False): NumPy >= 2.0 raises
        # ValueError for copy=False whenever a copy is required, which is
        # always the case for a list.
        # NOTE(review): array_to_datetime is a private pandas API and
        # require_iso8601 may be missing in newer pandas releases -- confirm
        # against the pinned pandas version.
        dt_array, _ = tslib.array_to_datetime(
            np.asarray(date_tuple, dtype=np.object_),
            errors="coerce",
            utc=False,
            dayfirst=False,
            yearfirst=False,
            require_iso8601=True,
        )
        dti = DatetimeIndex(dt_array, tz=None, name=None)

        month_list, idx_list = self.month_split(dti, date_list, date_statistics)

        if len(month_list) == 0:
            # No usable date at all: keep the whole sheet as one undated part.
            month_mapping.setdefault('xxxx-xx', []).append((ws.title, min_row, ws.max_row, 0))
            continue

        day_idx = dti.day
        reverse_trend_list.append(self.get_reverse_trend(day_idx, idx_list))

        # Pair each month start with the next start; the last month runs to
        # the end of the sheet (stop is None).
        stops = idx_list[1:] + [None]
        for month, start, stop in zip(month_list, idx_list, stops):
            day_mean = np.mean(day_idx[start:stop].dropna())
            last_row = ws.max_row if stop is None else stop + min_row - 1
            month_mapping.setdefault(month, []).append(
                (ws.title, start + min_row, last_row, day_mean))
def build_metadata_rows(self, confidence, code, print_time, start_date, end_date):
    """Assemble the rows written at the top of the meta sheet.

    Args:
        confidence: value shown next to the recognition-confidence label.
        code: iterable of rows listed under ``self.code_header``.
        print_time: print time shown in the date row.
        start_date: first date of the statement, or None.
        end_date: last date of the statement, or None.

    Returns:
        list of row tuples: confidence, code section, date section (with the
        number of days between the two dates, or None when either is missing)
        and finally the keyword header.
    """
    has_range = start_date is not None and end_date is not None
    span_days = (end_date - start_date).days if has_range else None
    return [
        ('流水识别置信度', confidence),
        self.blank_row,
        self.code_header,
        *code,
        self.blank_row,
        self.date_header,
        (print_time, start_date, end_date, span_days),
        self.blank_row,
        self.keyword_header,
    ]
def create_meta_sheet(self, card):
    """Return the meta sheet for ``card``, titled with its last six characters.

    The workbook's first sheet is reused (renamed) while it still carries the
    title 'Sheet'; otherwise a new sheet is created.
    """
    title = '{0}({1})'.format(self.meta_sheet_title, card[-6:])
    first_sheet = self.worksheets[0]
    if first_sheet.title != 'Sheet':
        return self.create_sheet(title)
    first_sheet.title = title
    return first_sheet
def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date):
    """Create the meta sheet for ``card`` and fill it with the metadata rows.

    Returns:
        the sheet, with every row from ``build_metadata_rows`` appended.
    """
    rows = self.build_metadata_rows(confidence, code, print_time, start_date, end_date)
    sheet = self.create_meta_sheet(card)
    for entry in rows:
        sheet.append(entry)
    return sheet
@staticmethod
def amount_format(amount_str):
    """Normalise a recognised amount string so that it can be parsed as a number.

    Non-string values and the empty string are returned unchanged. Otherwise:

    1. characters are replaced according to ``consts.TRANS``;
    2. a first character found in ``consts.ERROR_CHARS`` becomes '-';
    3. every '-' after the first character is removed;
    4. for results of four or more characters, the separator three places from
       the end is repaired: ',.' collapses to '.', and a lone ',' becomes '.'.
    """
    if not (isinstance(amount_str, str) and amount_str):
        return amount_str

    text = amount_str.translate(consts.TRANS)
    head, tail = text[0], text[1:].replace('-', '')
    text = ('-' if head in consts.ERROR_CHARS else head) + tail

    if len(text) < 4:
        return text

    # Position where the decimal point of a two-decimal amount is expected.
    pivot = len(text) - 3
    if text[pivot] == ',':
        return text[:pivot] + '.' + text[pivot + 1:]
    if text[pivot] == '.' and text[pivot - 1] == ',':
        return text[:pivot - 1] + text[pivot:]
    return text
def build_month_sheet(self, card, month_mapping, ms, is_reverse):
    """Create one worksheet per month and post-process its rows.

    For every month in ``month_mapping`` (processed in sorted key order) the
    referenced row ranges are copied into a new sheet, keyword rows are
    extracted or highlighted, the balance and amount cells are converted to
    numbers, a balance-check formula is written, and same-day offsetting
    amounts are highlighted.

    Args:
        card: card number; its last six characters go into each sheet title.
        month_mapping: maps a month key to a list of parts; each part is
            indexed as ``part[0]`` = source sheet name, ``part[1]`` = first
            row, ``part[2]`` = last row.
        ms: metadata sheet that receives the extracted keyword rows.
        is_reverse: selects which of the two balance-check formulas is used.
    """
    # Scratch sheet collecting salary-keyword rows; removed at the end.
    tmp_ws = self.create_sheet('tmp_ws')
    for month in sorted(month_mapping.keys()):
        # 3.1. Copy the data
        parts = month_mapping.get(month)
        new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
        new_ws.append(consts.FIXED_HEADERS)
        for part in parts:
            ws = self.get_sheet_by_name(part[0])
            for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
                new_ws.append(row_value)
        # 3.2. Extract information and highlight
        # amount_mapping: date value -> amount -> rows already seen with it
        amount_mapping = {}
        amount_fill_row = set()
        for rows in new_ws.iter_rows(min_row=2):
            summary_cell = rows[consts.SUMMARY_IDX]
            date_cell = rows[consts.DATE_IDX]
            amount_cell = rows[consts.AMOUNT_IDX]
            row = summary_cell.row
            # Keyword group 1: copy straight to the metadata sheet
            if summary_cell.value in self.interest_keyword:
                ms.append((summary_cell.value, date_cell.value, amount_cell.value))
            # Keyword group 2: collect in the scratch sheet
            elif summary_cell.value in self.salary_keyword:
                tmp_ws.append((summary_cell.value, date_cell.value, amount_cell.value))
            # Loan keywords: highlight the summary cell
            elif summary_cell.value in self.loan_keyword:
                summary_cell.fill = self.loan_fill
            amount_error = False
            # 3.3. Convert the balance to a number
            over_cell = rows[consts.OVER_IDX]
            try:
                over_cell.value = locale.atof(self.amount_format(over_cell.value))
            except Exception as e:
                amount_error = True
            else:
                over_cell.number_format = numbers.FORMAT_NUMBER_00
            # 3.4. Convert the amount to a number
            try:
                try:
                    amount_cell.value = locale.atof(self.amount_format(amount_cell.value))
                except Exception as e:
                    # Amount cell unusable: fall back to the income column
                    # (stored as a positive value) ...
                    try:
                        amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value))
                        if amount_cell.value == 0:
                            # Bare raise re-raises the exception currently being
                            # handled, which sends control to the outlay
                            # fallback below.
                            raise
                        elif amount_cell.value < 0:
                            amount_cell.value = -amount_cell.value
                    except Exception as e:
                        # ... and then to the outlay column (stored as a
                        # non-positive value).
                        amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value))
                        if amount_cell.value > 0:
                            amount_cell.value = -amount_cell.value
            except Exception as e:
                amount_error = True
            else:
                # NOTE(review): the nesting of this branch was reconstructed
                # from a listing without indentation - confirm against the
                # original file.
                if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET:
                    amount_cell.value = -amount_cell.value
                amount_cell.number_format = numbers.FORMAT_NUMBER_00
                # A row whose negated amount was already seen on the same date
                # is marked together with the earlier rows.
                same_amount_mapping = amount_mapping.get(date_cell.value, {})
                fill_rows = same_amount_mapping.get(-amount_cell.value)
                if fill_rows:
                    amount_fill_row.add(row)
                    amount_fill_row.update(fill_rows)
                amount_mapping.setdefault(date_cell.value, {}).setdefault(
                    amount_cell.value, []).append(row)
            # 3.5. Check result: formula comparing column D of one row with
            # column D of the adjacent row plus column C; skipped for the
            # first data row and for rows that failed conversion.
            if row > 2 and not amount_error:
                if is_reverse:
                    rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(
                        row - 1, row, *self.proof_res)
                else:
                    rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(
                        row, row - 1, *self.proof_res)
        # Delete the auxiliary amount columns
        new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column)
        # 3.6. Highlight same-day matching incoming/outgoing amounts
        del amount_mapping
        for row in amount_fill_row:
            new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill
    # Keyword group 2: append the collected rows to the metadata sheet
    ms.append(self.blank_row)
    ms.append(self.keyword_header)
    for row in tmp_ws.iter_rows(values_only=True):
        ms.append(row)
    self.remove(tmp_ws)
def bs_rebuild(self, bs_summary):
    """Rebuild the workbook's bank-statement sheets card by card.

    For every card the source sheets are pruned and split by month, a
    metadata sheet is built, the per-month sheets are created, and the
    source sheets are removed.

    Args:
        bs_summary: mapping of card number to a summary dict, e.g.::

            {
                '<card number>': {
                    'classify': 0,
                    'confidence': 0.9,
                    'role': '柳雪',
                    'code': [('page', 'code')],
                    'print_time': 'datetime',
                    'start_date': 'datetime',
                    'end_date': 'datetime',
                    'sheet': ['sheet_name'],
                }
            }

            Missing (or ``None``) 'sheet' and 'code' entries are treated as
            empty.
    """
    for card, summary in bs_summary.items():
        # One consistent default for both the processing loop and the
        # clean-up loop; the clean-up loop used to iterate over None when
        # the key was absent.
        sheet_names = summary.get('sheet') or []

        # 1. Prune and arrange the source sheets, split them by month
        start_date = summary.get('start_date')
        end_date = summary.get('end_date')
        # Dates are collected from the sheets only when the summary lacks one.
        date_statistics = start_date is None or end_date is None
        date_list = []
        month_mapping = {}
        reverse_trend_list = []
        for sheet in sheet_names:
            ws = self.get_sheet_by_name(sheet)
            # 1.1. Drop superfluous columns and arrange the rest
            min_row = self.sheet_prune(ws, summary.get('classify', 0))
            # 1.2. Split by month
            self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics)

        # Fill in missing bounds; needs at least two collected dates.
        if date_statistics and len(date_list) > 1:
            if start_date is None:
                start_date = min(date_list)
            if end_date is None:
                end_date = max(date_list)

        # 2. Metadata sheet
        ms = self.build_meta_sheet(card,
                                   summary.get('confidence', 1),
                                   summary.get('code') or [],
                                   summary.get('print_time'),
                                   start_date,
                                   end_date)

        # 3. Month sheets, keyword extraction and highlighting.
        # A positive trend sum means the statement is in reverse order.
        is_reverse = sum(reverse_trend_list) > 0
        for month_list in month_mapping.values():
            month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
        self.build_month_sheet(card, month_mapping, ms, is_reverse)

        # 4. Remove the source sheets
        for sheet in sheet_names:
            self.remove(self.get_sheet_by_name(sheet))
def license_rebuild(self, license_summary, document_scheme):
    """Write one sheet per license type listed in ``consts.LICENSE_ORDER``.

    A type with no entries in ``license_summary`` gets no sheet. Each entry
    is written as (label, value) rows followed by a one-cell blank row.
    Entries matching the ``consts.IC_CLASSIFY`` / '类别' == '1' test are not
    written; they are appended to ``license_summary[consts.RP_CLASSIFY]``
    instead.
    """
    for classify, (_, sheet_name, default_fields, side_diff, scheme_diff) in consts.LICENSE_ORDER:
        entries = license_summary.get(classify)
        if not entries:
            continue

        sheet = self.create_sheet(sheet_name)

        # The second document scheme swaps in a different classify for the
        # checks and lookups below.
        lookup_classify = classify
        if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]:
            lookup_classify = consts.MVC_CLASSIFY_SE

        for entry in entries:
            if lookup_classify == consts.IC_CLASSIFY and entry.get('类别') == '1':
                license_summary.setdefault(consts.RP_CLASSIFY, []).append(entry)
                continue

            fields = default_fields
            if side_diff:
                # The field layout depends on whether the marker key is present.
                marker, fields_with, fields_without = consts.FIELD_ORDER_MAP.get(lookup_classify)
                fields = fields_with if marker in entry else fields_without

            for search_field, write_field in fields:
                sheet.append((write_field, entry.get(search_field, '')))
            sheet.append((None, ))
def skip_img_sheet(self, skip_img):
    """Record skipped images on their own sheet.

    Does nothing when ``skip_img`` is empty or ``None``. Otherwise creates
    the sheet named ``consts.SKIP_IMG_SHEET_NAME``, writes
    ``consts.SKIP_IMG_SHEET_HEADER`` and then one row per item.
    """
    if not skip_img:
        return
    skipped_ws = self.create_sheet(consts.SKIP_IMG_SHEET_NAME)
    skipped_ws.append(consts.SKIP_IMG_SHEET_HEADER)
    for skipped in skip_img:
        skipped_ws.append(skipped)
def rebuild(self, bs_summary, license_summary, skip_img, document_scheme):
    """Run the three rebuild stages in order.

    Bank statements first, then licenses, then the skipped-image sheet.
    """
    stages = (
        (self.bs_rebuild, (bs_summary,)),
        (self.license_rebuild, (license_summary, document_scheme)),
        (self.skip_img_sheet, (skip_img,)),
    )
    for stage, stage_args in stages:
        stage(*stage_args)
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!