Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
2a723026
authored
2020-10-22 11:34:23 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
modify wb header
1 parent
3c98a180
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1598 additions
and
172 deletions
src/apps/doc/consts.py
src/apps/doc/consts_bak.py
src/apps/doc/ocr/wb.py
src/apps/doc/ocr/wb_bak.py
src/apps/doc/consts.py
View file @
2a72302
...
...
@@ -53,15 +53,19 @@ TRANS_MAP = {
'L'
:
"1"
,
'A'
:
"4"
,
's'
:
"5"
,
'S'
:
"5"
,
'b'
:
"6"
,
'g'
:
"9"
,
'E'
:
"9"
,
'B'
:
"13"
,
}
TRANS
=
str
.
maketrans
(
TRANS_MAP
)
ERROR_CHARS
=
{
'.'
,
'
:'
,
':'
,
'•'
,
'·
'
}
ERROR_CHARS
=
{
'.'
,
'
。'
,
':'
,
':'
,
'•'
,
'·'
,
','
,
',
'
}
# Sheet title (literally "unprocessed images") and its two column titles
# (literally "page number", "sequence number").
# NOTE(review): presumably the worksheet listing images that were skipped -
# confirm against the code that writes the workbook.
SKIP_IMG_SHEET_HEADER = ('页码', '序号')
SKIP_IMG_SHEET_NAME = '未处理图片'
...
...
@@ -70,12 +74,34 @@ UNKNOWN_CARD = '未知卡号'
# Literal text: "unknown account name".
UNKNOWN_ROLE = '未知户名'

# Date patterns written with strftime/strptime directives, covering
# "2020年10月22日", "2020/10/22", "2020-10-22" and "20201022".
DATE_FORMAT = [
    '%Y年%m月%d日',
    '%Y/%m/%d',
    '%Y-%m-%d',
    '%Y%m%d',
]

# Column titles grouped by meaning: transaction amount vs. running balance.
AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}

# Title of the verification column and its two possible values
# (literally "right" / "wrong").
PROOF_COL_TITLE = '核对结果'
PROOF_RES = ('对', '错')

# Literal text: "key information extraction and display".
META_SHEET_TITLE = '关键信息提取和展示'

# Symbolic names for the logical columns of a statement.
# NOTE(review): IMCOME_KEY is spelled this way in the original; the name is
# public, so it is kept as is.
SUMMARY_KEY = 'summary_col'
DATE_KEY = 'date_col'
AMOUNT_KEY = 'amount_col'
OVER_KEY = 'over_col'
IMCOME_KEY = 'income_col'
OUTLAY_KEY = 'outlay_col'
BORROW_KEY = 'borrow_col'

# Additional plain string keys.
MIN_ROW_KEY = 'min_row'
FIND_COUNT_KEY = 'find_count'
FIND_COL_KEY = 'find_col'
HEADER_KEY = 'header'

KEY_LIST = [
    SUMMARY_KEY,
    DATE_KEY,
    OVER_KEY,
    BORROW_KEY,
    AMOUNT_KEY,
    IMCOME_KEY,
    OUTLAY_KEY,
]

# Logical column -> integer position.
# NOTE(review): every value equals the 0-based position of the matching title
# in FIXED_HEADERS below ('附言' is 5, '记账日期' is 0, ...) - presumably
# intentional; confirm before reordering FIXED_HEADERS.
CLASSIFY_MAP = {
    SUMMARY_KEY: 5,
    DATE_KEY: 0,
    AMOUNT_KEY: 2,
    OVER_KEY: 3,
    IMCOME_KEY: 11,
    OUTLAY_KEY: 12,
    BORROW_KEY: 10,
}

# Column titles of the normalised output layout, in order.
FIXED_HEADERS = (
    '记账日期',
    '记账时间',
    '金额',
    '余额',
    '交易名称',
    '附言',
    '对方账户名',
    '对方卡号/账号',
    '对方开户行',
    '核对结果',
    '借贷',
    '收入',
    '支出',
)
FIXED_COL_AMOUNT = len(FIXED_HEADERS)
...
...
@@ -103,36 +129,60 @@ OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取
# ------------------普通打印-全格线--------------------------------------------------------------------------------------
HEADERS_MAPPING
=
{}
# 借贷
HEADERS_MAPPING
.
update
(
{
'借贷'
:
BORROW_KEY
,
'借贷状态'
:
BORROW_KEY
,
'收/支'
:
BORROW_KEY
,
}
)
# 收入
HEADERS_MAPPING
.
update
(
{
'收入金额'
:
IMCOME_KEY
,
'收入'
:
IMCOME_KEY
,
'存入'
:
IMCOME_KEY
,
'存入金额(贷)'
:
IMCOME_KEY
,
'存入金额(贷)'
:
IMCOME_KEY
,
}
)
# 支出
HEADERS_MAPPING
.
update
(
{
'支出金额'
:
OUTLAY_KEY
,
'支出'
:
OUTLAY_KEY
,
'支取金额(借)'
:
OUTLAY_KEY
,
'支取金额(借)'
:
OUTLAY_KEY
,
}
)
# 横版-表格-中国银行(不规则)
HEADERS_MAPPING
.
update
(
{
'记账日期'
:
BASE_HEADERS_MAPPING
[
'记账日期'
],
'记账时间'
:
BASE_HEADERS_MAPPING
[
'记账时间'
],
'金额'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'余额'
:
BASE_HEADERS_MAPPING
[
'余额'
],
'交易名称'
:
BASE_HEADERS_MAPPING
[
'交易名称'
],
'附言'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'对方账户名'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'对方卡号/账号'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'对方开户行'
:
BASE_HEADERS_MAPPING
[
'对方开户行'
],
'记账日期'
:
DATE_KEY
,
'金额'
:
AMOUNT_KEY
,
'余额'
:
OVER_KEY
,
'附言'
:
SUMMARY_KEY
,
}
)
# 横版-表格-农业银行-中国农业银行个人账户明细
HEADERS_MAPPING
.
update
(
{
'交易日期'
:
BASE_HEADERS_MAPPING
[
'记账日期'
],
# '存入': BASE_HEADERS_MAPPING['金额'],
'对方账号'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'对方名称'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'摘要'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'交易日期'
:
DATE_KEY
,
# '存入': AMOUNT_KEY,
'摘要'
:
SUMMARY_KEY
,
}
)
# 横版-表格-北京银行
HEADERS_MAPPING
.
update
(
{
'业务摘要'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'发生额'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'对方户名'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'业务摘要'
:
SUMMARY_KEY
,
'发生额'
:
AMOUNT_KEY
,
}
)
# 横版-表格-工商银行 借记卡账户历史明细清单
...
...
@@ -142,8 +192,8 @@ HEADERS_MAPPING.update(
# 工商银行历史明细(申请单号:20042501303039397888)
HEADERS_MAPPING
.
update
(
{
'收入/支出金额'
:
BASE_HEADERS_MAPPING
[
'金额'
]
,
'工作日期'
:
BASE_HEADERS_MAPPING
[
'记账日期'
]
,
'收入/支出金额'
:
AMOUNT_KEY
,
'工作日期'
:
DATE_KEY
,
}
)
...
...
@@ -153,26 +203,23 @@ HEADERS_MAPPING.update(
# 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2)
HEADERS_MAPPING
.
update
(
{
'交易金额'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'账户余额'
:
BASE_HEADERS_MAPPING
[
'余额'
],
'对方账号与户名'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'交易金额'
:
AMOUNT_KEY
,
'账户余额'
:
OVER_KEY
,
}
)
# 微信
HEADERS_MAPPING
.
update
(
{
'交易时间'
:
BASE_HEADERS_MAPPING
[
'记账时间'
],
'交易类型'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'金额(元)'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'金额(元)'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'交易对方'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'交易类型'
:
SUMMARY_KEY
,
'金额(元)'
:
AMOUNT_KEY
,
'金额(元)'
:
AMOUNT_KEY
,
}
)
# 支付宝
HEADERS_MAPPING
.
update
(
{
'时间'
:
BASE_HEADERS_MAPPING
[
'记账日期'
]
,
'名称/备注'
:
BASE_HEADERS_MAPPING
[
'附言'
]
,
'时间'
:
DATE_KEY
,
'名称/备注'
:
SUMMARY_KEY
,
}
)
...
...
@@ -182,33 +229,27 @@ HEADERS_MAPPING.update(
# 竖版-无表格-农业银行CH-B008805428
HEADERS_MAPPING
.
update
(
{
'摘要/附言'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'交易地点/对方账号和户名'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'摘要/附言'
:
SUMMARY_KEY
,
}
)
# 农业银行-窄页
HEADERS_MAPPING
.
update
(
{
'交易对手账号'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
}
)
# 竖版-特殊-农商行
HEADERS_MAPPING
.
update
(
{
'交易发生额'
:
BASE_HEADERS_MAPPING
[
'金额'
]
,
'交易发生额'
:
AMOUNT_KEY
,
}
)
# 横版-特殊-中信银行-账户交易明细
HEADERS_MAPPING
.
update
(
{
'对方银行'
:
BASE_HEADERS_MAPPING
[
'对方开户行'
],
'交易摘要'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'交易摘要'
:
SUMMARY_KEY
,
}
)
# 平安电子账单
HEADERS_MAPPING
.
update
(
{
'借贷发生额(借:-贷:+)'
:
BASE_HEADERS_MAPPING
[
'金额'
]
,
'借贷发生额(借:-贷:+)'
:
AMOUNT_KEY
,
}
)
...
...
@@ -218,7 +259,7 @@ HEADERS_MAPPING.update(
# 竖版-无表格-招商银行账户历史交易明细表
HEADERS_MAPPING
.
update
(
{
'联机余额'
:
BASE_HEADERS_MAPPING
[
'余额'
]
,
'联机余额'
:
OVER_KEY
,
}
)
# 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
...
...
@@ -226,28 +267,23 @@ HEADERS_MAPPING.update(
# 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单
HEADERS_MAPPING
.
update
(
{
'交易金额(元)'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'交易金额(元)'
:
BASE_HEADERS_MAPPING
[
'金额'
],
'账户余额(元)'
:
BASE_HEADERS_MAPPING
[
'余额'
],
'账户余额(元)'
:
BASE_HEADERS_MAPPING
[
'余额'
],
'对手方户名'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'对手方账户'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'交易金额(元)'
:
AMOUNT_KEY
,
'交易金额(元)'
:
AMOUNT_KEY
,
'账户余额(元)'
:
OVER_KEY
,
'账户余额(元)'
:
OVER_KEY
,
}
)
# 横版-无表格-广发银行-账户交易历史 --> 已废弃
# 竖版-无表格-广发银行-账户交易历史 --> 已废弃
HEADERS_MAPPING
.
update
(
{
'会计日期'
:
BASE_HEADERS_MAPPING
[
'记账日期'
],
'对手户名'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'对手账号'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'会计日期'
:
DATE_KEY
,
}
)
# 招行电子账单 TODO 有英文,需测试
HEADERS_MAPPING
.
update
(
{
'对手信息'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'摘要代码'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'摘要代码'
:
SUMMARY_KEY
,
}
)
# 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
...
...
@@ -255,46 +291,36 @@ HEADERS_MAPPING.update(
# 横版-无表格-民生银行
HEADERS_MAPPING
.
update
(
{
'摘要信息'
:
BASE_HEADERS_MAPPING
[
'附言'
],
'对方行名'
:
BASE_HEADERS_MAPPING
[
'对方开户行'
],
'摘要信息'
:
SUMMARY_KEY
,
}
)
# 竖版-无表格-农业银行整数
# 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单
HEADERS_MAPPING
.
update
(
{
'对方账号和户名'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
}
)
# 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf
# 竖版-无表格-农业银行-扩张.pdf
# 竖版-无表格-农业银行-缩进.pdf
HEADERS_MAPPING
.
update
(
{
'日期'
:
BASE_HEADERS_MAPPING
[
'记账日期'
]
,
'短摘要'
:
BASE_HEADERS_MAPPING
[
'附言'
]
,
'本次余额'
:
BASE_HEADERS_MAPPING
[
'余额'
]
,
'日期'
:
DATE_KEY
,
'短摘要'
:
SUMMARY_KEY
,
'本次余额'
:
OVER_KEY
,
}
)
# 竖版-无表格-农业银行-无标题(对手帐号)
HEADERS_MAPPING
.
update
(
{
'交易后余额'
:
BASE_HEADERS_MAPPING
[
'余额'
],
'对手帐号'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
'交易后余额'
:
OVER_KEY
,
}
)
# 竖版-无表格-农商行(非常规)
HEADERS_MAPPING
.
update
(
{
'交易说明'
:
BASE_HEADERS_MAPPING
[
'附言'
]
,
'交易说明'
:
SUMMARY_KEY
,
}
)
# 竖版-无表格-工商银行 抬头三行 活期历史明细清单
HEADERS_MAPPING
.
update
(
{
'对方账户'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
}
)
# -----------针式打印-全格线--------------------------------------------------------------------------------------------
# 竖版-表格-建设银行-中国建设银行活期账户交易明细
...
...
@@ -302,25 +328,19 @@ HEADERS_MAPPING.update(
# 竖版-表格-建设银行-对私活期账户明细- (1).pdf
HEADERS_MAPPING
.
update
(
{
'帐户余额'
:
BASE_HEADERS_MAPPING
[
'余额'
],
'对方帐户名称'
:
BASE_HEADERS_MAPPING
[
'对方账户名'
],
'帐户余额'
:
OVER_KEY
,
}
)
# 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录
HEADERS_MAPPING
.
update
(
{
'交易日期 记账日期'
:
BASE_HEADERS_MAPPING
[
'记账日期'
]
,
'交易日期 记账日期'
:
DATE_KEY
,
}
)
# ----------针式打印-部分格线------------------------------------------------------------------------------------------
# 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户)
# 竖版-特殊-邮储银行-账户交易明细(客户)
HEADERS_MAPPING
.
update
(
{
'对方账号/卡号/汇票号'
:
BASE_HEADERS_MAPPING
[
'对方卡号/账号'
],
}
)
# --------------------------------------------------------------------------------------------------------------------
...
...
@@ -563,6 +583,49 @@ CLASSIFY_LIST = [
(
'其他'
,
OTHER_TUPLE
),
]
# One entry per list position (0..37): either OTHER_TUPLE or the tuple of
# column titles for that position.
# NOTE(review): presumably indexed by the same classification id as
# CLASSIFY_LIST - confirm against the code that reads it.
CLASSIFY_HEADER_LIST = [
    OTHER_TUPLE,  # 0
    OTHER_TUPLE,  # 1
    OTHER_TUPLE,  # 2
    OTHER_TUPLE,  # 3
    ('记账日期', '记账时间', '币别', '金额', '余额', '交易名称', '渠道', '网点名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行'),  # 4
    ('交易日期', '交易网点', '存入', '支出', '余额', '对方账号', '对方名称', '摘要', '渠道', '附言'),  # 5
    ('序号', '日期', '摘要', '交易金额', '余额', '对方账号', '对方名称', '交易地点', '渠道', '附言'),  # 6
    ('交易日期', '摘要', '交易金额', '余额', '交易渠道', '交易网点', '对方账号', '对方名称', '附言'),  # 7
    ('交易日期', '业务摘要', '收/支', '发生额', '余额', '对方户名', '对方账号', '交易渠道'),  # 8
    ('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '渠道'),  # 9
    ('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '对方户名', '对方账号', '渠道'),  # 10
    (None, '摘要', '交易日期', '交易金额', '账户余额', '商户/网点号及其名称', '对方账号与户名'),  # 11
    ('交易单号', '交易时间', '交易类型', '收/支/其他', '交易方式', '金额(元)', '交易对方', '商户单号'),  # 12
    ('流水号', '时间', '名称/备注', '收入', '支出', '账户余额', '资金渠道'),  # 13
    ('交易日期', '记账日期', '交易地点', '交易类型', '借贷状态', '交易金额', '余额'),  # 14
    ('交易日期', '交易类型', '交易金额(元)', '账户余额(元)', '操作柜员'),  # 15
    ('交易日期', '交易类型', '交易币种', '交易金额(元)', '账户余额(元)', '对手方户名', '对手方账户', '收支类型'),  # 16
    ('日期', '时间', '日志号', '短摘要', '交易金额', '本次余额', '交易网点', '渠道', '附言'),  # 17
    ('交易日期', '摘要/附言', '交易金额', '对方账号和户名'),  # 18
    ('记账日期', '货币', '交易金额', '联机余额', '冲补账', '交易摘要'),  # 19
    ('记账日期', '货币', '交易金额', '联机余额', '交易摘要', '对手信息'),  # 20
    ('凭证类型', '凭证号码', '交易时间', '摘要', '交易金额', '账户余额', '现转标志', '交易渠道', '交易机构', '对方户名', '对方行名'),  # 21
    ('交易日期', '交易摘要', '收入金额', '支出金额', '账户余额', '对方户名', '对方账号', '对方银行', '交易流水号'),  # 22
    ('交易日期', '摘要/附言', '交易金额', '余额', '交易地点/对方账号和户名'),  # 23
    ('日期', '地点', '摘要', '存入', '支出', '余额', '对方账号', '对方户名'),  # 24
    ('日期', '摘要', '交易金额', '余额', '地点', '交易对手账号', '对方户名'),  # 25
    ('序号', '交易日期', '交易网点', '摘要', '借贷发生额(借:-贷:+)', '账户余额'),  # 26
    ('序号', '摘要', '币别', '钞汇', '交易日期', '交易金额', '账户余额', '交易地点附言', '对方账号与户名'),  # 27
    OTHER_TUPLE,  # 28
    OTHER_TUPLE,  # 29
    OTHER_TUPLE,  # 30
    OTHER_TUPLE,  # 31
    OTHER_TUPLE,  # 32
    OTHER_TUPLE,  # 33
    OTHER_TUPLE,  # 34
    ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),  # 35
    ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),  # 36
    OTHER_TUPLE,  # 37
]
# ----------license相关------------------------------------------------------------------------------------------------
# "0":"AVT Invioce",
...
...
src/apps/doc/consts_bak.py
0 → 100644
View file @
2a72302
# Pagination defaults.
PAGE_DEFAULT = 1
PAGE_SIZE_DEFAULT = 10

FIXED_APPLICATION_ID_PREFIX = 'CH-S'

DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']

HIL_PREFIX = 'HIL'
AFC_PREFIX = 'AFC'
SPLIT_STR = '_'
BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]

# All eight upper/lower-case spellings of "hil", plus the code 'CO00002'
# (the value BUSINESS_TYPE_DICT assigns to HIL_PREFIX below).
HIL_SET = {
    first + second + third
    for first in 'Hh'
    for second in 'Ii'
    for third in 'Ll'
} | {'CO00002'}

# ------- EDMS related -------------------------------------------------------
SESSION_PREFIX = 'FHLSID'
CUSTOM_CLIENT = 'CustomClient'
FIXED_TOKEN = '00000000-0000-0000-0000-000000000000'
FIXED_FILE_SIZE = 0
DOWNLOAD_ACTION_TYPE = 'Downloaded'

# Document scheme -> (integer id, string).
# NOTE(review): presumably (schema id, fill/filing mode) as the name suggests -
# confirm against the EDMS client code.
DOC_SCHEMA_ID_FILL = {
    'ACCEPTANCE': (1, 'DFE-AutoFilingScript'),
    'SETTLEMENT': (20, 'DFE-AutoFilingScript'),
    'CONTRACT MANAGEMENT': (86, 'Schema-Based'),
}

# Business-type prefix -> code string.
BUSINESS_TYPE_DICT = {
    HIL_PREFIX: 'CO00002',
    AFC_PREFIX: 'CO00001',
}

DOC_SCHEMA_TYPE = 'ElectronicRecord'

# Integer ids; the mixed-case "_id" suffix is part of the public names.
APPLICATION_ID_META_FIELD_id = 1
DEALER_CODE_META_FIELD_id = 13
BUSINESS_TYPE_META_FIELD_id = 93

DEALER_CODE = 'ocr_situ_group'
RETRY_TIMES = 3
# --------- bank-statement template related ----------------------------------

# Replacement text paired with the characters that are rewritten to it.
# NOTE(review): presumably characters that OCR confuses with digits - confirm.
_DIGIT_LOOKALIKES = (
    ("0", "Cc(oOD"),
    ("1", "[]lL"),
    ("4", "A"),
    ("5", "sS"),
    ("6", "b"),
    ("9", "gE"),
    ("13", "B"),
)

# Single character -> replacement string ('B' expands to the two characters "13").
TRANS_MAP = {
    glyph: digits
    for digits, glyphs in _DIGIT_LOOKALIKES
    for glyph in glyphs
}

# Translation table for str.translate(), built from TRANS_MAP.
TRANS = str.maketrans(TRANS_MAP)

# Punctuation characters collected as a set.
# NOTE(review): ':' and ',' each appear twice in the text as received; one of
# each pair was presumably the full-width form (U+FF1A / U+FF0C) before the
# text was normalised - verify against the repository copy.
ERROR_CHARS = {'.', '。', ':', ':', '•', '·', ',', ','}
# Sheet title (literally "unprocessed images") and its column titles
# (literally "page number", "sequence number").
SKIP_IMG_SHEET_HEADER = ('页码', '序号')
SKIP_IMG_SHEET_NAME = '未处理图片'

CARD_RATIO = 0.9

# Literal texts: "unknown card number" / "unknown account name".
UNKNOWN_CARD = '未知卡号'
UNKNOWN_ROLE = '未知户名'

# Date patterns written with strftime/strptime directives, covering
# "2020年10月22日", "2020/10/22", "2020-10-22" and "20201022".
DATE_FORMAT = [
    '%Y年%m月%d日',
    '%Y/%m/%d',
    '%Y-%m-%d',
    '%Y%m%d',
]

# Title of the verification column and its two possible values
# (literally "right" / "wrong").
PROOF_COL_TITLE = '核对结果'
PROOF_RES = ('对', '错')

# Literal text: "key information extraction and display".
META_SHEET_TITLE = '关键信息提取和展示'
# Column titles of the normalised output layout, in order.
FIXED_HEADERS = (
    '记账日期',
    '记账时间',
    '金额',
    '余额',
    '交易名称',
    '附言',
    '对方账户名',
    '对方卡号/账号',
    '对方开户行',
    '核对结果',
    '借贷',
    '收入',
    '支出',
)
FIXED_COL_AMOUNT = len(FIXED_HEADERS)

# Title -> 1-based column number.
BASE_HEADERS_MAPPING = {
    title: col for col, title in enumerate(FIXED_HEADERS, start=1)
}

# 1-based column numbers of selected titles.
BORROW_HEADER_COL = BASE_HEADERS_MAPPING['借贷']
INCOME_HEADER_COL = BASE_HEADERS_MAPPING['收入']
OUTLAY_HEADER_COL = BASE_HEADERS_MAPPING['支出']
RESULT_HEADER_COL = BASE_HEADERS_MAPPING['核对结果']

# 0-based positions of the same columns (column number minus one).
BORROW_IDX, INCOME_IDX, OUTLAY_IDX = (
    col - 1
    for col in (BORROW_HEADER_COL, INCOME_HEADER_COL, OUTLAY_HEADER_COL)
)

# 0-based positions looked up directly in FIXED_HEADERS.
SUMMARY_IDX, DATE_IDX, AMOUNT_IDX, OVER_IDX, RESULT_IDX = map(
    FIXED_HEADERS.index,
    ('附言', '记账日期', '金额', '余额', '核对结果'),
)
# Debit/credit marker column, per header wording:
#   header title -> (cell value filed under income, cell value filed under outlay)
_BORROW_MARKERS = {
    '借贷': ('贷', '借'),  # portrait / no table / China Guangfa Bank
    '借贷状态': ('贷', '借'),  # portrait / special / Bank of Communications
    '收/支': ('收入', '支出'),  # landscape / table / Bank of Beijing
}

BORROW_HEADERS_SET = set(_BORROW_MARKERS)
BORROW_INCOME_SET = {income for income, _ in _BORROW_MARKERS.values()}
BORROW_OUTLAY_SET = {outlay for _, outlay in _BORROW_MARKERS.values()}

# Header wordings of dedicated income / outlay amount columns.
# NOTE(review): the last two titles of each set are identical in the text as
# received; one of each pair presumably used full-width parentheses before the
# text was normalised - verify against the repository copy.
INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'}
OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'}
# (source header title, FIXED_HEADERS title it is filed under), grouped by the
# statement template each wording comes from. Order matters only for the
# insertion order of HEADERS_MAPPING.
# NOTE(review): a few titles containing parentheses appear twice in a row in
# the text as received; one of each pair presumably used full-width
# parentheses before the text was normalised - verify against the repository
# copy. The repeats are kept; a repeated key simply maps to the same value.
_HEADER_ALIASES = (
    # ------------------ normal print - full grid lines ------------------
    # landscape / table / Bank of China (irregular)
    ('记账日期', '记账日期'),
    ('记账时间', '记账时间'),
    ('金额', '金额'),
    ('余额', '余额'),
    ('交易名称', '交易名称'),
    ('附言', '附言'),
    ('对方账户名', '对方账户名'),
    ('对方卡号/账号', '对方卡号/账号'),
    ('对方开户行', '对方开户行'),

    # landscape / table / Agricultural Bank of China - personal account details
    ('交易日期', '记账日期'),
    # ('存入', '金额'),  # disabled in the original
    ('对方账号', '对方卡号/账号'),
    ('对方名称', '对方账户名'),
    ('摘要', '附言'),

    # landscape / table / Bank of Beijing
    ('业务摘要', '附言'),
    ('发生额', '金额'),
    ('对方户名', '对方账户名'),

    # landscape / table / ICBC - debit card account history detail list
    # landscape / table / ICBC - machine-printed verification code, debit card account history detail list
    # landscape / table / ICBC CH-B008802400
    # landscape / table / ICBC - payroll detail list
    # ICBC history details (application no. 20042501303039397888)
    ('收入/支出金额', '金额'),
    ('工作日期', '记账日期'),

    # landscape / table / CCB - personal demand account transaction details
    # portrait / table / CCB - personal demand account transaction details CH-B005832604
    # portrait / table / CCB - payroll statement CH-B008786812
    # portrait / table / CCB - personal demand account transaction details CH-B005832604 (2)
    ('交易金额', '金额'),
    ('账户余额', '余额'),
    ('对方账号与户名', '对方卡号/账号'),

    # WeChat
    ('交易时间', '记账时间'),
    ('交易类型', '附言'),
    ('金额(元)', '金额'),
    ('金额(元)', '金额'),
    ('交易对方', '对方账户名'),

    # Alipay
    ('时间', '记账日期'),
    ('名称/备注', '附言'),

    # ------------ normal print - partial grid lines ------------
    # portrait / no table / Agricultural Bank of China
    # portrait / no table / Agricultural Bank of China CH-B008805428
    ('摘要/附言', '附言'),
    ('交易地点/对方账号和户名', '对方卡号/账号'),

    # Agricultural Bank of China - narrow page
    ('交易对手账号', '对方卡号/账号'),

    # portrait / special / Rural Commercial Bank
    ('交易发生额', '金额'),

    # landscape / special / China CITIC Bank - account transaction details
    ('对方银行', '对方开户行'),
    ('交易摘要', '附言'),

    # Ping An e-statement
    ('借贷发生额(借:-贷:+)', '金额'),

    # ------------ normal print - no grid lines ------------
    # portrait / no table / China Merchants Bank (slightly skewed)
    # portrait / no table / China Merchants Bank - account historical transaction detail table
    ('联机余额', '余额'),

    # portrait / no table / Postal Savings Bank - account statement with counterparty name and account
    # portrait / no table / Postal Savings Bank - account statement
    # portrait / no table / Postal Savings Bank - electronic seal, account statement
    ('交易金额(元)', '金额'),
    ('交易金额(元)', '金额'),
    ('账户余额(元)', '余额'),
    ('账户余额(元)', '余额'),
    ('对手方户名', '对方账户名'),
    ('对手方账户', '对方卡号/账号'),

    # landscape / no table / China Guangfa Bank - account transaction history --> deprecated
    # portrait / no table / China Guangfa Bank - account transaction history --> deprecated
    ('会计日期', '记账日期'),
    ('对手户名', '对方账户名'),
    ('对手账号', '对方卡号/账号'),

    # China Merchants Bank e-statement  TODO contains English, needs testing
    ('对手信息', '对方账户名'),
    ('摘要代码', '附言'),

    # landscape / no table / Minsheng Bank - personal account statement (customer card number)
    # landscape / no table / Minsheng Bank - untitled (customer account)
    # landscape / no table / Minsheng Bank
    ('摘要信息', '附言'),
    ('对方行名', '对方开户行'),

    # portrait / no table / Agricultural Bank of China - "integer" variant
    # portrait / no table / Agricultural Bank of China - bank card transaction detail list
    ('对方账号和户名', '对方卡号/账号'),

    # portrait / no table / Agricultural Bank of China - bank card demand passbook transaction detail list.pdf
    # portrait / no table / Agricultural Bank of China - expanded.pdf
    # portrait / no table / Agricultural Bank of China - indented.pdf
    ('日期', '记账日期'),
    ('短摘要', '附言'),
    ('本次余额', '余额'),

    # portrait / no table / Agricultural Bank of China - untitled (counterparty account)
    ('交易后余额', '余额'),
    ('对手帐号', '对方卡号/账号'),

    # portrait / no table / Rural Commercial Bank (non-standard)
    ('交易说明', '附言'),

    # portrait / no table / ICBC - three-line header, demand account history detail list
    ('对方账户', '对方卡号/账号'),

    # ----------- dot-matrix print - full grid lines -----------
    # portrait / table / CCB - demand account transaction details
    # portrait / table / CCB - demand account detail list
    # portrait / table / CCB - retail demand account details - (1).pdf
    ('帐户余额', '余额'),
    ('对方帐户名称', '对方账户名'),

    # portrait / special / Bank of Communications - retail customer transaction list, records above 5000
    ('交易日期 记账日期', '记账日期'),

    # ---------- dot-matrix print - partial grid lines ----------
    # portrait / special / Postal Savings Bank - all-in-one passbook / green card transaction details (customer)
    # portrait / special / Postal Savings Bank - account transaction details (customer)
    ('对方账号/卡号/汇票号', '对方卡号/账号'),
)

# Source header title -> 1-based column number of the FIXED_HEADERS title it
# maps to (looked up through BASE_HEADERS_MAPPING).
HEADERS_MAPPING = {
    alias: BASE_HEADERS_MAPPING[canonical]
    for alias, canonical in _HEADER_ALIASES
}
# --------------------------------------------------------------------------------------------------------------------
# ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果', '借贷', '收入', '支出')
# CLASSIFY_LIST = [
# # --------------普通打印:全格线---------------------------------
# # 中国银行:记账日期 记账时间 币别 金额 余额 交易名称 渠道 网点名称 附言 对方账户名 对方卡号/账号 对方开户行
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则)
#
# # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言
# ('农业银行-10', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), # 横版-表格-农业银行-中国农业银行个人账户明细
#
# # 农业银行:序号 日期 摘要 交易金额 余额 对方账号 对方名称 交易地点 渠道 附言
# ('农业银行-10-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
#
# # 农业银行:交易日期 摘要 交易金额 余额 交易渠道 交易网点 对方账号 对方名称 附言
# ('农业银行-9', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
#
# # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行
#
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 渠道
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
#
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 对方户名 对方账号 渠道
# ('工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
#
# # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行
# # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行
# ('建设银行-竖版', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
# ('建设银行-横版', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
#
# # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
#
# # 支付宝:流水号 时间 名称/备注 收入 支出 账户余额 资金渠道
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# # -----------------普通打印:部分格线--------------------------------
#
# # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
# ('农业银行-5', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
#
# # 农业银行:日期 地点 摘要 存入 支出 余额 对方账号 对方户名
# ('农业银行-8', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
# # 农业银行:日期 摘要 交易金额 余额 地点 交易对手账号 对方户名
# ('农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
#
# # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注
# ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)),
#
# # 中信银行:交易日期 交易摘要 收入金额 支出金额 账户余额 对方户名 对方账号 对方银行 交易流水号
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
#
# # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
# # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名
# ('建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
#
# # -----------------普通打印:无格线-------------------------------------
#
# # 招商银行:记账日期 货币 交易金额 联机余额 冲补账 交易摘要
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
#
# # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
# # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单
# ('邮储银行-8', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
# ('邮储银行-5', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
#
# # 工商银行电子版:交易日期 账号 储种 序号 币种 妙汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道
# ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
#
# # 招商银行电子版:记账日期 货币 交易金额 联机余额 交易摘要 对手信息
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
#
# # 民生银行:凭证类型 凭证号码 摘要信息 交易时间 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
# # 凭证类型 凭证号码 交易时间 摘要 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名
# ('农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
#
# # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
#
# # 农业银行:日期、时间、短摘要、交易金额、本次余额、交易网点、渠道、附言
# # 农业银行:日期、时间、日志号、短摘要、交易金额、本次余额、交易网点、渠道、附言
# ('农业银行', (1, 2, 4, 5, None, 3, None, None, None, None, None, None, None)),
# ('农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
#
#
# # ================针式打印:有格线===================
#
# # 建设银行: 摘要、交易日期、交易金额、账户余额、商户/网点号及其名称、对方账号、对方户名 --> 竖版-表格-建设银行-中国建设银行活期账户明细清单
# # 交易日期、摘要、 币种、 钞汇、 交易金额、 帐户余额、对方账号、 对方帐户名称 --> 竖版-表格-建设银行-对私活期账户明细- (1)
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
#
#
# # ================针式打印:无格线===================
#
# # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# Thirteen None entries - the same length as FIXED_HEADERS; used as the
# column tuple of every '其他' ("other") entry in CLASSIFY_LIST.
OTHER_TUPLE = (None,) * 13
# {
# "0":"其他",
# "1":"普通打印-全表格-中国农业银行",
# "2":"普通打印-全表格-中国银行",
# "3":"普通打印-全表格-北京银行",
# "4":"普通打印-全表格-工商银行",
# "5":"普通打印-全表格-建设银行",
# "6":"普通打印-全表格-微信账单",
# "7":"普通打印-全表格-支付宝账单",
# "8":"普通打印-无格线-中国邮政储蓄银行",
# "9":"普通打印-无格线-交通银行",
# "10":"普通打印-无格线-农业银行整数",
# "11":"普通打印-无格线-农业银行银行活期扩张缩进",
# "12":"普通打印-无格线-招商银行",
# "13":"普通打印-无格线-招行电子账单",
# "14":"普通打印-无格线-民生银行",
# "15":"普通打印-部分格线-横版-中信银行",
# "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页",
# "17":"普通打印-部分格线-竖版-农业银行",
# "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细",
# "19":"普通打印-部分格线-竖版-平安电子账单",
# "20":"针式打印-全格线-建设银行",
# "21":"针式打印-部分格线-竖版-邮储银行账户交易",
# "22":"针式打印-部分格线-邮储银行一本通绿卡"
# }
# CLASSIFY_LIST = [
# ('其他', OTHER_TUPLE),
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
# ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
# ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# "4":"普通打印-全表格-中国银行",
# "5":"普通打印-全表格-农业银行-10列",
# "6":"普通打印-全表格-农业银行-10列-1",
# "7":"普通打印-全表格-农业银行-9列",
# "8":"普通打印-全表格-北京银行",
# "9":"普通打印-全表格-工商银行",
# "10":"普通打印-全表格-工商银行-电子账单",
# "11":"普通打印-全表格-建设银行",
# "12":"普通打印-全表格-微信账单",
# "13":"普通打印-全表格-支付宝账单",
# "14":"普通打印-无格线-交通银行",
# "15":"普通打印-无格线-储蓄银行-5列",
# "16":"普通打印-无格线-储蓄银行-8列",
# "17":"普通打印-无格线-农业银行-扩张缩进",
# "18":"普通打印-无格线-农业银行-整数",
# "19":"普通打印-无格线-招商银行",
# "20":"普通打印-无格线-招商银行-电子账单",
# "21":"普通打印-无格线-民生银行",
# "22":"普通打印-部分格线-横版-中信银行",
# "23":"普通打印-部分格线-竖版-农业银行-5列",
# "24":"普通打印-部分格线-竖版-农业银行-8列",
# "25":"普通打印-部分格线-竖版-农业银行-窄页",
# "26":"普通打印-部分格线-竖版-平安电子账单",
# "27":"普通打印-部分格线-竖版-建设银行-电子账单",
# "34":"针式打印-全格线-建设银行",
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
# One (template name, 13-slot column tuple) pair per list position 0..37.
# The names at positions 4-27 and 34-36 match the id legend in the comment
# directly above. Each tuple has one slot per FIXED_HEADERS title; None leaves
# the slot empty.
# NOTE(review): presumably each number is the 1-based source column feeding
# that fixed column - confirm against the code that reads this list.
_OTHER = ('其他', OTHER_TUPLE)

CLASSIFY_LIST = [
    _OTHER,  # 0
    _OTHER,  # 1
    _OTHER,  # 2
    _OTHER,  # 3
    ('普通打印-全表格-中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),  # 4
    ('普通打印-全表格-农业银行-10列', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)),  # 5
    ('普通打印-全表格-农业银行-10列-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),  # 6
    ('普通打印-全表格-农业银行-9列', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),  # 7
    ('普通打印-全表格-北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),  # 8
    ('普通打印-全表格-工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),  # 9
    ('普通打印-全表格-工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),  # 10
    ('普通打印-全表格-建设银行', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),  # 11
    ('普通打印-全表格-微信账单', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),  # 12
    ('普通打印-全表格-支付宝账单', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),  # 13
    ('普通打印-无格线-交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),  # 14
    ('普通打印-无格线-储蓄银行-5列', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),  # 15
    ('普通打印-无格线-储蓄银行-8列', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),  # 16
    ('普通打印-无格线-农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),  # 17
    ('普通打印-无格线-农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),  # 18
    ('普通打印-无格线-招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),  # 19
    ('普通打印-无格线-招商银行-电子账单', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),  # 20
    ('普通打印-无格线-民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),  # 21
    ('普通打印-部分格线-横版-中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),  # 22
    ('普通打印-部分格线-竖版-农业银行-5列', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),  # 23
    ('普通打印-部分格线-竖版-农业银行-8列', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),  # 24
    ('普通打印-部分格线-竖版-农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),  # 25
    ('普通打印-部分格线-竖版-平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),  # 26
    ('普通打印-部分格线-竖版-建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),  # 27
    _OTHER,  # 28
    _OTHER,  # 29
    _OTHER,  # 30
    _OTHER,  # 31
    _OTHER,  # 32
    _OTHER,  # 33
    ('针式打印-全格线-建设银行', OTHER_TUPLE),  # 34
    ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),  # 35
    ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),  # 36
    _OTHER,  # 37
]
# ----------license相关------------------------------------------------------------------------------------------------
# "0":"AVT Invioce",
# "1":"二手车发票",
# "2":"其他",
# "3":"护照",
# "28":"机动车登记证",
# "29":"机动车销售统一发票",
# "30":"港澳通行证",
# "31":"营业执照",
# "32":"行驶证",
# "33":"身份证",
# "37":"银行卡"
# Classification id for "other".
OTHER_CLASSIFY = 2

# Field-order tables are tuples of 2-tuples of strings.
# NOTE(review): presumably (output label, key in the recognition result) -
# confirm against the code that consumes them.

# Identity card
IC_CN_NAME = '身份证'
IC_CLASSIFY = 33
IC_FIELD_ORDER_0 = tuple(
    (field, field)
    for field in ('姓名', '公民身份号码', '出生年月', '住址', '性别', '民族')
)
IC_FIELD_ORDER_1 = tuple(
    (field, field)
    for field in ('有效期限', '签发机关')
)

# Residence permit
RP_CN_NAME = '居住证'
RP_CLASSIFY = 10087
RP_FIELD_ORDER_0 = tuple(
    (field, field)
    for field in ('姓名', '公民身份号码', '出生年月', '住址', '性别')
)
# Same table object as the identity card's second table.
RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1
# VAT invoice
VAT_CN_NAME = 'VAT普票'
VAT_CLASSIFY = 0

# Ordered 2-tuples of strings.
# NOTE(review): presumably (output label, key in the recognition result) -
# confirm against the code that consumes this table.
VAT_FIELD_ORDER = (
    ('发票代码', '发票代码'),
    ('发票代码(开具)', '发票代码(开具)'),
    ('发票号码', '发票号码'),
    ('发票号码(开具)', '发票号码(开具)'),
    ('开票日期', '开票日期'),
    ('校验码', '校验码'),
    ('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'),
    ('金额合计', '开具金额合计(不含税)'),
    ('税率', '税率'),
    ('税额合计', '税额合计'),
    ('价税合计小写', '价税合计(小写)'),
    ('价税合计大写', '价税合计(大写)'),
    ('购方名称', '购买方名称'),
    ('购方纳税人识别号', '购买方纳税人识别号'),
    ('购方地址、电话', '购买方地址、电话'),
    ('购方开户行及账号', '购买方开户行及账号'),
    ('销方名称', '销售方名称'),
    ('销方纳税人识别号', '销售方纳税人识别号'),
    ('销方地址、电话', '销售方地址、电话'),
    ('销方开户行及账号', '销售方开户行及账号'),
    ('销售方:(章)', '销售方:(章)'),
    ('备注', '备注'),
)
# Motor vehicle registration certificate: display name, the two classifier ids
# defined for it, and the field order for pages 1-2 and pages 3-4.
# Entries are 2-tuples; presumably (recognition-result key, output label).
# An entry whose second element is None is a blank-row placeholder
# (its key literally reads "blank row placeholder").
MVC_CN_NAME = '机动车登记证书'
MVC_CLASSIFY = 28
MVC_CLASSIFY_SE = 10086
MVC_FIELD_ORDER_1_2 = (
    ('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
    ('3.登记日期', '登记日期'),
    ('9.车辆识别代号/车架号', '车辆识别代号/车架号'),
    ('32.车辆出厂日期', '车辆出厂日期'),
    ('34.发证日期', '发证日期'),
    ('30.使用性质', '使用性质'),
    ('31.车辆获得方式', '车辆获得方式'),
    ('4.机动车登记编号', '机动车登记编号'),
    ('空行占位', None),
    ('5.车辆类型', '车辆类型'),
    ('6.车辆品牌', '车辆品牌'),
    ('7.车辆型号', '车辆型号'),
    ('8.车身颜色', '车身颜色'),
    ('10.国产/进口', '国产/进口'),
    ('11.发动机号', '发动机号'),
    ('12.发动机型号', '发动机型号'),
    ('15.制造厂名称', '制造厂名称'),
    ('2.登记机关', '登记机关'),
    ('编号', '机动车登记证书编号'),
)
MVC_FIELD_ORDER_3_4 = (
    ('姓名/名称', '姓名/名称'),
    ('身份证明名称/号码', '身份证明名称/号码'),
    ('转移登记日期', '转移登记日期'),
)
# Field order for the "SE" variant of the motor vehicle registration
# certificate: a longer page 1-2 list (items 1-34) plus the same page 3-4 list.
# An entry whose second element is None is a blank-row placeholder.
MVC_SE_FIELD_ORDER_1_2 = (
    ('9.车辆识别代号/车架号', '车辆识别代号/车架号'),
    ('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
    ('空行占位', None),
    ('3.登记日期', '登记日期'),
    ('32.车辆出厂日期', '车辆出厂日期'),
    ('34.发证日期', '发证日期'),
    ('30.使用性质', '使用性质'),
    ('31.车辆获得方式', '车辆获得方式'),
    ('5.车辆类型', '车辆类型'),
    ('6.车辆品牌', '车辆品牌'),
    ('7.车辆型号', '车辆型号'),
    ('8.车身颜色', '车身颜色'),
    ('10.国产/进口', '国产/进口'),
    ('11.发动机号', '发动机号'),
    ('12.发动机型号', '发动机型号'),
    ('13.燃料种类', '燃料种类'),
    ('14.排量/功率', '排量/功率'),
    ('15.制造厂名称', '制造厂名称'),
    ('16.转向形式', '转向形式'),
    ('17.轮距', '轮距'),
    ('18.轮胎数', '轮胎数'),
    ('19.轮胎规格', '轮胎规格'),
    ('20.钢板弹簧片数', '钢板弹簧片数'),
    ('21.轴距', '轴距'),
    ('22.轴数', '轴数'),
    ('23.外廓尺寸', '外廓尺寸'),
    ('24.货厢内部尺寸', '货厢内部尺寸'),
    ('25.总质量', '总质量'),
    ('26.核定载质量', '核定载质量'),
    ('27.核定载客', '核定载客'),
    ('28.准牵引总质量', '准牵引总质量'),
    ('29.驾驶室载客', '驾驶室载客'),
    ('2.登记机关', '登记机关'),
    ('4.机动车登记编号', '机动车登记编号'),
    ('编号', '机动车登记证书编号'),
)
MVC_SE_FIELD_ORDER_3_4 = (
    ('姓名/名称', '姓名/名称'),
    ('身份证明名称/号码', '身份证明名称/号码'),
    ('转移登记日期', '转移登记日期'),
)
# Unified motor vehicle sales invoice: display name, classifier id, field order.
# Entries are 2-tuples; presumably (recognition-result key, output label).
# The "nodo" markers are carried over verbatim from the original; their meaning
# is not documented here, and attaching each one to the preceding entry is a
# reconstruction - confirm with the author.
# NOTE(review): the placeholder entry here uses an empty-string key, whereas the
# other field lists use '空行占位' for the same (key, None) shape - confirm
# whether the difference is intentional.
MVI_CN_NAME = '机动车销售统一发票'
MVI_CLASSIFY = 29
MVI_FIELD_ORDER = (
    ('发票代码', '发票代码'),
    ('发票号码', '发票号码'),
    ('开票日期', '开票日期'),
    ('不含税价', '不含税价'),
    ('发票类型', '发票联'),
    ('购方名称', '购买方名称'),
    ('购买方身份证号或组织机构代码', '购买方证件号码'),
    ('纳税人识别号', '纳税人识别号'),  # nodo
    ('车辆识别代码', '车架号'),
    ('价税合计小写', '价税合计小写'),
    ('销方名称', '销货单位名称'),
    ('增值税税额', '增值税税额'),
    ('增值税税率', '增值税税率'),  # nodo
    ('发票章有无', '发票章有无'),  # nodo: national invoice supervision seal / seller's seal
    ('价税合计大写', '价税合计大写'),  # nodo
    ('', None),
    ('发动机号码', '发动机号'),
    ('车辆类型', '车辆类型'),  # nodo
    ('厂牌型号', '厂牌型号'),  # nodo
    ('产地', '产地'),  # nodo
    ('合格证号', '合格证号'),  # nodo
    ('进口证明书号', '进口证明书号'),  # nodo
    ('商检单号', '商检单号'),  # nodo
    ('电话', '电话'),  # nodo
    ('销方纳税人识别号', '销货方纳税人识别号'),
    ('账号', '账号'),  # nodo
    ('地址', '地址'),  # nodo
    ('开户银行', '开户银行'),  # nodo
    ('主管税务机关及代码', '主管税务机关及代码'),  # nodo
    ('吨位', '吨位'),  # nodo
    ('限乘人数', '限乘人数'),  # nodo
)

# These four document types have no pid assigned.
IC_PID = VAT_PID = MVC_PID = MVI_PID = None
# Business licence: display name, classifier id, pid and field order.
# Entries are 2-tuples; presumably (recognition-result key, output label).
BL_CN_NAME = '营业执照'
BL_CLASSIFY = 31
BL_PID = 41
BL_FIELD_ORDER = (
    ('注册号', '统一社会信用代码'),
    ('企业名称', '名称'),
    ('企业类型', '类型'),
    ('经营者姓名', '法定代表人'),
    ('成立日期', '成立日期'),
    ('营业期限', '营业期限'),
    ('注册资本', '注册资本'),
    ('地址', '住所'),
    ('经营范围', '经营范围'),
)
# Used-car invoice: display name, classifier id, pid and field order.
# Entries are 2-tuples; presumably (recognition-result key, output label).
# The entry whose second element is None is a blank-row placeholder.
UCI_CN_NAME = '二手车发票'
UCI_CLASSIFY = 1
UCI_PID = 60
UCI_FIELD_ORDER = (
    ('发票代码', '发票代码'),
    ('发票号码', '发票号码'),
    ('开票日期', '开票日期'),
    ('车价合计', '车价合计小写'),
    ('发票联', '发票联'),
    ('购方单位', '买方单位/个人'),
    ('购方号码', '买方单位代码/身份证号码'),
    ('车架号码', '车架号'),
    ('车价合计大写', '车价合计大写'),
    ('二手车市场', '二手车市场'),
    ('发票章有无', '发票章有无'),
    ('空行占位', None),
    ('车牌照号', '车牌照号'),
    ('登记证号', '登记证号'),
    ('购方地址', '买方单位/住址'),
    ('车辆类型', '车辆类型'),
    ('厂牌型号', '厂牌型号'),
    ('车管所名称', '转入地车辆管理所名称'),
    ('销方名称', '卖方单位/个人'),
    ('销方号码', '卖方单位代码/身份证号码'),
    ('销方地址', '卖方单位/个人住址'),
)
# Hong Kong / Macau / Taiwan travel permit: display name, classifier id, pid
# and field order.
# Entries are 2-tuples; presumably (recognition-result key, output label).
EEP_CN_NAME = '港澳台通行证'
EEP_CLASSIFY = 30
EEP_PID = 1018
EEP_FIELD_ORDER = (
    ('中文名', '姓名'),
    # The original has a bare "English name" comment at this position;
    # no entry for it is defined.
    ('证件号码', '证件号码'),
    ('签发次数', '换证次数(签发次数)'),
    ('有效期限', '有效期限'),
    ('出生日期', '出生日期'),
    ('性别', '性别'),
    ('签发机关', '签发机关'),
    ('签发地点', '签发地点'),
)
# Vehicle licence: display name, classifier id, pid and the field order of
# each page.
# Entries are 2-tuples; presumably (recognition-result key, output label).
# The labels carry an item number in front of the field name.
DL_CN_NAME = '行驶证'
DL_CLASSIFY = 32
DL_PID = 5
DL_FIELD_ORDER_0 = (
    ('号牌号码', '1 号牌号码'),
    ('所有人', '3 所有人'),
    ('使用性质', '5 使用性质'),
    ('车辆识别代码', '7 车辆识别代号'),
    ('注册日期', '9 注册日期'),
    ('发证日期', '10 发证日期'),
    ('车辆类型', '2 车辆类型'),
    ('地址', '4 住址'),
    ('品牌型号', '6 品牌型号'),
    ('发动机号', '8 发动机号码'),
)
DL_FIELD_ORDER_1 = (
    ('号牌号码', '1 号牌号码'),
    ('档案编号', '11 档案编号'),
    ('核定载人数', '12 核定载人数'),
    ('总质量', '13 总质量'),
    ('整备质量', '14 整备质量'),
    # Label corrected from '15 核对载质量': the key and every sibling label
    # use 核定 ("approved"), not 核对 ("checked").
    ('核定载质量', '15 核定载质量'),
    ('外廓尺寸', '16 外廓尺寸'),
    ('准牵引总质量', '17 准牵引总质量'),
)
# Passport: display name, classifier id, pid and field order.
# Entries are 2-tuples; presumably (recognition-result key, output label).
# The labels are bilingual (Chinese/English).
PP_CN_NAME = '护照'
PP_CLASSIFY = 3
PP_PID = 8
PP_FIELD_ORDER = (
    ('类型', '类型/Type'),
    ('英文姓名', '姓名/Name'),
    ('护照号码', '护照号码/Passport No'),
    ('有效期至', '有效期至/Date of expiry'),
    ('签发日期', '签发日期/Date of issue'),
    ('国家码', '国家码/Country Code'),
    ('性别', '性别/Sex'),
    ('国籍', '国籍/Nationality'),
    ('出生日期', '出生日期/Date of birth'),
    ('出生地点', '出生地点/Place of birth'),
    ('签发地点', '签发地点/Place of issue'),
)
# Bank card: display name, classifier id, pid and field order.
BC_CN_NAME = '银行卡'
BC_CLASSIFY = 37
BC_PID = 4
# Fuller key list left commented out by the original author; only three of
# these keys are used in BC_FIELD_ORDER below.
# BC_FIELD = (('CardNum', '银行卡号'),
#             ('BankName', '发卡行名称'),
#             ('CardName', '银行卡名称'),
#             ('BankCode', '发卡行代号'),
#             ('CardType', '银行卡类型'),
#             ('Date', '日期'))
BC_FIELD_ORDER = (
    ('BankName', '发卡行名称'),
    ('CardNum', '银行卡号'),
    ('CardType', '银行卡类型'),
)
# Values treated as a success code; both the string and the integer form of
# zero are accepted.
SUCCESS_CODE_SET = {'0', 0}
# Per-classify selection of a field order for documents that have two layouts.
# Each value is (marker key, field order A, field order B). The marker key is
# always the key of an entry in field order A, so presumably A is used when the
# marker key is present in the result and B otherwise - confirm with the
# consumer of this map.
FIELD_ORDER_MAP = {
    IC_CLASSIFY: ('有效期限', IC_FIELD_ORDER_1, IC_FIELD_ORDER_0),
    RP_CLASSIFY: ('有效期限', RP_FIELD_ORDER_1, RP_FIELD_ORDER_0),
    DL_CLASSIFY: ('档案编号', DL_FIELD_ORDER_1, DL_FIELD_ORDER_0),
    MVC_CLASSIFY: ('转移登记日期', MVC_FIELD_ORDER_3_4, MVC_FIELD_ORDER_1_2),
    MVC_CLASSIFY_SE: ('转移登记日期', MVC_SE_FIELD_ORDER_3_4, MVC_SE_FIELD_ORDER_1_2),
}
# Ordered (classify id, info) pairs, one per supported document type.
# Each info tuple is (pid, display name, field order, flag, flag).
# The field order is None exactly for the types listed in FIELD_ORDER_MAP, and
# the first flag is True for exactly those types. The second flag is True only
# for the motor vehicle registration certificate. What the two flags mean is
# not visible here - confirm with the consumer.
LICENSE_ORDER = (
    (MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False)),
    (IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False)),
    (RP_CLASSIFY, (None, RP_CN_NAME, None, True, False)),
    (BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False)),
    (BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False)),
    (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False)),
    (EEP_CLASSIFY, (EEP_PID, EEP_CN_NAME, EEP_FIELD_ORDER, False, False)),
    (DL_CLASSIFY, (DL_PID, DL_CN_NAME, None, True, False)),
    (PP_CLASSIFY, (PP_PID, PP_CN_NAME, PP_FIELD_ORDER, False, False)),
    (MVC_CLASSIFY, (MVC_PID, MVC_CN_NAME, None, True, True)),
    (VAT_CLASSIFY, (VAT_PID, VAT_CN_NAME, VAT_FIELD_ORDER, False, False)),
)
# Same data keyed by classify id for direct lookup.
LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
# Classify-id groups. SET_1 holds the four types whose pid is None above;
# SET_2 holds the types that have a pid. How the groups are used is not
# visible here.
OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
LICENSE_CLASSIFY_SET_1 = {
    IC_CLASSIFY,
    VAT_CLASSIFY,
    MVC_CLASSIFY,
    MVI_CLASSIFY,
}
LICENSE_CLASSIFY_SET_2 = {
    BL_CLASSIFY,
    UCI_CLASSIFY,
    EEP_CLASSIFY,
    DL_CLASSIFY,
    PP_CLASSIFY,
    BC_CLASSIFY,
}
src/apps/doc/ocr/wb.py
View file @
2a72302
...
...
@@ -30,51 +30,114 @@ class BSWorkbook(Workbook):
self
.
MAX_MEAN
=
31
@staticmethod
def
sheet_prune
(
ws
,
classify
):
ws
.
insert_cols
(
1
,
amount
=
consts
.
FIXED_COL_AMOUNT
)
moved_col_set
=
set
()
header_col_set
=
set
()
# 根据第一行关键词排列
for
col
in
range
(
consts
.
FIXED_COL_AMOUNT
+
1
,
ws
.
max_column
+
1
):
header_value
=
ws
.
cell
(
1
,
col
)
.
value
def
header_collect
(
ws
,
sheet_header_info
,
header_info
):
# sheet_header_info = {
# 'sheet_name': {
# 'summary_col': 1,
# 'date_col': 1,
# 'amount_col': 1,
# 'over_col': 1,
# 'income_col': 1,
# 'outlay_col': 1,
# 'borrow_col': 1,
# 'min_row': 2,
# 'find_count': 3,
# 'find_col': {1},
# 'header': ('日期', '金额')
# }
# }
# header_info = {
# 'summary_col': {
# 5: 2,
# 3: 1,
# },
# 'date_col': {},
# 'amount_col': {},
# 'over_col': {},
# 'income_col': {},
# 'outlay_col': {},
# 'borrow_col': {},
# }
# 第一行关键词
find_count
=
0
for
first_row
in
ws
.
iter_rows
(
max_row
=
1
,
min_row
=
1
,
values_only
=
True
):
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
HEADER_KEY
,
first_row
)
for
idx
,
header_value
in
enumerate
(
first_row
):
header_col
=
consts
.
HEADERS_MAPPING
.
get
(
header_value
)
if
header_col
is
not
None
and
header_col
not
in
header_col_set
:
letter
=
get_column_letter
(
col
)
ws
.
move_range
(
"{0}1:{0}{1}"
.
format
(
letter
,
ws
.
max_row
),
cols
=
header_col
-
col
)
moved_col_set
.
add
(
col
)
header_col_set
.
add
(
header_col
)
elif
header_value
in
consts
.
BORROW_HEADERS_SET
:
letter
=
get_column_letter
(
col
)
ws
.
move_range
(
"{0}1:{0}{1}"
.
format
(
letter
,
ws
.
max_row
),
cols
=
consts
.
BORROW_HEADER_COL
-
col
)
moved_col_set
.
add
(
col
)
header_col_set
.
add
(
consts
.
BORROW_HEADER_COL
)
elif
header_value
in
consts
.
INCOME_HEADERS_SET
:
letter
=
get_column_letter
(
col
)
ws
.
move_range
(
"{0}1:{0}{1}"
.
format
(
letter
,
ws
.
max_row
),
cols
=
consts
.
INCOME_HEADER_COL
-
col
)
moved_col_set
.
add
(
col
)
header_col_set
.
add
(
consts
.
INCOME_HEADER_COL
)
elif
header_value
in
consts
.
OUTLAY_HEADERS_SET
:
letter
=
get_column_letter
(
col
)
ws
.
move_range
(
"{0}1:{0}{1}"
.
format
(
letter
,
ws
.
max_row
),
cols
=
consts
.
OUTLAY_HEADER_COL
-
col
)
moved_col_set
.
add
(
col
)
header_col_set
.
add
(
consts
.
OUTLAY_HEADER_COL
)
# 缺失表头再次查找
for
header_col
in
range
(
1
,
consts
.
FIXED_COL_AMOUNT
+
1
):
if
header_col
in
header_col_set
or
header_col
==
consts
.
RESULT_HEADER_COL
:
continue
fix_col
=
consts
.
CLASSIFY_LIST
[
classify
][
1
][
header_col
-
1
]
if
fix_col
is
None
:
if
header_col
is
not
None
:
find_count
+=
1
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
header_col
,
idx
)
find_col_set
=
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
FIND_COL_KEY
,
set
())
find_col_set
.
add
(
idx
)
col_count
=
header_info
.
setdefault
(
header_col
,
{})
.
get
(
idx
)
header_info
.
setdefault
(
header_col
,
{})[
idx
]
=
1
if
col_count
is
None
else
col_count
+
1
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
FIND_COUNT_KEY
,
find_count
)
min_row
=
1
if
find_count
==
0
else
2
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
MIN_ROW_KEY
,
min_row
)
@staticmethod
def
header_statistics
(
sheet_header_info
,
header_info
,
classify
):
# statistics_header_info = {
# SUMMARY_KEY: 2,
# DATE_KEY: 3,
# AMOUNT_KEY: 4,
# OVER_KEY: 5,
# IMCOME_KEY: 6,
# OUTLAY_KEY: 7,
# BORROW_KEY: 8,
# 'header': ('日期', '金额')
# }
statistics_header_info
=
{}
sheet_order_list
=
sorted
(
sheet_header_info
,
reverse
=
True
,
key
=
lambda
x
:
sheet_header_info
[
x
][
consts
.
FIND_COUNT_KEY
])
best_sheet_info
=
sheet_header_info
.
get
(
sheet_order_list
[
0
])
if
best_sheet_info
.
get
(
consts
.
FIND_COUNT_KEY
,
0
)
==
0
:
for
key
,
value
in
consts
.
CLASSIFY_MAP
.
items
():
col
=
consts
.
CLASSIFY_LIST
[
classify
][
1
][
value
]
-
1
statistics_header_info
[
key
]
=
col
statistics_header_info
[
consts
.
HEADER_KEY
]
=
consts
.
CLASSIFY_HEADER_LIST
[
classify
]
else
:
find_col_set
=
best_sheet_info
.
get
(
consts
.
FIND_COL_KEY
,
set
())
# SUMMARY_KEY DATE_KEY OVER_KEY BORROW_KEY
for
key
in
consts
.
KEY_LIST
:
col
=
best_sheet_info
.
get
(
key
)
if
col
is
None
:
col_dict
=
header_info
.
get
(
key
,
{})
for
idx
in
sorted
(
col_dict
,
key
=
lambda
x
:
col_dict
[
x
],
reverse
=
True
):
if
idx
in
find_col_set
:
continue
fix_col
=
fix_col
+
consts
.
FIXED_COL_AMOUNT
if
fix_col
in
moved_col_set
:
col
=
idx
find_col_set
.
add
(
col
)
break
letter
=
get_column_letter
(
fix_col
)
ws
.
move_range
(
"{0}1:{0}{1}"
.
format
(
letter
,
ws
.
max_row
),
cols
=
header_col
-
fix_col
)
else
:
fixed_col
=
consts
.
CLASSIFY_LIST
[
classify
][
1
][
consts
.
CLASSIFY_MAP
[
key
]]
-
1
if
fixed_col
not
in
find_col_set
:
col
=
fixed_col
find_col_set
.
add
(
col
)
statistics_header_info
[
key
]
=
col
statistics_header_info
[
consts
.
HEADER_KEY
]
=
best_sheet_info
.
get
(
consts
.
HEADER_KEY
)
return
statistics_header_info
ws
.
delete_cols
(
consts
.
FIXED_COL_AMOUNT
+
1
,
amount
=
ws
.
max_column
)
min_row
=
1
if
len
(
moved_col_set
)
==
0
else
2
return
min_row
@staticmethod
def
get_data_col_min_row
(
sheet
,
sheet_header_info
,
header_info
,
classify
):
date_col
=
sheet_header_info
.
get
(
sheet
,
{})
.
get
(
consts
.
DATE_KEY
)
if
date_col
is
None
:
date_col_dict
=
header_info
.
get
(
consts
.
DATE_KEY
,
{})
find_col_set
=
sheet_header_info
.
get
(
sheet
,
{})
.
get
(
consts
.
FIND_COL_KEY
,
set
())
for
idx
in
sorted
(
date_col_dict
,
key
=
lambda
x
:
date_col_dict
[
x
],
reverse
=
True
):
if
idx
in
find_col_set
:
continue
date_col
=
idx
break
else
:
fixed_col
=
consts
.
CLASSIFY_LIST
[
classify
][
1
][
consts
.
CLASSIFY_MAP
[
consts
.
DATE_KEY
]]
if
fixed_col
not
in
find_col_set
:
date_col
=
fixed_col
min_row
=
sheet_header_info
.
get
(
sheet
,
{})
.
get
(
consts
.
MIN_ROW_KEY
,
2
)
return
date_col
,
min_row
@staticmethod
def
month_split
(
dti
,
date_list
,
date_statistics
):
...
...
@@ -122,8 +185,14 @@ class BSWorkbook(Workbook):
reverse_trend
=
-
1
return
reverse_trend
def
sheet_split
(
self
,
ws
,
month_mapping
,
reverse_trend_list
,
min_row
,
date_list
,
date_statistics
):
for
date_tuple_src
in
ws
.
iter_cols
(
min_col
=
1
,
max_col
=
1
,
min_row
=
min_row
,
values_only
=
True
):
def
sheet_split
(
self
,
ws
,
date_col
,
min_row
,
month_mapping
,
reverse_trend_list
,
date_list
,
date_statistics
):
if
date_col
is
None
:
# month_info process
month_info
=
month_mapping
.
setdefault
(
'xxxx-xx'
,
[])
month_info
.
append
((
ws
.
title
,
min_row
,
ws
.
max_row
,
0
))
return
date_col
=
date_col
+
1
for
date_tuple_src
in
ws
.
iter_cols
(
min_col
=
date_col
,
max_col
=
date_col
,
min_row
=
min_row
,
values_only
=
True
):
date_tuple
=
[
date
[:
10
]
if
isinstance
(
date
,
str
)
else
date
for
date
in
date_tuple_src
]
dt_array
,
tz_parsed
=
tslib
.
array_to_datetime
(
np
.
array
(
date_tuple
,
copy
=
False
,
dtype
=
np
.
object_
),
...
...
@@ -199,11 +268,12 @@ class BSWorkbook(Workbook):
return
amount_str
# 1.替换
res_str
=
amount_str
.
translate
(
consts
.
TRANS
)
# 2.删除多余的-
res_str
=
res_str
[
0
]
+
res_str
[
1
:]
.
replace
(
'-'
,
''
)
# 3.首字符处理
if
res_str
[
0
]
in
consts
.
ERROR_CHARS
:
res_str
=
'-{0}'
.
format
(
res_str
[
1
:])
# 2.首字符处理
first_char
=
res_str
[
0
]
if
first_char
in
consts
.
ERROR_CHARS
:
first_char
=
'-'
# 3.删除多余的-
res_str
=
first_char
+
res_str
[
1
:]
.
replace
(
'-'
,
''
)
# 4.逗号与句号处理
if
len
(
res_str
)
>=
4
:
period_idx
=
len
(
res_str
)
-
3
...
...
@@ -213,13 +283,29 @@ class BSWorkbook(Workbook):
res_str
=
'{0}.{1}'
.
format
(
res_str
[:
period_idx
],
res_str
[
period_idx
+
1
:])
return
res_str
def
build_month_sheet
(
self
,
card
,
month_mapping
,
ms
,
is_reverse
):
def
build_month_sheet
(
self
,
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
):
summary_cell_idx
=
statistics_header_info
.
get
(
consts
.
SUMMARY_KEY
)
date_cell_idx
=
statistics_header_info
.
get
(
consts
.
DATE_KEY
)
amount_cell_idx
=
statistics_header_info
.
get
(
consts
.
AMOUNT_KEY
)
# None or src or append
over_cell_idx
=
statistics_header_info
.
get
(
consts
.
OVER_KEY
)
income_cell_idx
=
statistics_header_info
.
get
(
consts
.
IMCOME_KEY
)
outlay_cell_idx
=
statistics_header_info
.
get
(
consts
.
OUTLAY_KEY
)
borrow_cell_idx
=
statistics_header_info
.
get
(
consts
.
BORROW_KEY
)
header
=
list
(
statistics_header_info
.
get
(
consts
.
HEADER_KEY
))
add_col
=
[
'核对结果'
]
if
amount_cell_idx
is
None
:
if
income_cell_idx
is
not
None
or
outlay_cell_idx
is
not
None
:
add_col
=
[
'金额'
,
'核对结果'
]
amount_cell_idx
=
len
(
header
)
header
.
extend
(
add_col
)
tmp_ws
=
self
.
create_sheet
(
'tmp_ws'
)
for
month
in
sorted
(
month_mapping
.
keys
()):
# 3.1.拷贝数据
parts
=
month_mapping
.
get
(
month
)
new_ws
=
self
.
create_sheet
(
'{0}({1})'
.
format
(
month
,
card
[
-
6
:]))
new_ws
.
append
(
consts
.
FIXED_HEADERS
)
new_ws
.
append
(
header
)
for
part
in
parts
:
ws
=
self
.
get_sheet_by_name
(
part
[
0
])
for
row_value
in
ws
.
iter_rows
(
min_row
=
part
[
1
],
max_row
=
part
[
2
],
values_only
=
True
):
...
...
@@ -227,76 +313,95 @@ class BSWorkbook(Workbook):
# 3.2.提取信息、高亮
amount_mapping
=
{}
amount_fill_row
=
set
()
for
rows
in
new_ws
.
iter_rows
(
min_row
=
2
):
summary_cell
=
rows
[
consts
.
SUMMARY_IDX
]
date_cell
=
rows
[
consts
.
DATE_IDX
]
amount_cell
=
rows
[
consts
.
AMOUNT_IDX
]
row
=
summary_cell
.
row
# TODO 删除空行
summary_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
summary_cell_idx
]
date_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
date_cell_idx
]
amount_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
amount_cell_idx
]
over_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
over_cell_idx
]
income_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
income_cell_idx
]
outlay_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
outlay_cell_idx
]
borrow_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
borrow_cell_idx
]
summary_cell_value
=
None
if
summary_cell
is
None
else
summary_cell
.
value
date_cell_value
=
None
if
summary_cell
is
None
else
date_cell
.
value
amount_cell_value
=
None
if
summary_cell
is
None
else
amount_cell
.
value
over_cell_value
=
None
if
summary_cell
is
None
else
over_cell
.
value
income_cell_value
=
None
if
summary_cell
is
None
else
income_cell
.
value
outlay_cell_value
=
None
if
summary_cell
is
None
else
outlay_cell
.
value
borrow_cell_value
=
None
if
summary_cell
is
None
else
borrow_cell
.
value
# row = summary_cell.row
if
summary_cell
is
not
None
:
# 关键词1提取
if
summary_cell
.
value
in
self
.
interest_keyword
:
ms
.
append
((
summary_cell
.
value
,
date_cell
.
value
,
amount_cell
.
value
))
if
summary_cell_
value
in
self
.
interest_keyword
:
ms
.
append
((
summary_cell_value
,
date_cell_value
,
amount_cell_
value
))
# 关键词2提取至临时表
elif
summary_cell
.
value
in
self
.
salary_keyword
:
tmp_ws
.
append
((
summary_cell
.
value
,
date_cell
.
value
,
amount_cell
.
value
))
elif
summary_cell_
value
in
self
.
salary_keyword
:
tmp_ws
.
append
((
summary_cell_value
,
date_cell_value
,
amount_cell_
value
))
# 贷款关键词高亮
elif
summary_cell
.
value
in
self
.
loan_keyword
:
elif
summary_cell_
value
in
self
.
loan_keyword
:
summary_cell
.
fill
=
self
.
loan_fill
amount_error
=
False
# 3.3.余额转数值
over_cell
=
rows
[
consts
.
OVER_IDX
]
over_success
=
False
if
over_cell
is
not
None
:
try
:
over_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
over_cell
.
value
))
over_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
over_cell_
value
))
except
Exception
as
e
:
amount_error
=
True
pass
else
:
over_success
=
True
over_cell
.
number_format
=
numbers
.
FORMAT_NUMBER_00
# 3.4.金额转数值
amount_success
=
False
if
amount_cell
is
not
None
:
try
:
try
:
amount_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
amount_cell
.
value
))
amount_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
amount_cell_
value
))
except
Exception
as
e
:
try
:
amount_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
rows
[
consts
.
INCOME_IDX
]
.
value
))
amount_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
income_cell_
value
))
if
amount_cell
.
value
==
0
:
raise
elif
amount_cell
.
value
<
0
:
amount_cell
.
value
=
-
amount_cell
.
value
except
Exception
as
e
:
amount_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
rows
[
consts
.
OUTLAY_IDX
]
.
value
))
amount_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
outlay_cell_
value
))
if
amount_cell
.
value
>
0
:
amount_cell
.
value
=
-
amount_cell
.
value
except
Exception
as
e
:
amount_error
=
True
pass
else
:
if
rows
[
consts
.
BORROW_IDX
]
.
value
in
consts
.
BORROW_OUTLAY_SET
:
amount_success
=
True
if
borrow_cell_value
in
consts
.
BORROW_OUTLAY_SET
:
amount_cell
.
value
=
-
amount_cell
.
value
amount_cell
.
number_format
=
numbers
.
FORMAT_NUMBER_00
same_amount_mapping
=
amount_mapping
.
get
(
date_cell
.
value
,
{})
fill_rows
=
same_amount_mapping
.
get
(
-
amount_cell
.
value
)
if
fill_rows
:
amount_fill_row
.
add
(
row
)
amount_fill_row
.
add
(
amount_cell
.
row
)
amount_fill_row
.
update
(
fill_rows
)
amount_mapping
.
setdefault
(
date_cell
.
value
,
{})
.
setdefault
(
amount_cell
.
value
,
[])
.
append
(
row
)
amount_cell
.
value
,
[])
.
append
(
amount_cell
.
row
)
# 3.5.核对结果
if
row
>
2
and
not
amount_error
:
if
amount_success
and
over_success
and
amount_cell
.
row
>
2
:
amount_col_letter
=
get_column_letter
(
amount_cell_idx
+
1
)
over_col_letter
=
get_column_letter
(
over_cell_idx
+
1
)
if
is_reverse
:
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(
D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3
}")'
.
format
(
row
-
1
,
row
,
*
self
.
proof_res
)
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(
{2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5
}")'
.
format
(
amount_cell
.
row
-
1
,
amount_cell
.
row
,
over_col_letter
,
amount_col_letter
,
*
self
.
proof_res
)
else
:
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'
.
format
(
row
,
row
-
1
,
*
self
.
proof_res
)
# 删除金额辅助列
new_ws
.
delete_cols
(
consts
.
BORROW_HEADER_COL
,
amount
=
new_ws
.
max_column
)
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'
.
format
(
amount_cell
.
row
,
amount_cell
.
row
-
1
,
over_col_letter
,
amount_col_letter
,
*
self
.
proof_res
)
# 3.6.同一天相同进出账高亮
del
amount_mapping
for
row
in
amount_fill_row
:
new_ws
[
row
][
consts
.
AMOUNT_IDX
]
.
fill
=
self
.
amount_fill
new_ws
[
row
][
amount_cell_idx
]
.
fill
=
self
.
amount_fill
# 关键词2信息提取
ms
.
append
(
self
.
blank_row
)
...
...
@@ -319,21 +424,27 @@ class BSWorkbook(Workbook):
# }
# }
for
card
,
summary
in
bs_summary
.
items
():
# 1.原表修剪、排列、按照月份分割
# 1.原表表头收集、按照月份分割
# 1.1 总结首行信息
classify
=
summary
.
get
(
'classify'
,
0
)
sheet_header_info
=
{}
header_info
=
{}
for
sheet
in
summary
.
get
(
'sheet'
,
[]):
ws
=
self
.
get_sheet_by_name
(
sheet
)
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
)
statistics_header_info
=
self
.
header_statistics
(
sheet_header_info
,
header_info
,
classify
)
# 1.2.按月份分割 min_row 正文第一行 date_col 日期行
start_date
=
summary
.
get
(
'start_date'
)
end_date
=
summary
.
get
(
'end_date'
)
date_statistics
=
False
if
start_date
is
None
or
end_date
is
None
:
date_statistics
=
True
date_list
=
[]
month_mapping
=
{}
reverse_trend_list
=
[]
date_statistics
=
True
if
start_date
is
None
or
end_date
is
None
else
False
# 用于判断是否需要收集各表中日期
date_list
=
[]
# 用于收集各表中日期
month_mapping
=
{}
# 用于创建月份表
reverse_trend_list
=
[]
# 用于判断倒序与正序
for
sheet
in
summary
.
get
(
'sheet'
,
[]):
ws
=
self
.
get_sheet_by_name
(
sheet
)
# 1.1.删除多余列、排列
min_row
=
self
.
sheet_prune
(
ws
,
summary
.
get
(
'classify'
,
0
))
# 1.2.按月份分割
self
.
sheet_split
(
ws
,
month_mapping
,
reverse_trend_list
,
min_row
,
date_list
,
date_statistics
)
date_col
,
min_row
=
self
.
get_data_col_min_row
(
sheet
,
sheet_header_info
,
header_info
,
classify
)
self
.
sheet_split
(
ws
,
date_col
,
min_row
,
month_mapping
,
reverse_trend_list
,
date_list
,
date_statistics
)
if
date_statistics
is
True
and
len
(
date_list
)
>
1
:
start_date
=
min
(
date_list
)
if
start_date
is
None
else
start_date
...
...
@@ -353,7 +464,7 @@ class BSWorkbook(Workbook):
for
month_list
in
month_mapping
.
values
():
month_list
.
sort
(
key
=
lambda
x
:
x
[
-
1
],
reverse
=
is_reverse
)
self
.
build_month_sheet
(
card
,
month_mapping
,
ms
,
is_reverse
)
self
.
build_month_sheet
(
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
)
# 4.删除原表
for
sheet
in
summary
.
get
(
'sheet'
):
...
...
src/apps/doc/ocr/wb_bak.py
0 → 100644
View file @
2a72302
import
locale
import
numpy
as
np
from
pandas._libs
import
tslib
from
pandas._libs.tslibs.nattype
import
NaTType
from
pandas.core.indexes.datetimes
import
DatetimeIndex
from
openpyxl
import
Workbook
from
openpyxl.styles
import
Border
,
Side
,
PatternFill
,
numbers
from
openpyxl.utils
import
get_column_letter
from
apps.doc
import
consts
class
BSWorkbook
(
Workbook
):
def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs):
    """Create the workbook and store the keyword sets, headers and fills.

    Args:
        interest_keyword: container of summary values treated as "interest".
        salary_keyword: container of summary values treated as "salary".
        loan_keyword: container of summary values treated as "loan".
        *args, **kwargs: forwarded unchanged to the parent ``Workbook``.
    """
    super().__init__(*args, **kwargs)
    # Side effect beyond this instance: switches the numeric locale so that
    # locale-aware number parsing follows en_US conventions.
    locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')

    # Keyword containers matched against each row's summary value.
    self.interest_keyword = interest_keyword
    self.salary_keyword = salary_keyword
    self.loan_keyword = loan_keyword

    # Metadata-sheet title prefix and the fixed rows written into that sheet.
    self.meta_sheet_title = '关键信息提取和展示'
    self.blank_row = (None,)
    self.code_header = ('页数', '电子回单验证码')
    self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果')
    self.keyword_header = ('关键词', '记账日期', '金额')

    # Pair of labels substituted into the balance-check formula.
    self.proof_res = ('对', '错')

    # Highlight fills for loan keywords and for matching amounts.
    self.loan_fill = PatternFill("solid", fgColor="00FFCC00")
    self.amount_fill = PatternFill("solid", fgColor="00FFFF00")
    # self.bd = Side(style='thin', color="000000")
    # self.border = Border(left=self.bd, top=self.bd, right=self.bd, bottom=self.bd)

    self.MAX_MEAN = 31
@staticmethod
def sheet_prune(ws, classify):
    """Rearrange a sheet's columns into the fixed layout and drop the rest.

    ``consts.FIXED_COL_AMOUNT`` empty columns are inserted on the left. Source
    columns whose first-row value is a known header are moved into their fixed
    slot. Slots still empty afterwards are filled from the default column
    positions configured for ``classify``. Everything right of the fixed
    columns is then deleted.

    Args:
        ws: worksheet to modify in place.
        classify: index into ``consts.CLASSIFY_LIST``.

    Returns:
        1 when no header was recognised in the first row, otherwise 2.
    """
    fixed_total = consts.FIXED_COL_AMOUNT
    ws.insert_cols(1, amount=fixed_total)

    relocated_sources = set()
    filled_targets = set()

    def relocate(source_col, target_col):
        # Move one whole column (row 1 through the current last row) sideways.
        column_letter = get_column_letter(source_col)
        ws.move_range("{0}1:{0}{1}".format(column_letter, ws.max_row), cols=target_col - source_col)

    # Pass 1: place columns according to the value in the first row.
    for source_col in range(fixed_total + 1, ws.max_column + 1):
        title = ws.cell(1, source_col).value
        target_col = consts.HEADERS_MAPPING.get(title)
        if target_col is None or target_col in filled_targets:
            # Not a (still free) directly mapped header: try the borrow /
            # income / outlay header sets, in that order.
            if title in consts.BORROW_HEADERS_SET:
                target_col = consts.BORROW_HEADER_COL
            elif title in consts.INCOME_HEADERS_SET:
                target_col = consts.INCOME_HEADER_COL
            elif title in consts.OUTLAY_HEADERS_SET:
                target_col = consts.OUTLAY_HEADER_COL
            else:
                continue
        relocate(source_col, target_col)
        relocated_sources.add(source_col)
        filled_targets.add(target_col)

    # Pass 2: fill slots that are still empty from the configured defaults.
    for target_col in range(1, fixed_total + 1):
        if target_col in filled_targets or target_col == consts.RESULT_HEADER_COL:
            continue
        default_offset = consts.CLASSIFY_LIST[classify][1][target_col - 1]
        if default_offset is None:
            continue
        source_col = default_offset + fixed_total
        if source_col in relocated_sources:
            # NOTE(review): this stops the whole fallback pass rather than
            # skipping just this slot - confirm `break` (not `continue`) is
            # intended.
            break
        relocate(source_col, target_col)

    ws.delete_cols(fixed_total + 1, amount=ws.max_column)
    return 2 if relocated_sources else 1
@staticmethod
def month_split(dti, date_list, date_statistics):
    """Find where each run of equal ``YYYY-MM`` values starts in ``dti``.

    Args:
        dti: datetime index; missing dates are tolerated.
        date_list: list that receives the first and the last valid date of
            ``dti`` when ``date_statistics`` is true (mutated in place).
        date_statistics: whether to collect those two dates.

    Returns:
        ``(month_list, idx_list)``: the month labels in order of appearance
        and the position at which each run starts. The first run always
        starts at 0, even when leading entries are missing.
    """
    labels = []
    starts = []
    previous = None
    for position, label in enumerate(dti.strftime('%Y-%m')):
        # Entries that come back as float are skipped (presumably how a
        # missing date is rendered by strftime).
        if isinstance(label, float):
            continue
        if label == previous:
            continue
        labels.append(label)
        if previous is None:
            if date_statistics:
                date_list.append(dti[position].date())
            # The first month owns every row from the top of the sheet.
            starts.append(0)
        else:
            starts.append(position)
        previous = label

    if date_statistics:
        # Walk backwards to the last entry that is a real date.
        for position in reversed(range(len(dti))):
            stamp = dti[position]
            if not isinstance(stamp, NaTType):
                date_list.append(stamp.date())
                break

    return labels, starts
@staticmethod
def get_reverse_trend(day_idx, idx_list):
    """Report whether day-of-month values mostly fall or mostly rise.

    Consecutive valid days are compared. A position listed in ``idx_list``
    restarts the comparison, so the jump between two months is not counted.
    NaN entries are ignored.

    Args:
        day_idx: iterable of day-of-month numbers, possibly containing NaN.
        idx_list: positions at which a new month starts.

    Returns:
        1 if decreases outnumber increases, -1 for the opposite, 0 on a tie.
    """
    balance = 0
    anchor = None
    for position, current in enumerate(day_idx):
        if np.isnan(current):
            continue
        if anchor is not None and position not in idx_list:
            if current < anchor:
                balance += 1
            elif current > anchor:
                balance -= 1
        anchor = current
    # Collapse the tally to its sign.
    return (balance > 0) - (balance < 0)
def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics):
    """Split one sheet into per-month row ranges based on its first column.

    The values of column 1 (from ``min_row`` down) are parsed as dates;
    string values are cut to their first 10 characters first and anything
    unparseable is coerced to a missing date. For each month found, a tuple
    ``(sheet title, first row, last row, mean day-of-month)`` is appended to
    ``month_mapping[month]``. When no month is found at all, the whole sheet
    is recorded under the key ``'xxxx-xx'`` with a mean of 0.

    Args:
        ws: worksheet to read.
        month_mapping: dict of month label -> list of range tuples (mutated).
        reverse_trend_list: receives this sheet's trend from
            ``get_reverse_trend`` when at least one month was found (mutated).
        min_row: first data row of the sheet (1-based).
        date_list: forwarded to ``month_split`` (mutated there).
        date_statistics: forwarded to ``month_split``.
    """
    for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True):
        date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src]
        # np.asarray instead of np.array(..., copy=False): building an array
        # from a list always needs a copy, which copy=False turns into a
        # ValueError on NumPy >= 2.0. On older NumPy the two are equivalent.
        # NOTE(review): tslib.array_to_datetime is a private pandas API and
        # its keyword set (e.g. require_iso8601) depends on the pandas version.
        dt_array, _ = tslib.array_to_datetime(
            np.asarray(date_tuple, dtype=np.object_),
            errors="coerce",
            utc=False,
            dayfirst=False,
            yearfirst=False,
            require_iso8601=True,
        )
        dti = DatetimeIndex(dt_array, tz=None, name=None)

        month_list, idx_list = self.month_split(dti, date_list, date_statistics)

        if len(month_list) == 0:
            # No usable date in this sheet: keep all rows in a catch-all bucket.
            month_info = month_mapping.setdefault('xxxx-xx', [])
            month_info.append((ws.title, min_row, ws.max_row, 0))
        else:
            day_idx = dti.day

            # Record whether this sheet's dates run backwards or forwards.
            reverse_trend = self.get_reverse_trend(day_idx, idx_list)
            reverse_trend_list.append(reverse_trend)

            # One range per month; positions are offsets from min_row.
            idx_list_max_idx = len(idx_list) - 1
            for i, item in enumerate(month_list):
                if i == idx_list_max_idx:
                    # Last month runs to the end of the sheet.
                    day_mean = np.mean(day_idx[idx_list[i]:].dropna())
                    month_mapping.setdefault(item, []).append(
                        (ws.title, idx_list[i] + min_row, ws.max_row, day_mean))
                else:
                    # Other months end on the row before the next month starts.
                    day_mean = np.mean(day_idx[idx_list[i]: idx_list[i + 1]].dropna())
                    month_mapping.setdefault(item, []).append(
                        (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean))
def build_metadata_rows(self, confidence, code, print_time, start_date, end_date):
    """Assemble the rows written at the top of the metadata sheet.

    Args:
        confidence: value shown next to the recognition-confidence label.
        code: iterable of rows placed under ``self.code_header``.
        print_time: value for the first cell of the date row.
        start_date: first date of the statement, or None.
        end_date: last date of the statement, or None.

    Returns:
        A new list of row tuples. The date row ends with the number of days
        between the two dates, or None when either date is missing.
    """
    span_days = None
    if start_date is not None and end_date is not None:
        span_days = (end_date - start_date).days

    return [
        ('流水识别置信度', confidence),
        self.blank_row,
        self.code_header,
        *code,
        self.blank_row,
        self.date_header,
        (print_time, start_date, end_date, span_days),
        self.blank_row,
        self.keyword_header,
    ]
def create_meta_sheet(self, card):
    """Return the metadata sheet for ``card``, creating it if necessary.

    If the first worksheet still has the title ``'Sheet'`` it is renamed and
    reused; otherwise a new sheet is created. The title is the metadata title
    followed by the last six characters of ``card`` in parentheses.
    """
    first_sheet = self.worksheets[0]
    reuse_first = first_sheet.title == 'Sheet'
    title = '{0}({1})'.format(self.meta_sheet_title, card[-6:])
    if reuse_first:
        first_sheet.title = title
        return first_sheet
    return self.create_sheet(title)
def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date):
    """Create the metadata sheet for ``card`` and fill in its header rows.

    The rows come from ``build_metadata_rows`` and the sheet from
    ``create_meta_sheet``; each row is appended in order.

    Returns:
        The metadata worksheet.
    """
    header_rows = self.build_metadata_rows(confidence, code, print_time, start_date, end_date)
    meta_ws = self.create_meta_sheet(card)
    for header_row in header_rows:
        meta_ws.append(header_row)
    return meta_ws
@staticmethod
def
amount_format
(
amount_str
):
if
not
isinstance
(
amount_str
,
str
)
or
amount_str
==
''
:
return
amount_str
# 1.替换
res_str
=
amount_str
.
translate
(
consts
.
TRANS
)
# 2.首字符处理
first_char
=
res_str
[
0
]
if
first_char
in
consts
.
ERROR_CHARS
:
first_char
=
'-'
# 3.删除多余的-
res_str
=
first_char
+
res_str
[
1
:]
.
replace
(
'-'
,
''
)
# 4.逗号与句号处理
if
len
(
res_str
)
>=
4
:
period_idx
=
len
(
res_str
)
-
3
if
res_str
[
period_idx
]
==
'.'
and
res_str
[
period_idx
-
1
]
==
','
:
res_str
=
'{0}{1}'
.
format
(
res_str
[:
period_idx
-
1
],
res_str
[
period_idx
:])
elif
res_str
[
period_idx
]
==
','
:
res_str
=
'{0}.{1}'
.
format
(
res_str
[:
period_idx
],
res_str
[
period_idx
+
1
:])
return
res_str
def build_month_sheet(self, card, month_mapping, ms, is_reverse):
    """Create one sheet per month, convert amounts to numbers and mark key rows.

    Args:
        card: Card identifier; its last six characters are used in sheet titles.
        month_mapping: Maps a month label to a list of parts. For each part,
            ``part[0]`` is a source sheet name and ``part[1]`` / ``part[2]``
            are the first and last row to copy from it.
        ms: Metadata sheet that receives the extracted keyword rows.
        is_reverse: Selects which of the two check-formula orientations is
            written into the result column.
    """
    # Scratch sheet collecting the salary-keyword rows; removed at the end.
    tmp_ws = self.create_sheet('tmp_ws')
    for month in sorted(month_mapping.keys()):
        # 3.1. Copy the data
        parts = month_mapping.get(month)
        new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
        new_ws.append(consts.FIXED_HEADERS)
        for part in parts:
            ws = self.get_sheet_by_name(part[0])
            for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
                new_ws.append(row_value)
        # 3.2. Extract information and highlight
        # amount_mapping: date value -> {amount -> [row numbers]}
        amount_mapping = {}
        # Row numbers whose amount cell will be highlighted in step 3.6.
        amount_fill_row = set()
        for rows in new_ws.iter_rows(min_row=2):
            summary_cell = rows[consts.SUMMARY_IDX]
            date_cell = rows[consts.DATE_IDX]
            amount_cell = rows[consts.AMOUNT_IDX]
            row = summary_cell.row
            # Keyword 1 extraction (written straight to the metadata sheet)
            if summary_cell.value in self.interest_keyword:
                ms.append((summary_cell.value, date_cell.value, amount_cell.value))
            # Keyword 2 extraction into the temporary sheet
            elif summary_cell.value in self.salary_keyword:
                tmp_ws.append((summary_cell.value, date_cell.value, amount_cell.value))
            # Highlight loan keywords
            elif summary_cell.value in self.loan_keyword:
                summary_cell.fill = self.loan_fill
            amount_error = False
            # 3.3. Convert the balance to a number
            over_cell = rows[consts.OVER_IDX]
            try:
                over_cell.value = locale.atof(self.amount_format(over_cell.value))
            except Exception as e:
                amount_error = True
            else:
                over_cell.number_format = numbers.FORMAT_NUMBER_00
            # 3.4. Convert the amount to a number
            try:
                try:
                    amount_cell.value = locale.atof(self.amount_format(amount_cell.value))
                except Exception as e:
                    # Amount column unusable: fall back to the income column,
                    # then to the outlay column.
                    try:
                        amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value))
                        if amount_cell.value == 0:
                            # Bare raise re-raises the exception currently being
                            # handled, which routes control to the outlay fallback.
                            raise
                        elif amount_cell.value < 0:
                            # Income is stored as a positive number.
                            amount_cell.value = -amount_cell.value
                    except Exception as e:
                        amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value))
                        if amount_cell.value > 0:
                            # Outlay is stored as a negative number.
                            amount_cell.value = -amount_cell.value
            except Exception as e:
                amount_error = True
            else:
                # Flip the sign when the borrow column marks the row as an outlay.
                if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET:
                    amount_cell.value = -amount_cell.value
                amount_cell.number_format = numbers.FORMAT_NUMBER_00
                # Look for earlier rows on the same date with the opposite amount.
                same_amount_mapping = amount_mapping.get(date_cell.value, {})
                fill_rows = same_amount_mapping.get(-amount_cell.value)
                if fill_rows:
                    amount_fill_row.add(row)
                    amount_fill_row.update(fill_rows)
                amount_mapping.setdefault(date_cell.value, {}).setdefault(amount_cell.value, []).append(row)
            # 3.5. Check result
            # The first data row (row 2) has no neighbour to compare against.
            if row > 2 and not amount_error:
                if is_reverse:
                    # Reverse orientation: D[row-1] is checked against D[row] + C[row-1].
                    rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(row - 1, row, *self.proof_res)
                else:
                    # Forward orientation: D[row] is checked against D[row-1] + C[row].
                    rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(row, row - 1, *self.proof_res)
        # Delete the auxiliary amount columns
        new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column)
        # 3.6. Highlight equal incoming/outgoing amounts on the same day
        del amount_mapping
        for row in amount_fill_row:
            new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill
    # Keyword 2 information extraction
    ms.append(self.blank_row)
    ms.append(self.keyword_header)
    for row in tmp_ws.iter_rows(values_only=True):
        ms.append(row)
    self.remove(tmp_ws)
def bs_rebuild(self, bs_summary):
    """Rebuild the workbook's statement sheets card by card.

    For every card the source sheets are pruned and split by month, a
    metadata sheet is built, per-month sheets are created, and finally the
    source sheets are removed.

    Args:
        bs_summary: Mapping of card number to a summary dict shaped like::

            {
                '<card number>': {
                    'classify': 0,
                    'confidence': 0.9,
                    'role': '<account holder name>',
                    'code': [('page', 'code')],
                    'print_time': 'datetime',
                    'start_date': 'datetime',
                    'end_date': 'datetime',
                    'sheet': ['sheet_name'],
                }
            }
    """
    for card, summary in bs_summary.items():
        # 1. Prune and arrange the source sheets, then split them by month.
        start_date = summary.get('start_date')
        end_date = summary.get('end_date')
        # Dates are collected from the sheets only when the summary lacks one.
        date_statistics = start_date is None or end_date is None
        # A missing 'sheet' key means there is nothing to process or remove.
        sheet_names = summary.get('sheet') or []

        date_list = []
        month_mapping = {}
        reverse_trend_list = []
        for sheet in sheet_names:
            ws = self.get_sheet_by_name(sheet)
            # 1.1. Delete superfluous columns and arrange the rest.
            min_row = self.sheet_prune(ws, summary.get('classify', 0))
            # 1.2. Split by month.
            self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics)

        if date_statistics and len(date_list) > 1:
            # Only fill in the end of the period that the summary did not supply.
            if start_date is None:
                start_date = min(date_list)
            if end_date is None:
                end_date = max(date_list)

        # 2. Metadata sheet.
        ms = self.build_meta_sheet(
            card,
            summary.get('confidence', 1),
            summary.get('code'),
            summary.get('print_time'),
            start_date,
            end_date,
        )

        # 3. Create the month sheets, extract / highlight key rows.
        # Process in reverse order when the collected trend values sum positive.
        is_reverse = sum(reverse_trend_list) > 0
        for month_list in month_mapping.values():
            month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
        self.build_month_sheet(card, month_mapping, ms, is_reverse)

        # 4. Remove the source sheets.
        for sheet in sheet_names:
            self.remove(self.get_sheet_by_name(sheet))
def license_rebuild(self, license_summary, document_scheme):
    """Write one sheet per license type listed in ``consts.LICENSE_ORDER``.

    Types without entries in ``license_summary`` are skipped. For each
    remaining type a sheet is created and every entry is written as
    ``(write_field, value)`` rows followed by a ``(None,)`` separator row.

    Args:
        license_summary: Maps a classify value to a list of field dicts.
            Entries may be appended under ``consts.RP_CLASSIFY`` as a side
            effect.
        document_scheme: Compared with ``consts.DOC_SCHEME_LIST[1]`` for
            types flagged as scheme dependent.
    """
    for classify, order_info in consts.LICENSE_ORDER:
        _, name, field_order, side_diff, scheme_diff = order_info

        entries = license_summary.get(classify)
        if not entries:
            continue

        sheet = self.create_sheet(name)

        # Scheme-dependent types switch to the alternative classify value.
        use_second_scheme = scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]
        if use_second_scheme:
            classify = consts.MVC_CLASSIFY_SE

        for entry in entries:
            # Entries of the IC type with category '1' are moved to the RP
            # list instead of being written to this sheet.
            is_moved = classify == consts.IC_CLASSIFY and entry.get('类别') == '1'
            if is_moved:
                rp_entries = license_summary.setdefault(consts.RP_CLASSIFY, [])
                rp_entries.append(entry)
                continue

            # Side-dependent types choose the field order by key presence.
            if side_diff:
                key, order_with_key, order_without_key = consts.FIELD_ORDER_MAP.get(classify)
                if key in entry:
                    field_order = order_with_key
                else:
                    field_order = order_without_key

            for search_field, write_field in field_order:
                field_value = entry.get(search_field, '')
                sheet.append((write_field, field_value))
            sheet.append((None, ))
def skip_img_sheet(self, skip_img):
    """Create the sheet that lists skipped images.

    Nothing happens when ``skip_img`` is empty or ``None``. Otherwise a
    sheet named ``consts.SKIP_IMG_SHEET_NAME`` is created, the header
    ``consts.SKIP_IMG_SHEET_HEADER`` is written, and each item of
    ``skip_img`` is appended as a row.
    """
    if not skip_img:
        return

    skipped_sheet = self.create_sheet(consts.SKIP_IMG_SHEET_NAME)
    skipped_sheet.append(consts.SKIP_IMG_SHEET_HEADER)
    for skipped_row in skip_img:
        skipped_sheet.append(skipped_row)
def rebuild(self, bs_summary, license_summary, skip_img, document_scheme):
    """Run the three rebuild steps in order.

    Args:
        bs_summary: Passed to ``bs_rebuild``.
        license_summary: Passed to ``license_rebuild``.
        skip_img: Passed to ``skip_img_sheet``.
        document_scheme: Passed to ``license_rebuild``.
    """
    self.bs_rebuild(bs_summary)
    self.license_rebuild(license_summary, document_scheme)
    self.skip_img_sheet(skip_img)
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment