Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
7b977821
authored
2021-02-04 15:20:27 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
special nhzs
1 parent
1b22a0ab
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
2 deletions
src/apps/doc/consts.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
7b97782
...
...
@@ -519,6 +519,8 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None,
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
# "38":"普通打印-无格线-农业银行-整数-特殊",
CLASSIFY_LIST
=
[
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
...
...
@@ -560,6 +562,8 @@ CLASSIFY_LIST = [
(
'针式打印-部分格线-竖版-邮储银行'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'针式打印-部分格线-竖版-邮储银行-绿卡'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'其他'
,
OTHER_TUPLE
),
(
'普通打印-无格线-农业银行-整数-特殊'
,
(
1
,
None
,
3
,
4
,
None
,
2
,
None
,
5
,
None
,
None
,
None
,
None
,
None
)),
]
CLASSIFY_HEADER_LIST
=
[
...
...
@@ -603,6 +607,8 @@ CLASSIFY_HEADER_LIST = [
(
'序号'
,
'交易日期'
,
'交易渠道'
,
'摘要'
,
'交易金额'
,
'账户余额'
,
'对方账号/卡号/汇票号'
,
'原子账号'
,
'交易机构名称'
),
(
'序号'
,
'交易日期'
,
'交易渠道'
,
'摘要'
,
'交易金额'
,
'账户余额'
,
'对方账号/卡号/汇票号'
,
'原子账号'
,
'交易机构名称'
),
OTHER_TUPLE
,
(
'交易日期'
,
'摘要/附言'
,
'交易金额'
,
'账户余额'
,
'对方账号和户名'
),
]
# ----------license相关------------------------------------------------------------------------------------------------
...
...
@@ -946,6 +952,8 @@ LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY,
LICENSE_CLASSIFY_SET_2
=
{
BL_CLASSIFY
,
EEP_CLASSIFY
,
DL_CLASSIFY
,
PP_CLASSIFY
,
BC_CLASSIFY
}
NYYH_CLASSIFY
=
{
17
,
18
}
NYZS_CLASSIFY
=
18
SPECIAL_NYZS_CLASSIFY
=
38
MS_CLASSIFY
=
21
MS_ERROR_COL
=
(
5
,
6
)
WECHART_CLASSIFY
=
12
...
...
src/apps/doc/ocr/wb.py
View file @
7b97782
...
...
@@ -127,7 +127,7 @@ class BSWorkbook(Workbook):
max_column_list
.
append
(
ws
.
max_column
)
@staticmethod
def
header_statistics
(
sheet_header_info
,
header_info
,
classify
):
def
header_statistics
(
sheet_header_info
,
header_info
,
classify
,
special_nhzs
):
# statistics_header_info = {
# SUMMARY_KEY: 2,
# DATE_KEY: 3,
...
...
@@ -144,6 +144,8 @@ class BSWorkbook(Workbook):
best_sheet_info
=
sheet_header_info
.
get
(
sheet_order_list
[
0
])
max_find_count
=
best_sheet_info
.
get
(
consts
.
FIND_COUNT_KEY
,
0
)
if
max_find_count
==
0
:
if
special_nhzs
:
classify
=
consts
.
SPECIAL_NYZS_CLASSIFY
for
key
,
value
in
consts
.
CLASSIFY_MAP
.
items
():
col
=
consts
.
CLASSIFY_LIST
[
classify
][
1
][
value
]
statistics_header_info
[
key
]
=
col
-
1
if
isinstance
(
col
,
int
)
else
None
...
...
@@ -572,6 +574,7 @@ class BSWorkbook(Workbook):
# }
# }
for
card
,
summary
in
bs_summary
.
items
():
special_nhzs
=
False
new_card
=
self
.
get_new_card
(
card
)
# 1.原表表头收集、按照月份分割
# 1.1 总结首行信息
...
...
@@ -580,10 +583,17 @@ class BSWorkbook(Workbook):
header_info
=
{}
max_column_list
=
[]
sheets_list
=
summary
.
get
(
'sheet'
,
[])
special_nhzs_max_col
=
0
for
sheet
in
sheets_list
:
ws
=
self
.
get_sheet_by_name
(
sheet
)
if
classify
==
consts
.
NYZS_CLASSIFY
:
special_nhzs_max_col
+=
ws
.
max_column
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
,
classify
)
statistics_header_info
,
max_find_count
=
self
.
header_statistics
(
sheet_header_info
,
header_info
,
classify
)
# 农业银行整数表头特殊处理
if
classify
==
consts
.
NYZS_CLASSIFY
and
round
(
special_nhzs_max_col
/
len
(
sheets_list
))
==
5
:
special_nhzs
=
True
statistics_header_info
,
max_find_count
=
self
.
header_statistics
(
sheet_header_info
,
header_info
,
classify
,
special_nhzs
)
max_column
=
max
(
max_column_list
)
# 1.2.按月份分割 min_row 正文第一行 date_col 日期行
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment