Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
56281e38
authored
2020-10-27 15:08:30 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
27db93d6
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
59 deletions
src/apps/doc/consts.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
56281e3
import
copy
PAGE_DEFAULT
=
1
PAGE_SIZE_DEFAULT
=
10
...
...
@@ -453,63 +455,6 @@ HEADERS_MAPPING.update(
OTHER_TUPLE
=
(
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
,
None
)
# {
# "0":"其他",
# "1":"普通打印-全表格-中国农业银行",
# "2":"普通打印-全表格-中国银行",
# "3":"普通打印-全表格-北京银行",
# "4":"普通打印-全表格-工商银行",
# "5":"普通打印-全表格-建设银行",
# "6":"普通打印-全表格-微信账单",
# "7":"普通打印-全表格-支付宝账单",
# "8":"普通打印-无格线-中国邮政储蓄银行",
# "9":"普通打印-无格线-交通银行",
# "10":"普通打印-无格线-农业银行整数",
# "11":"普通打印-无格线-农业银行银行活期扩张缩进",
# "12":"普通打印-无格线-招商银行",
# "13":"普通打印-无格线-招行电子账单",
# "14":"普通打印-无格线-民生银行",
# "15":"普通打印-部分格线-横版-中信银行",
# "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页",
# "17":"普通打印-部分格线-竖版-农业银行",
# "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细",
# "19":"普通打印-部分格线-竖版-平安电子账单",
# "20":"针式打印-全格线-建设银行",
# "21":"针式打印-部分格线-竖版-邮储银行账户交易",
# "22":"针式打印-部分格线-邮储银行一本通绿卡"
# }
# CLASSIFY_LIST = [
# ('其他', OTHER_TUPLE),
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
# ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
# ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# "4":"普通打印-全表格-中国银行",
# "5":"普通打印-全表格-农业银行-10列",
# "6":"普通打印-全表格-农业银行-10列-1",
...
...
@@ -920,3 +865,11 @@ LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
OTHER_CLASSIFY_SET
=
{
OTHER_CLASSIFY
}
LICENSE_CLASSIFY_SET_1
=
{
IC_CLASSIFY
,
VAT_CLASSIFY
,
MVC_CLASSIFY
,
MVI_CLASSIFY
}
LICENSE_CLASSIFY_SET_2
=
{
BL_CLASSIFY
,
UCI_CLASSIFY
,
EEP_CLASSIFY
,
DL_CLASSIFY
,
PP_CLASSIFY
,
BC_CLASSIFY
}
WECHART_CLASSIFY
=
12
WECHART_HEADERS_MAPPING
=
copy
.
deepcopy
(
HEADERS_MAPPING
)
WECHART_HEADERS_MAPPING
.
update
(
{
'交易时间'
:
DATE_KEY
,
}
)
...
...
src/apps/doc/ocr/wb.py
View file @
56281e3
...
...
@@ -30,7 +30,7 @@ class BSWorkbook(Workbook):
self
.
MAX_MEAN
=
31
@staticmethod
def
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
):
def
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
,
classify
):
# sheet_header_info = {
# 'sheet_name': {
# 'summary_col': 1,
...
...
@@ -65,6 +65,9 @@ class BSWorkbook(Workbook):
for
first_row
in
ws
.
iter_rows
(
max_row
=
1
,
min_row
=
1
,
values_only
=
True
):
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
HEADER_KEY
,
first_row
)
for
idx
,
header_value
in
enumerate
(
first_row
):
if
classify
==
consts
.
WECHART_CLASSIFY
:
header_col
=
consts
.
WECHART_HEADERS_MAPPING
.
get
(
header_value
)
else
:
header_col
=
consts
.
HEADERS_MAPPING
.
get
(
header_value
)
if
header_col
is
not
None
:
find_count
+=
1
...
...
@@ -315,6 +318,7 @@ class BSWorkbook(Workbook):
for
part
in
parts
:
ws
=
self
.
get_sheet_by_name
(
part
[
0
])
for
row_value
in
ws
.
iter_rows
(
min_row
=
part
[
1
],
max_row
=
part
[
2
],
values_only
=
True
):
if
any
(
row_value
):
new_ws
.
append
(
row_value
)
# 3.2.提取信息、高亮
amount_mapping
=
{}
...
...
@@ -439,7 +443,7 @@ class BSWorkbook(Workbook):
max_column_list
=
[]
for
sheet
in
summary
.
get
(
'sheet'
,
[]):
ws
=
self
.
get_sheet_by_name
(
sheet
)
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
)
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
,
classify
)
statistics_header_info
=
self
.
header_statistics
(
sheet_header_info
,
header_info
,
classify
)
max_column
=
max
(
max_column_list
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment