Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
59cbfab2
authored
2020-10-16 17:31:38 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug & add skip_img_sheet
1 parent
6a5899fa
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
30 deletions
.gitignore
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/ocr/wb.py
.gitignore
View file @
59cbfab
...
...
@@ -33,4 +33,5 @@ data/*
# 脚本
src/*.sh
test*
\ No newline at end of file
test*
ocr_test.py
\ No newline at end of file
...
...
src/apps/doc/consts.py
View file @
59cbfab
...
...
@@ -60,6 +60,8 @@ TRANS_MAP = {
}
TRANS
=
str
.
maketrans
(
TRANS_MAP
)
ERROR_CHARS
=
{
'.'
,
'·'
,
'•'
}
SKIP_IMG_SHEET_NAME
=
'未处理图片'
SKIP_IMG_SHEET_HEADER
=
(
'页码'
,
'序号'
)
CARD_RATIO
=
0.9
UNKNOWN_CARD
=
'未知卡号'
...
...
src/apps/doc/management/commands/doc_ocr_process.py
View file @
59cbfab
This diff is collapsed.
Click to expand it.
src/apps/doc/ocr/wb.py
View file @
59cbfab
...
...
@@ -141,32 +141,22 @@ class BSWorkbook(Workbook):
# month_info process
month_info
=
month_mapping
.
setdefault
(
'xxxx-xx'
,
[])
month_info
.
append
((
ws
.
title
,
min_row
,
ws
.
max_row
,
0
))
elif
len
(
month_list
)
==
1
:
# reverse_trend_list process
reverse_trend
=
self
.
get_reverse_trend
(
dti
.
day
,
idx_list
)
reverse_trend_list
.
append
(
reverse_trend
)
# month_info process
month_info
=
month_mapping
.
setdefault
(
month_list
[
0
],
[])
day_mean
=
np
.
mean
(
dti
.
day
.
dropna
())
if
len
(
month_info
)
==
0
:
month_info
.
append
((
ws
.
title
,
min_row
,
ws
.
max_row
,
day_mean
))
else
:
for
i
,
item
in
enumerate
(
month_info
):
if
day_mean
<=
item
[
-
1
]:
month_info
.
insert
(
i
,
(
ws
.
title
,
min_row
,
ws
.
max_row
,
day_mean
))
break
else
:
month_info
.
append
((
ws
.
title
,
min_row
,
ws
.
max_row
,
day_mean
))
else
:
# reverse_trend_list process
reverse_trend
=
self
.
get_reverse_trend
(
dti
.
day
,
idx_list
)
reverse_trend_list
.
append
(
reverse_trend
)
# month_info process
for
i
,
item
in
enumerate
(
month_list
[:
-
1
]):
month_mapping
.
setdefault
(
item
,
[])
.
append
(
(
ws
.
title
,
idx_list
[
i
]
+
min_row
,
idx_list
[
i
+
1
]
+
min_row
-
1
,
self
.
MAX_MEAN
))
month_mapping
.
setdefault
(
month_list
[
-
1
],
[])
.
insert
(
0
,
(
ws
.
title
,
idx_list
[
-
1
]
+
min_row
,
ws
.
max_row
,
0
))
day_idx
=
dti
.
day
idx_list_max_idx
=
len
(
idx_list
)
-
1
for
i
,
item
in
enumerate
(
month_list
):
if
i
==
idx_list_max_idx
:
day_mean
=
np
.
mean
(
day_idx
[
idx_list
[
i
]:]
.
dropna
())
month_mapping
.
setdefault
(
item
,
[])
.
append
(
(
ws
.
title
,
idx_list
[
i
]
+
min_row
,
ws
.
max_row
,
day_mean
))
else
:
day_mean
=
np
.
mean
(
day_idx
[
idx_list
[
i
]:
idx_list
[
i
+
1
]]
.
dropna
())
month_mapping
.
setdefault
(
item
,
[])
.
append
(
(
ws
.
title
,
idx_list
[
i
]
+
min_row
,
idx_list
[
i
+
1
]
+
min_row
-
1
,
day_mean
))
def
build_metadata_rows
(
self
,
confidence
,
code
,
print_time
,
start_date
,
end_date
):
if
start_date
is
None
or
end_date
is
None
:
...
...
@@ -259,7 +249,7 @@ class BSWorkbook(Workbook):
except
Exception
as
e
:
continue
else
:
over_cell
.
number_format
=
numbers
.
FORMAT_
NUMBER_COMMA_SEPARATED1
over_cell
.
number_format
=
numbers
.
FORMAT_
GENERAL
# 3.4.金额转数值
try
:
...
...
@@ -281,7 +271,7 @@ class BSWorkbook(Workbook):
else
:
if
rows
[
consts
.
BORROW_IDX
]
.
value
in
consts
.
BORROW_OUTLAY_SET
:
amount_cell
.
value
=
-
amount_cell
.
value
amount_cell
.
number_format
=
numbers
.
FORMAT_
NUMBER_COMMA_SEPARATED1
amount_cell
.
number_format
=
numbers
.
FORMAT_
GENERAL
same_amount_mapping
=
amount_mapping
.
get
(
date_cell
.
value
,
{})
fill_rows
=
same_amount_mapping
.
get
(
-
amount_cell
.
value
)
if
fill_rows
:
...
...
@@ -357,11 +347,11 @@ class BSWorkbook(Workbook):
end_date
)
# 3.创建月份表、提取/高亮关键行
is_reverse
=
False
i
f
sum
(
reverse_trend_list
)
>
0
:
# 倒序处理
is_reverse
=
True
for
month_list
in
month_mapping
.
values
():
month_list
.
sort
(
key
=
lambda
x
:
x
[
-
1
],
reverse
=
True
)
# 倒序处理
i
s_reverse
=
True
if
sum
(
reverse_trend_list
)
>
0
else
False
for
month_list
in
month_mapping
.
values
():
month_list
.
sort
(
key
=
lambda
x
:
x
[
-
1
],
reverse
=
is_reverse
)
self
.
build_month_sheet
(
card
,
month_mapping
,
ms
,
is_reverse
)
# 4.删除原表
...
...
@@ -379,6 +369,14 @@ class BSWorkbook(Workbook):
ws
.
append
(
bl_field
)
ws
.
append
((
None
,
))
def
rebuild
(
self
,
bs_summary
,
license_summary
):
def
skip_img_sheet
(
self
,
skip_img
):
if
skip_img
:
ws
=
self
.
create_sheet
(
consts
.
SKIP_IMG_SHEET_NAME
)
ws
.
append
(
consts
.
SKIP_IMG_SHEET_HEADER
)
for
img_tuple
in
skip_img
:
ws
.
append
(
img_tuple
)
def
rebuild
(
self
,
bs_summary
,
license_summary
,
skip_img
):
self
.
bs_rebuild
(
bs_summary
)
self
.
license_rebuild
(
license_summary
)
self
.
skip_img_sheet
(
skip_img
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment