Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
7c50d819
authored
2020-11-10 17:18:11 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
779b9a10
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
20 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/management/commands/ocr_process.py
View file @
7c50d81
...
...
@@ -102,10 +102,10 @@ class Command(BaseCommand, LoggerMixin):
raise
EDMSException
(
edms_exc
)
self
.
cronjob_log
.
info
(
'{0} [edms download success] [pdf_path={1}]'
.
format
(
self
.
log_base
,
pdf_path
))
def
bs_process
(
self
,
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
):
def
bs_process
(
self
,
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
sheets
=
ocr_data
.
get
(
'data'
,
[])
if
not
sheets
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_SUCCESS_EMPTY
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
# confidence = ocr_data.get('confidence', 1)
img_name
=
'page_{0}_img_{1}'
.
format
(
pno
,
ino
)
...
...
@@ -171,22 +171,22 @@ class Command(BaseCommand, LoggerMixin):
ed_list
.
append
(
summary
[
6
])
if
cells_exists
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_SUCCESS
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
else
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_SUCCESS_EMPTY
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
):
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
# 类别:'0'身份证, '1'居住证
license_data
=
ocr_data
.
get
(
'data'
,
[])
if
not
license_data
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_SUCCESS_EMPTY
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
res_list
.
append
((
pno
,
ino
,
consts
.
RES_SUCCESS
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
):
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
if
ocr_res_2
.
get
(
'ErrorCode'
)
in
consts
.
SUCCESS_CODE_SET
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_SUCCESS
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
if
pid
==
consts
.
BC_PID
:
# 银行卡
# res_dict = {}
...
...
@@ -201,7 +201,7 @@ class Command(BaseCommand, LoggerMixin):
res_dict
[
field_dict
.
get
(
'chn_key'
,
''
)]
=
field_dict
.
get
(
'value'
,
''
)
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_dict
)
else
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_FAILED_2
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
@staticmethod
def
parse_img_path
(
img_path
):
...
...
@@ -493,26 +493,27 @@ class Command(BaseCommand, LoggerMixin):
wb
=
BSWorkbook
(
interest_keyword
,
salary_keyword
,
loan_keyword
)
for
img_path
,
res
in
ocr_1_res
.
items
():
pno
,
ino
=
self
.
parse_img_path
(
img_path
)
part_idx
=
1
if
res
.
get
(
'code'
)
==
1
:
ocr_data_list
=
res
.
get
(
'data'
,
[])
if
not
isinstance
(
ocr_data_list
,
list
):
res_list
.
append
((
pno
,
ino
,
consts
.
RES_FAILED_3
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_3
))
self
.
cronjob_log
.
info
(
'{0} [ocr_1 res error] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
res
))
else
:
for
part_idx
,
ocr_data
in
enumerate
(
ocr_data_list
):
ino_part
=
'{0}-{1}'
.
format
(
ino
,
part_idx
)
part_idx
=
part_idx
+
1
classify
=
ocr_data
.
get
(
'classify'
)
if
classify
is
None
:
res_list
.
append
((
pno
,
ino
_part
,
consts
.
RES_FAILED_3
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_3
))
self
.
cronjob_log
.
info
(
'{0} [ocr_1 res error] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
res
))
continue
elif
classify
in
consts
.
OTHER_CLASSIFY_SET
:
# 其他类
res_list
.
append
((
pno
,
ino
_part
,
consts
.
RES_SUCCESS_OTHER
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_1
:
# 证件1
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
_part
)
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_2
:
# 证件2
pid
,
_
,
_
,
_
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
file_data
=
ocr_data
.
get
(
'section_img'
)
...
...
@@ -557,16 +558,16 @@ class Command(BaseCommand, LoggerMixin):
card_name_res
.
get
(
'data'
,
{})
.
get
(
'is_exists_name'
)
==
0
:
name
=
'无'
ocr_2_res
[
'Name'
]
=
name
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
_part
)
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
break
else
:
res_list
.
append
((
pno
,
ino
_part
,
consts
.
RES_FAILED_2
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
self
.
cronjob_log
.
warn
(
'{0} [ocr_2 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
else
:
# 流水处理
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
_part
)
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
else
:
res_list
.
append
((
pno
,
ino
,
consts
.
RES_FAILED_1
))
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_1
))
self
.
cronjob_log
.
info
(
'{0} [ocr_1 res error] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
res
))
...
...
src/apps/doc/ocr/wb.py
View file @
7c50d81
...
...
@@ -582,7 +582,7 @@ class BSWorkbook(Workbook):
def
res_sheet
(
self
,
res_list
):
if
res_list
:
res_list
.
sort
(
key
=
lambda
x
:
(
x
[
0
],
x
[
1
]))
res_list
.
sort
(
key
=
lambda
x
:
(
x
[
0
],
x
[
1
]
,
x
[
2
]
))
ws
=
self
.
create_sheet
(
consts
.
RES_SHEET_NAME
)
ws
.
append
(
consts
.
RES_SHEET_HEADER
)
for
res_tuple
in
res_list
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment