Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
a0d9523c
authored
2021-10-12 11:26:01 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
fix merge
2 parents
69661549
4d3ca7e0
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
215 additions
and
15 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/celery_compare/tasks.py
src/apps/doc/consts.py
View file @
a0d9523
...
...
@@ -1568,6 +1568,12 @@ BC_FIELD_ORDER_2 = (('BankName', '发卡行名称'),
IMG_PATH_KEY
=
'uniq_img_path_key'
IMG_PATH_KEY_2
=
'uniq_img_path_key_2'
SECTION_IMG_PATH_KEY
=
'uniq_section_img_path_key'
ALL_POSITION_KEY
=
'uniq_all_position_key'
POSITION_KEY
=
'uniq_position_key'
ANGLE_KEY
=
'uniq_angle_key'
FIELD_POSITION_KEY
=
'position'
FIELD_QUAD_KEY
=
'quad'
INFO_SOURCE
=
[
'POS'
,
'CMS'
]
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
a0d9523
...
...
@@ -378,7 +378,7 @@ class Command(BaseCommand, LoggerMixin):
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
,
file_data
):
if
ocr_res_2
.
get
(
'ErrorCode'
)
in
consts
.
SUCCESS_CODE_SET
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
if
pid
==
consts
.
BC_PID
:
...
...
@@ -394,12 +394,51 @@ class Command(BaseCommand, LoggerMixin):
dda_id_bc_mapping
.
setdefault
(
consts
.
BC_FIELD
,
[])
.
append
((
bc_no
,
img_path
))
else
:
# 营业执照等
for
result_dict
in
ocr_res_2
.
get
(
'ResultList'
,
[]):
pre
,
suf
=
os
.
path
.
splitext
(
img_path
)
src_section_img_path
=
img_path
if
file_data
is
None
else
'{0}_{1}{2}'
.
format
(
pre
,
part_idx
,
suf
)
is_save
=
False
for
res_idx
,
result_dict
in
enumerate
(
ocr_res_2
.
get
(
'ResultList'
,
[])):
image_data
=
result_dict
.
get
(
'image_data'
,
''
)
if
len
(
image_data
)
>
0
:
position
=
{}
angle
=
0
section_img_path
=
'{0}_{1}_{2}{3}'
.
format
(
pre
,
part_idx
,
res_idx
,
suf
)
try
:
with
open
(
section_img_path
,
"wb"
)
as
fh
:
fh
.
write
(
base64
.
b64decode
(
image_data
.
encode
()))
except
Exception
as
e
:
self
.
online_log
.
warn
(
'{0} [section img save failed] [img_path={1}]'
' [part_idx={2}] [res_idx={3}]'
.
format
(
self
.
log_base
,
img_path
,
part_idx
,
res_idx
))
else
:
is_save
=
True
section_img_path
=
src_section_img_path
position
=
result_dict
.
get
(
'position'
,
{})
angle
=
result_dict
.
get
(
'angle'
,
0
)
res_dict
=
{}
position_dict
=
{}
for
field_dict
in
result_dict
.
get
(
'FieldList'
,
[]):
res_dict
[
field_dict
.
get
(
'chn_key'
,
''
)]
=
field_dict
.
get
(
'value'
,
''
)
position_dict
[
field_dict
.
get
(
'chn_key'
,
''
)]
=
{
consts
.
FIELD_POSITION_KEY
:
field_dict
.
get
(
'position'
,
{}),
consts
.
FIELD_QUAD_KEY
:
field_dict
.
get
(
'quad'
,
[]),
}
position_dict
[
consts
.
POSITION_KEY
]
=
position
position_dict
[
consts
.
ANGLE_KEY
]
=
angle
res_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
res_dict
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
res_dict
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_dict
)
if
is_save
and
file_data
is
not
None
:
try
:
with
open
(
src_section_img_path
,
"wb"
)
as
fh
:
fh
.
write
(
base64
.
b64decode
(
file_data
.
encode
()))
except
Exception
as
e
:
self
.
online_log
.
warn
(
'{0} [section img save failed] [img_path={1}]'
' [part_idx={2}]'
.
format
(
self
.
log_base
,
img_path
,
part_idx
))
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
...
...
@@ -978,9 +1017,10 @@ class Command(BaseCommand, LoggerMixin):
card_name_res
.
get
(
'data'
,
{})
.
get
(
'is_exists_name'
)
==
0
:
name
=
'无'
ocr_2_res
[
'Name'
]
=
name
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
)
do_dda
,
dda_id_bc_mapping
,
file_data
=
ocr_data
.
get
(
'section_img'
)
)
break
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
...
...
src/celery_compare/tasks.py
View file @
a0d9523
import
json
import
os
import
cv2
import
time
import
logging
import
traceback
import
numpy
as
np
from
datetime
import
datetime
,
timedelta
from
collections
import
OrderedDict
from
.
import
app
...
...
@@ -40,6 +43,79 @@ empty_error_type = 1000
des_key
=
conf
.
CMS_DES_KEY
def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` onto a canvas sized to hold the whole result.

    The angle is forwarded unchanged (it is NOT negated here) to
    ``cv2.getRotationMatrix2D`` together with a scale of 1.0, and the
    rotation is taken about the integer centre ``(w // 2, h // 2)``.

    :param image: array whose first two ``shape`` entries are (height, width).
    :param angle: rotation angle handed to ``cv2.getRotationMatrix2D``.
    :return: the output of ``cv2.warpAffine`` for the adjusted matrix, with
        the destination size set to ``(new_width, new_height)``.
    """
    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2

    # Entries [0, 0] and [0, 1] of the 2x3 matrix are used as the cosine and
    # sine terms; only their magnitudes matter for the new canvas size.
    matrix = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Dimensions of the axis-aligned box that encloses the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the translation column so the old centre lands on the centre of
    # the enlarged canvas.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(image, matrix, (new_width, new_height))
def build_coordinates(section_position_dict):
    """Turn a ``top``/``left``/``height``/``width`` mapping into slice bounds.

    :param section_position_dict: expected to be a dict; each of the four
        keys defaults to 0 when missing. Any non-dict value is rejected.
    :return: ``(True, (h_min, h_max, w_min, w_max))`` when the rectangle has
        a positive height and a positive width, otherwise ``(False, ())``.
    """
    if not isinstance(section_position_dict, dict):
        return False, ()

    top = section_position_dict.get('top', 0)
    left = section_position_dict.get('left', 0)
    bottom = top + section_position_dict.get('height', 0)
    right = left + section_position_dict.get('width', 0)

    # A rectangle is only usable when it spans at least something on both axes.
    if bottom > top and right > left:
        return True, (top, bottom, left, right)
    return False, ()
def field_build_coordinates(field_position_info):
    """Derive slice bounds for one field from its quad or its position rectangle.

    The entry stored under ``consts.FIELD_QUAD_KEY`` is tried first: when it
    is a list of exactly eight values, the even indices are treated as
    horizontal (w) values and the odd indices as vertical (h) values, and
    their min/max form the bounding box. If that box is missing or
    degenerate, the ``top``/``left``/``height``/``width`` dict stored under
    ``consts.FIELD_POSITION_KEY`` is used instead.

    :param field_position_info: mapping read with ``.get`` for the two keys
        above.
    :return: ``(True, (h_min, h_max, w_min, w_max))`` for a box with positive
        height and width, otherwise ``(False, ())``.
    """
    rect = field_position_info.get(consts.FIELD_POSITION_KEY, {})
    quad = field_position_info.get(consts.FIELD_QUAD_KEY, [])

    if isinstance(quad, list) and len(quad) == 8:
        xs = quad[0::2]
        ys = quad[1::2]
        top, bottom = min(ys), max(ys)
        left, right = min(xs), max(xs)
        if bottom > top and right > left:
            return True, (top, bottom, left, right)
        # Degenerate quad: fall through to the rectangle description.

    if not isinstance(rect, dict):
        return False, ()

    top = rect.get('top', 0)
    left = rect.get('left', 0)
    bottom = top + rect.get('height', 0)
    right = left + rect.get('width', 0)
    if bottom > top and right > left:
        return True, (top, bottom, left, right)
    return False, ()
def img_process(section_img_path, section_position, section_angle):
    """Load a section image, optionally crop it, then optionally rotate it.

    :param section_img_path: path handed to ``cv2.imread``.
    :param section_position: value passed to ``build_coordinates``; the crop
        is applied only when that helper reports a valid rectangle.
    :param section_angle: rotation is applied through ``rotate_bound`` only
        when this is an ``int``/``float`` different from 0.
    :return: the (possibly cropped and rotated) image array.
    :raises OSError: when ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(section_img_path)
    if image is None:
        # cv2.imread reports an unreadable or missing file by returning None
        # instead of raising. Fail here with a clear message rather than
        # returning None or tripping over an unrelated TypeError further down.
        raise OSError('failed to read image: {0}'.format(section_img_path))

    is_valid, coord_tuple = build_coordinates(section_position)
    if is_valid:
        h_min, h_max, w_min, w_max = coord_tuple
        image = image[h_min:h_max, w_min:w_max, :]

    if isinstance(section_angle, (int, float)) and section_angle != 0:
        return rotate_bound(image, section_angle)
    return image
# class FakePOS:
#
# def __init__(self,
...
...
@@ -289,6 +365,8 @@ def ca_compare_license(license_en, ocr_res_dict, field_list):
is_find
=
False
result_field_list
=
[]
section_img_info
=
dict
()
field_img_path_dict
=
dict
()
ocr_res_str
=
ocr_res_dict
.
get
(
ocr_field
)
if
ocr_res_str
is
not
None
:
ocr_res_list
=
json
.
loads
(
ocr_res_str
)
...
...
@@ -323,6 +401,8 @@ def ca_compare_license(license_en, ocr_res_dict, field_list):
break
is_find
=
True
section_img_info
[
consts
.
SECTION_IMG_PATH_KEY
]
=
ocr_res_list
[
res_idx
]
.
get
(
consts
.
SECTION_IMG_PATH_KEY
,
''
)
section_img_info
[
consts
.
ALL_POSITION_KEY
]
=
ocr_res_list
[
res_idx
]
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
# 过期期限特殊处理
if
special_expiry_date
and
name
==
'idExpiryDate'
and
result
==
consts
.
RESULT_N
:
...
...
@@ -347,7 +427,42 @@ def ca_compare_license(license_en, ocr_res_dict, field_list):
for
name
,
value
in
field_list
:
result_field_list
.
append
((
name
,
value
,
consts
.
RESULT_N
,
empty_str
,
empty_str
,
ErrorType
.
NF
.
value
))
return
result_field_list
if
is_find
:
section_img_path
=
section_img_info
.
get
(
consts
.
SECTION_IMG_PATH_KEY
,
''
)
if
os
.
path
.
exists
(
section_img_path
):
failed_field
=
[]
base_img_path
=
empty_str
for
name
,
_
,
result
,
_
,
img_path
,
_
in
result_field_list
:
if
result
==
consts
.
RESULT_N
:
failed_field
.
append
(
name
)
if
base_img_path
==
empty_str
:
base_img_path
=
img_path
if
len
(
failed_field
)
>
0
:
info
=
section_img_info
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
section_position
=
info
.
get
(
consts
.
POSITION_KEY
,
{})
section_angle
=
info
.
get
(
consts
.
ANGLE_KEY
,
0
)
try
:
last_img
=
img_process
(
section_img_path
,
section_position
,
section_angle
)
except
Exception
as
e
:
for
field
in
failed_field
:
field_img_path_dict
[
field
]
=
base_img_path
else
:
pre
,
suf
=
os
.
path
.
splitext
(
section_img_path
)
for
field
in
failed_field
:
try
:
res_field
=
compare_logic
[
field
][
0
]
is_valid
,
coord_tuple
=
field_build_coordinates
(
info
.
get
(
res_field
,
{}))
if
is_valid
:
save_path
=
'{0}_{1}{2}'
.
format
(
pre
,
field
,
suf
)
field_img
=
last_img
[
coord_tuple
[
0
]:
coord_tuple
[
1
],
coord_tuple
[
2
]:
coord_tuple
[
3
],
:]
cv2
.
imwrite
(
save_path
,
field_img
)
field_img_path_dict
[
field
]
=
save_path
else
:
field_img_path_dict
[
field
]
=
base_img_path
except
Exception
as
e
:
field_img_path_dict
[
field
]
=
base_img_path
return
result_field_list
,
field_img_path_dict
def
ca_compare_process
(
compare_info
,
ocr_res_dict
):
...
...
@@ -363,7 +478,7 @@ def ca_compare_process(compare_info, ocr_res_dict):
for
idx
,
license_list
in
info_value
.
items
():
for
license_dict
in
license_list
:
for
license_en
,
field_list
in
license_dict
.
items
():
result_field_list
=
ca_compare_license
(
license_en
,
ocr_res_dict
,
field_list
)
result_field_list
,
field_img_path_dict
=
ca_compare_license
(
license_en
,
ocr_res_dict
,
field_list
)
for
name
,
value
,
result
,
ocr_str
,
img_path
,
error_type
in
result_field_list
:
total_fields
+=
1
if
result
==
consts
.
RESULT_N
:
...
...
@@ -377,14 +492,14 @@ def ca_compare_process(compare_info, ocr_res_dict):
consts
.
HEAD_LIST
[
4
]:
value
,
consts
.
HEAD_LIST
[
5
]:
ocr_str
,
consts
.
HEAD_LIST
[
6
]:
result
,
consts
.
HEAD_LIST
[
7
]:
empty_str
,
consts
.
HEAD_LIST
[
7
]:
field_img_path_dict
.
get
(
name
,
empty_str
)
,
consts
.
HEAD_LIST
[
8
]:
img_path
,
consts
.
HEAD_LIST
[
9
]:
error_type
,
}
)
else
:
for
license_en
,
field_list
in
info_value
.
items
():
result_field_list
=
ca_compare_license
(
license_en
,
ocr_res_dict
,
field_list
)
result_field_list
,
field_img_path_dict
=
ca_compare_license
(
license_en
,
ocr_res_dict
,
field_list
)
for
name
,
value
,
result
,
ocr_str
,
img_path
,
error_type
in
result_field_list
:
total_fields
+=
1
if
result
==
consts
.
RESULT_N
:
...
...
@@ -398,7 +513,7 @@ def ca_compare_process(compare_info, ocr_res_dict):
consts
.
HEAD_LIST
[
4
]:
value
,
consts
.
HEAD_LIST
[
5
]:
ocr_str
,
consts
.
HEAD_LIST
[
6
]:
result
,
consts
.
HEAD_LIST
[
7
]:
empty_str
,
consts
.
HEAD_LIST
[
7
]:
field_img_path_dict
.
get
(
name
,
empty_str
)
,
consts
.
HEAD_LIST
[
8
]:
img_path
,
consts
.
HEAD_LIST
[
9
]:
error_type
,
}
...
...
@@ -1263,6 +1378,8 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
is_find
=
False
no_ocr_result
=
False
result_field_list
=
[]
section_img_info
=
dict
()
field_img_path_dict
=
dict
()
ocr_res_str
=
ocr_res_dict
.
get
(
ocr_field
)
if
ocr_res_str
is
not
None
:
ocr_res_list
=
json
.
loads
(
ocr_res_str
)
...
...
@@ -1294,6 +1411,8 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
break
is_find
=
True
section_img_info
[
consts
.
SECTION_IMG_PATH_KEY
]
=
ocr_res_list
[
res_idx
]
.
get
(
consts
.
SECTION_IMG_PATH_KEY
,
''
)
section_img_info
[
consts
.
ALL_POSITION_KEY
]
=
ocr_res_list
[
res_idx
]
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
# 过期期限特殊处理
if
special_expiry_date
and
name
==
'idExpiryDate'
and
result
==
consts
.
RESULT_N
:
...
...
@@ -1314,7 +1433,42 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
for
name
,
value
in
field_list
:
result_field_list
.
append
((
name
,
value
,
consts
.
RESULT_N
,
empty_str
,
empty_str
,
ErrorType
.
NF
.
value
))
return
result_field_list
,
no_ocr_result
if
is_find
:
section_img_path
=
section_img_info
.
get
(
consts
.
SECTION_IMG_PATH_KEY
,
''
)
if
os
.
path
.
exists
(
section_img_path
):
failed_field
=
[]
base_img_path
=
empty_str
for
name
,
_
,
result
,
_
,
img_path
,
_
in
result_field_list
:
if
result
==
consts
.
RESULT_N
:
failed_field
.
append
(
name
)
if
base_img_path
==
empty_str
:
base_img_path
=
img_path
if
len
(
failed_field
)
>
0
:
info
=
section_img_info
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
section_position
=
info
.
get
(
consts
.
POSITION_KEY
,
{})
section_angle
=
info
.
get
(
consts
.
ANGLE_KEY
,
0
)
try
:
last_img
=
img_process
(
section_img_path
,
section_position
,
section_angle
)
except
Exception
as
e
:
for
field
in
failed_field
:
field_img_path_dict
[
field
]
=
base_img_path
else
:
pre
,
suf
=
os
.
path
.
splitext
(
section_img_path
)
for
field
in
failed_field
:
try
:
res_field
=
compare_logic
[
field
][
0
]
is_valid
,
coord_tuple
=
field_build_coordinates
(
info
.
get
(
res_field
,
{}))
if
is_valid
:
save_path
=
'{0}_{1}{2}'
.
format
(
pre
,
field
,
suf
)
field_img
=
last_img
[
coord_tuple
[
0
]:
coord_tuple
[
1
],
coord_tuple
[
2
]:
coord_tuple
[
3
],
:]
cv2
.
imwrite
(
save_path
,
field_img
)
field_img_path_dict
[
field
]
=
save_path
else
:
field_img_path_dict
[
field
]
=
base_img_path
except
Exception
as
e
:
field_img_path_dict
[
field
]
=
base_img_path
return
result_field_list
,
no_ocr_result
,
field_img_path_dict
def
se_mvc34_compare
(
license_en
,
ocr_res_dict
,
field_list
):
...
...
@@ -1380,7 +1534,7 @@ def se_mvc34_compare(license_en, ocr_res_dict, field_list):
for
name
,
value
in
field_list
:
result_field_list
.
append
((
name
,
value
,
consts
.
RESULT_N
,
empty_str
,
empty_str
,
ErrorType
.
NF
.
value
))
return
result_field_list
return
result_field_list
,
dict
()
def
se_compare_process
(
compare_info
,
ocr_res_dict
):
...
...
@@ -1406,7 +1560,7 @@ def se_compare_process(compare_info, ocr_res_dict):
else
:
strip_list
.
append
((
a
,
b
))
failure_field
=
[]
result_field_list
,
no_ocr_result
=
se_compare_license
(
license_en
,
ocr_res_dict
,
strip
_list
)
result_field_list
,
no_ocr_result
,
field_img_path_dict
=
se_compare_license
(
license_en
,
ocr_res_dict
,
field
_list
)
for
name
,
value
,
result
,
ocr_str
,
img_path
,
error_type
in
result_field_list
:
if
license_en
not
in
consts
.
SKIP_CARD
or
not
no_ocr_result
:
total_fields
+=
1
...
...
@@ -1423,7 +1577,7 @@ def se_compare_process(compare_info, ocr_res_dict):
consts
.
HEAD_LIST
[
4
]:
value
,
consts
.
HEAD_LIST
[
5
]:
ocr_str
,
consts
.
HEAD_LIST
[
6
]:
result
,
consts
.
HEAD_LIST
[
7
]:
empty_str
,
consts
.
HEAD_LIST
[
7
]:
field_img_path_dict
.
get
(
name
,
empty_str
)
,
consts
.
HEAD_LIST
[
8
]:
img_path
,
consts
.
HEAD_LIST
[
9
]:
error_type
,
}
...
...
@@ -1440,9 +1594,9 @@ def se_compare_process(compare_info, ocr_res_dict):
strip_list
.
append
((
a
,
b
))
failure_field
=
[]
if
license_en
==
consts
.
MVC34_EN
:
result_field_list
=
se_mvc34_compare
(
license_en
,
ocr_res_dict
,
strip
_list
)
result_field_list
,
field_img_path_dict
=
se_mvc34_compare
(
license_en
,
ocr_res_dict
,
field
_list
)
else
:
result_field_list
,
_
=
se_compare_license
(
license_en
,
ocr_res_dict
,
strip
_list
)
result_field_list
,
_
,
field_img_path_dict
=
se_compare_license
(
license_en
,
ocr_res_dict
,
field
_list
)
for
name
,
value
,
result
,
ocr_str
,
img_path
,
error_type
in
result_field_list
:
total_fields
+=
1
if
result
==
consts
.
RESULT_N
:
...
...
@@ -1458,7 +1612,7 @@ def se_compare_process(compare_info, ocr_res_dict):
consts
.
HEAD_LIST
[
4
]:
value
,
consts
.
HEAD_LIST
[
5
]:
ocr_str
,
consts
.
HEAD_LIST
[
6
]:
result
,
consts
.
HEAD_LIST
[
7
]:
empty_str
,
consts
.
HEAD_LIST
[
7
]:
field_img_path_dict
.
get
(
name
,
empty_str
)
,
consts
.
HEAD_LIST
[
8
]:
img_path
,
consts
.
HEAD_LIST
[
9
]:
error_type
,
}
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment