Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
77676d5c
authored
2021-11-02 15:10:40 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add position
1 parent
086507c6
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
130 additions
and
17 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/management/commands/ocr_process.py
View file @
77676d5
...
...
@@ -298,14 +298,26 @@ class Command(BaseCommand, LoggerMixin):
# rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
# return [rebuild_data_dict]
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
license_data
=
ocr_data
.
get
(
'data'
)
if
not
license_data
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
pre
,
suf
=
os
.
path
.
splitext
(
img_path
)
base64_img
=
license_data
.
pop
(
'base64_img'
,
''
)
is_save
=
True
if
len
(
base64_img
)
>
0
else
False
section_img_path
=
'{0}_{1}{2}'
.
format
(
pre
,
part_idx
,
suf
)
if
is_save
else
img_path
if
is_save
:
try
:
with
open
(
section_img_path
,
"wb"
)
as
fh
:
fh
.
write
(
base64
.
b64decode
(
base64_img
.
encode
()))
except
Exception
as
e
:
self
.
online_log
.
warn
(
'{0} [section img save failed] [img_path={1}]'
' [part_idx={2}]'
.
format
(
self
.
log_base
,
img_path
,
part_idx
))
# 保单
if
classify
==
consts
.
INSURANCE_CLASSIFY
:
product_result
=
[
''
,
''
,
''
]
...
...
@@ -333,7 +345,13 @@ class Command(BaseCommand, LoggerMixin):
'保险截止日期'
:
license_data
.
get
(
'result'
,
{})
.
get
(
'endDate'
,
{})
.
get
(
'words'
,
''
),
'保单章'
:
license_data
.
get
(
'result'
,
{})
.
get
(
'seal'
,
{})
.
get
(
'words'
,
''
),
'特别约定第一受益人'
:
special
,
consts
.
IMG_PATH_KEY
:
img_path
,
consts
.
SECTION_IMG_PATH_KEY
:
section_img_path
,
}
# position_dict = {
# '': {consts.FIELD_POSITION_KEY: {}}
# }
# insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
insurance_ocr_result
)
# DDA
elif
classify
==
consts
.
DDA_CLASSIFY
:
...
...
@@ -341,37 +359,103 @@ class Command(BaseCommand, LoggerMixin):
if
pro
<
consts
.
DDA_PRO_MIN
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
dda_ocr_result
=
{
key
:
value
.
get
(
'words'
,
''
)
for
key
,
value
in
license_data
.
get
(
'result'
,
{})
.
items
()}
dda_ocr_result
=
{}
position_dict
=
{}
for
key
,
value
in
license_data
.
get
(
'result'
,
{})
.
items
():
dda_ocr_result
[
key
]
=
value
.
get
(
'words'
,
''
)
position_dict
[
key
]
=
{
consts
.
FIELD_POSITION_KEY
:
value
.
get
(
'position'
,
{})
}
dda_ocr_result
[
consts
.
DDA_IMG_PATH
]
=
img_path
dda_ocr_result
[
consts
.
DDA_PRO
]
=
pro
dda_ocr_result
[
consts
.
IMG_PATH_KEY
]
=
img_path
dda_ocr_result
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
dda_ocr_result
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
dda_ocr_result
)
# 抵押登记豁免函
elif
classify
==
consts
.
HMH_CLASSIFY
:
hmh_ocr_result
=
{
key
:
value
.
get
(
'words'
,
''
)
for
key
,
value
in
license_data
.
get
(
'words_result'
,
{})
.
items
()}
hmh_ocr_result
=
{}
position_dict
=
{}
for
key
,
value
in
license_data
.
get
(
'words_result'
,
{})
.
items
():
hmh_ocr_result
[
key
]
=
value
.
get
(
'words'
,
''
)
location_list
=
value
.
get
(
'location'
,
[
-
1
,
-
1
,
-
1
,
-
1
])
if
len
(
location_list
)
==
4
:
position_dict
[
key
]
=
{
consts
.
FIELD_POSITION_KEY
:
{
'top'
:
location_list
[
1
],
'left'
:
location_list
[
0
],
'height'
:
location_list
[
-
1
]
-
location_list
[
1
],
'width'
:
location_list
[
2
]
-
location_list
[
0
]
}
}
hmh_ocr_result
[
consts
.
IMG_PATH_KEY
]
=
img_path
hmh_ocr_result
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
hmh_ocr_result
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
hmh_ocr_result
)
# 二手车交易凭证
elif
classify
==
consts
.
JYPZ_CLASSIFY
:
jypz_ocr_result
=
{
key
:
value
.
get
(
'words'
,
''
)
for
key
,
value
in
license_data
.
get
(
'result'
,
{})
.
items
()}
jypz_ocr_result
=
{}
position_dict
=
{}
for
key
,
value
in
license_data
.
get
(
'result'
,
{})
.
items
():
jypz_ocr_result
[
key
]
=
value
.
get
(
'words'
,
''
)
position_dict
[
key
]
=
{
consts
.
FIELD_POSITION_KEY
:
value
.
get
(
'position'
,
{})
}
jypz_ocr_result
[
consts
.
IMG_PATH_KEY
]
=
img_path
jypz_ocr_result
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
jypz_ocr_result
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
jypz_ocr_result
)
# 车辆登记证 3/4页结果整合
elif
classify
==
consts
.
MVC_CLASSIFY
:
rebuild_data_dict
=
{}
position_dict
=
{}
rebuild_data_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
rebuild_data_dict
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
mvc_page
=
license_data
.
pop
(
'page'
,
'VehicleRCI'
)
mvc_res
=
license_data
.
pop
(
'results'
,
{})
if
mvc_page
==
'VehicleRegArea'
:
rebuild_data_dict
[
'机动车登记证书编号'
]
=
mvc_res
.
get
(
'机动车登记证书编号'
,
{})
.
get
(
'words'
,
''
)
code_position_list
=
mvc_res
.
get
(
'机动车登记证书编号'
,
{})
.
get
(
'position'
,
[
0
,
0
,
0
,
0
])
if
len
(
code_position_list
)
==
4
:
position_dict
[
'机动车登记证书编号'
]
=
{
consts
.
FIELD_POSITION_KEY
:
{
'top'
:
code_position_list
[
1
],
'left'
:
code_position_list
[
0
],
'height'
:
code_position_list
[
-
1
],
'width'
:
code_position_list
[
2
],
}
}
for
register_info
in
mvc_res
.
get
(
'登记信息'
,
[]):
register_info
.
pop
(
'register_type'
,
None
)
register_info
.
pop
(
'register_type_name'
,
None
)
for
cn_key
,
detail_dict
in
register_info
.
items
():
rebuild_data_dict
.
setdefault
(
cn_key
,
[])
.
append
(
detail_dict
.
get
(
'words'
,
''
))
tmp_position_list
=
detail_dict
.
get
(
'position'
,
[
0
,
0
,
0
,
0
])
if
len
(
tmp_position_list
)
==
4
:
position_dict
[
cn_key
]
=
{
consts
.
FIELD_POSITION_KEY
:
{
'top'
:
tmp_position_list
[
1
],
'left'
:
tmp_position_list
[
0
],
'height'
:
tmp_position_list
[
-
1
],
'width'
:
tmp_position_list
[
2
],
}
}
else
:
for
cn_key
,
detail_dict
in
mvc_res
.
items
():
rebuild_data_dict
[
cn_key
]
=
detail_dict
.
get
(
'words'
,
''
)
position_list
=
detail_dict
.
get
(
'position'
,
[
0
,
0
,
0
,
0
])
if
len
(
position_list
)
==
4
:
position_dict
[
cn_key
]
=
{
consts
.
FIELD_POSITION_KEY
:
{
'top'
:
position_list
[
1
],
'left'
:
position_list
[
0
],
'height'
:
position_list
[
-
1
],
'width'
:
position_list
[
2
],
}
}
del
mvc_res
rebuild_data_dict
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
rebuild_data_dict
)
...
...
@@ -395,6 +479,7 @@ class Command(BaseCommand, LoggerMixin):
# 身份证真伪
elif
classify
==
consts
.
IC_CLASSIFY
:
id_card_dict
=
{}
position_dict
=
{}
card_type
=
license_data
.
get
(
'type'
,
''
)
is_ic
=
card_type
.
startswith
(
'身份证'
)
is_info_side
=
card_type
.
endswith
(
'信息面'
)
...
...
@@ -405,22 +490,40 @@ class Command(BaseCommand, LoggerMixin):
field_map
=
consts
.
RP_MAP_0
if
is_info_side
else
consts
.
RP_MAP_1
for
write_field
,
search_field
in
field_map
:
id_card_dict
[
write_field
]
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
search_field
,
{})
.
get
(
'words'
,
''
)
location_list
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
search_field
,
{})
.
get
(
'location'
,
[
-
1
,
-
1
,
-
1
,
-
1
])
if
len
(
location_list
)
==
4
:
position_dict
[
write_field
]
=
{
consts
.
FIELD_POSITION_KEY
:
{
'top'
:
location_list
[
1
],
'left'
:
location_list
[
0
],
'height'
:
location_list
[
-
1
]
-
location_list
[
1
],
'width'
:
location_list
[
2
]
-
location_list
[
0
]
}
}
if
not
is_info_side
:
start_time
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
'签发日期'
,
{})
.
get
(
'words'
,
''
)
end_time
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
'失效日期'
,
{})
.
get
(
'words'
,
''
)
id_card_dict
[
'有效期限'
]
=
'{0}-{1}'
.
format
(
start_time
,
end_time
)
end_time_location_list
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
'失效日期'
,
{})
.
get
(
'location'
,
[
-
1
,
-
1
,
-
1
,
-
1
])
if
len
(
end_time_location_list
)
==
4
:
position_dict
[
'有效期限'
]
=
{
consts
.
FIELD_POSITION_KEY
:
{
'top'
:
end_time_location_list
[
1
],
'left'
:
end_time_location_list
[
0
],
'height'
:
end_time_location_list
[
-
1
]
-
end_time_location_list
[
1
],
'width'
:
end_time_location_list
[
2
]
-
end_time_location_list
[
0
]
}
}
id_card_dict
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
id_card_dict
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
if
not
is_info_side
:
id_card_dict
[
consts
.
IMG_PATH_KEY_2
]
=
img_path
else
:
id_card_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
if
is_ic
:
try
:
base64_img
=
license_data
.
pop
(
'base64_img'
)
except
Exception
as
e
:
pass
else
:
if
is_ic
and
is_save
:
card_type
=
-
1
json_data_4
=
{
'mode'
:
1
,
...
...
@@ -460,23 +563,33 @@ class Command(BaseCommand, LoggerMixin):
'{0} [ocr_4 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
id_card_dict
[
consts
.
IC_TURE_OR_FALSE
]
=
consts
.
IC_RES_MAPPING
.
get
(
card_type
)
finally
:
if
do_dda
and
isinstance
(
id_card_dict
.
get
(
consts
.
IC_KEY_FIELD
[
0
]),
str
)
and
isinstance
(
id_card_dict
.
get
(
consts
.
IC_KEY_FIELD
[
1
]),
str
):
ic_name
=
id_card_dict
[
consts
.
IC_KEY_FIELD
[
0
]]
.
strip
()
ic_id
=
id_card_dict
[
consts
.
IC_KEY_FIELD
[
1
]]
.
strip
()
if
do_dda
and
isinstance
(
id_card_dict
.
get
(
consts
.
IC_KEY_FIELD
[
0
]),
str
)
and
\
isinstance
(
id_card_dict
.
get
(
consts
.
IC_KEY_FIELD
[
1
]),
str
):
ic_name
=
id_card_dict
.
get
(
consts
.
IC_KEY_FIELD
[
0
],
''
)
.
strip
()
ic_id
=
id_card_dict
.
get
(
consts
.
IC_KEY_FIELD
[
1
],
''
)
.
strip
()
if
len
(
ic_name
)
>
0
and
len
(
ic_id
)
>
0
:
dda_id_bc_mapping
.
setdefault
(
consts
.
IC_FIELD
,
[])
.
append
((
ic_name
,
ic_id
,
img_path
))
license_summary
.
setdefault
(
classify
,
[])
.
append
(
id_card_dict
)
# 购车发票 & 二手车发票
elif
classify
==
consts
.
MVI_CLASSIFY
or
classify
==
consts
.
UCI_CLASSIFY
:
rebuild_data_dict
=
{}
position_dict
=
{}
mvi_res
=
license_data
.
pop
(
'result'
,
{})
for
en_key
,
detail_dict
in
mvi_res
.
items
():
rebuild_data_dict
[
detail_dict
.
get
(
'chinese_key'
,
''
)]
=
detail_dict
.
get
(
'words'
,
''
)
position_dict
[
detail_dict
.
get
(
'chinese_key'
,
''
)]
=
{
consts
.
FIELD_POSITION_KEY
:
detail_dict
.
get
(
'position'
,
{})
}
rebuild_data_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
rebuild_data_dict
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
rebuild_data_dict
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
rebuild_data_dict
)
# 其他
else
:
for
res_dict
in
license_data
:
res_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
res_dict
[
consts
.
SECTION_IMG_PATH_KEY
]
=
section_img_path
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment