Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
086507c6
authored
2021-11-01 18:37:58 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
rebuild result
1 parent
5c08056d
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
146 additions
and
31 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/consts.py
View file @
086507c
...
...
@@ -658,6 +658,13 @@ CLASSIFY_HEADER_LIST = [
OTHER_CLASSIFY
=
2
# 身份证
IC_MAP_0
=
((
'姓名'
,
'姓名'
),
(
'公民身份号码'
,
'公民身份号码'
),
(
'出生年月'
,
'出生'
),
(
'住址'
,
'住址'
),
(
'性别'
,
'性别'
),
(
'民族'
,
'民族'
),)
IC_MAP_1
=
((
'签发机关'
,
'签发机关'
),)
IC_CN_NAME
=
'身份证'
IC_CLASSIFY
=
33
IC_TURE_OR_FALSE
=
'真伪'
...
...
@@ -670,6 +677,12 @@ IC_FIELD_ORDER_0 = ((IC_TURE_OR_FALSE, '身份证'),
(
'民族'
,
'民族'
),)
IC_FIELD_ORDER_1
=
((
IC_TURE_OR_FALSE
,
'身份证'
),
(
'有效期限'
,
'有效期限'
),
(
'签发机关'
,
'签发机关'
),)
# 居住证
RP_MAP_0
=
((
'姓名'
,
'姓名'
),
(
'公民身份号码'
,
'公民身份号码'
),
(
'出生年月'
,
'出生'
),
(
'住址'
,
'住址'
),
(
'性别'
,
'性别'
),)
RP_MAP_1
=
((
'签发机关'
,
'签发机关'
),
(
'通行证号码'
,
'通行证号码'
))
RP_CN_NAME
=
'居住证'
RP_CLASSIFY
=
10087
RP_FIELD_ORDER_0
=
((
'姓名'
,
'姓名'
),
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
086507c
...
...
@@ -246,6 +246,59 @@ class Command(BaseCommand, LoggerMixin):
# contract_result.setdefault(page_num, []).append(rebuild_page_info)
contract_result
.
setdefault
(
classify
,
dict
())
.
setdefault
(
page_num
,
[])
.
append
(
rebuild_page_info
)
# def rebuild_result(self, ocr_data, classify, img_path):
# license_data = ocr_data.get('data')
# if not license_data:
# return
# if classify == consts.IC_CLASSIFY:
# rebuild_data_dict = {}
# card_type = license_data.get('type', '')
# is_ic = card_type.startswith('身份证')
# is_info_side = card_type.endswith('信息面')
# rebuild_data_dict['类别'] = '0' if is_ic else '1'
# if is_ic:
# field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
# else:
# field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
# for write_field, search_field in field_map:
# rebuild_data_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '')
# if not is_info_side:
# start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '')
# end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '')
# rebuild_data_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time)
# return [rebuild_data_dict]
# elif classify == consts.MVC_CLASSIFY:
# # license_data[consts.IMG_PATH_KEY] = img_path
# rebuild_data_dict = {}
# mvc_page = license_data.pop('page', 'VehicleRCI')
# mvc_res = license_data.pop('results', {})
# if mvc_page == 'VehicleRegArea':
# rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '')
# for register_info in mvc_res.get('登记信息', []):
# register_info.pop('register_type', None)
# register_info.pop('register_type_name', None)
# for cn_key, detail_dict in register_info.items():
# rebuild_data_dict.setdefault(cn_key, []).append(
# detail_dict.get('words', ''))
# else:
# for cn_key, detail_dict in mvc_res.items():
# rebuild_data_dict[cn_key] = detail_dict.get('words', '')
# del mvc_res
# return [rebuild_data_dict]
# elif classify == consts.MVI_CLASSIFY:
# rebuild_data_dict = {}
# mvi_res = license_data.pop('result', {})
# for en_key, detail_dict in mvi_res.items():
# rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
# return [rebuild_data_dict]
# elif classify == consts.UCI_CLASSIFY:
# rebuild_data_dict = {}
# mvi_res = license_data.pop('result', {})
# for en_key, detail_dict in mvi_res.items():
# rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
# return [rebuild_data_dict]
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
...
...
@@ -253,7 +306,8 @@ class Command(BaseCommand, LoggerMixin):
if
not
license_data
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
if
classify
==
consts
.
INSURANCE_CLASSIFY
:
# 保单处理
# 保单
if
classify
==
consts
.
INSURANCE_CLASSIFY
:
product_result
=
[
''
,
''
,
''
]
for
product
in
license_data
.
get
(
'result'
,
{})
.
get
(
'productList'
,
[]):
name
=
product
.
get
(
'name'
,
{})
.
get
(
'words'
,
''
)
...
...
@@ -281,7 +335,8 @@ class Command(BaseCommand, LoggerMixin):
'特别约定第一受益人'
:
special
,
}
license_summary
.
setdefault
(
classify
,
[])
.
append
(
insurance_ocr_result
)
elif
classify
==
consts
.
DDA_CLASSIFY
:
# DDA处理
# DDA
elif
classify
==
consts
.
DDA_CLASSIFY
:
pro
=
ocr_data
.
get
(
'confidence'
,
0
)
if
pro
<
consts
.
DDA_PRO_MIN
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
...
...
@@ -291,40 +346,80 @@ class Command(BaseCommand, LoggerMixin):
dda_ocr_result
[
consts
.
DDA_PRO
]
=
pro
dda_ocr_result
[
consts
.
IMG_PATH_KEY
]
=
img_path
license_summary
.
setdefault
(
classify
,
[])
.
append
(
dda_ocr_result
)
elif
classify
==
consts
.
HMH_CLASSIFY
:
# 抵押登记豁免函
hmh_ocr_result
=
{
key
:
value
.
get
(
'words'
,
''
)
for
key
,
value
in
license_data
.
items
()}
# 抵押登记豁免函
elif
classify
==
consts
.
HMH_CLASSIFY
:
hmh_ocr_result
=
{
key
:
value
.
get
(
'words'
,
''
)
for
key
,
value
in
license_data
.
get
(
'words_result'
,
{})
.
items
()}
license_summary
.
setdefault
(
classify
,
[])
.
append
(
hmh_ocr_result
)
elif
classify
==
consts
.
JYPZ_CLASSIFY
:
# 二手车交易凭证
# 二手车交易凭证
elif
classify
==
consts
.
JYPZ_CLASSIFY
:
jypz_ocr_result
=
{
key
:
value
.
get
(
'words'
,
''
)
for
key
,
value
in
license_data
.
get
(
'result'
,
{})
.
items
()}
license_summary
.
setdefault
(
classify
,
[])
.
append
(
jypz_ocr_result
)
elif
classify
==
consts
.
MVC_CLASSIFY
:
# 车辆登记证 3/4页结果整合
for
mvc_dict
in
license_data
:
mvc_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
try
:
mvc_page
=
mvc_dict
.
pop
(
'page'
)
except
Exception
as
e
:
pass
else
:
if
mvc_page
==
'VehicleRegArea'
:
mvc_res
=
mvc_dict
.
pop
(
'results'
,
{})
mvc_dict
[
'机动车登记证书编号'
]
=
mvc_res
.
get
(
'register_no'
,
{})
.
get
(
'words'
,
''
)
for
register_info
in
mvc_res
.
get
(
'register_info'
,
[]):
for
detail_dict
in
register_info
.
get
(
'details'
,
{})
.
values
():
mvc_dict
.
setdefault
(
detail_dict
.
get
(
'chinese_key'
,
'未知'
),
[])
.
append
(
detail_dict
.
get
(
'words'
,
''
))
del
mvc_res
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
# 车辆登记证 3/4页结果整合
elif
classify
==
consts
.
MVC_CLASSIFY
:
rebuild_data_dict
=
{}
rebuild_data_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
mvc_page
=
license_data
.
pop
(
'page'
,
'VehicleRCI'
)
mvc_res
=
license_data
.
pop
(
'results'
,
{})
if
mvc_page
==
'VehicleRegArea'
:
rebuild_data_dict
[
'机动车登记证书编号'
]
=
mvc_res
.
get
(
'机动车登记证书编号'
,
{})
.
get
(
'words'
,
''
)
for
register_info
in
mvc_res
.
get
(
'登记信息'
,
[]):
register_info
.
pop
(
'register_type'
,
None
)
register_info
.
pop
(
'register_type_name'
,
None
)
for
cn_key
,
detail_dict
in
register_info
.
items
():
rebuild_data_dict
.
setdefault
(
cn_key
,
[])
.
append
(
detail_dict
.
get
(
'words'
,
''
))
else
:
for
cn_key
,
detail_dict
in
mvc_res
.
items
():
rebuild_data_dict
[
cn_key
]
=
detail_dict
.
get
(
'words'
,
''
)
del
mvc_res
license_summary
.
setdefault
(
classify
,
[])
.
append
(
rebuild_data_dict
)
# for mvc_dict in license_data:
# mvc_dict[consts.IMG_PATH_KEY] = img_path
# try:
# mvc_page = mvc_dict.pop('page')
# except Exception as e:
# pass
# else:
# if mvc_page == 'VehicleRegArea':
# mvc_res = mvc_dict.pop('results', {})
# mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '')
# for register_info in mvc_res.get('register_info', []):
# for detail_dict in register_info.get('details', {}).values():
# mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append(
# detail_dict.get('words', ''))
# del mvc_res
# license_summary.setdefault(classify, []).extend(license_data)
# 身份证真伪
elif
classify
==
consts
.
IC_CLASSIFY
:
id_card_dict
=
{}
card_type
=
license_data
.
get
(
'type'
,
''
)
is_ic
=
card_type
.
startswith
(
'身份证'
)
is_info_side
=
card_type
.
endswith
(
'信息面'
)
id_card_dict
[
'类别'
]
=
'0'
if
is_ic
else
'1'
if
is_ic
:
field_map
=
consts
.
IC_MAP_0
if
is_info_side
else
consts
.
IC_MAP_1
else
:
field_map
=
consts
.
RP_MAP_0
if
is_info_side
else
consts
.
RP_MAP_1
for
write_field
,
search_field
in
field_map
:
id_card_dict
[
write_field
]
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
search_field
,
{})
.
get
(
'words'
,
''
)
if
not
is_info_side
:
start_time
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
'签发日期'
,
{})
.
get
(
'words'
,
''
)
end_time
=
license_data
.
get
(
'words_result'
,
{})
.
get
(
'失效日期'
,
{})
.
get
(
'words'
,
''
)
id_card_dict
[
'有效期限'
]
=
'{0}-{1}'
.
format
(
start_time
,
end_time
)
elif
classify
==
consts
.
IC_CLASSIFY
:
# 身份证真伪
for
id_card_dict
in
license_data
:
if
'有效期限'
in
id_card_dict
:
id_card_dict
[
consts
.
IMG_PATH_KEY_2
]
=
img_path
else
:
id_card_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
if
not
is_info_side
:
id_card_dict
[
consts
.
IMG_PATH_KEY_2
]
=
img_path
else
:
id_card_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
if
is_ic
:
try
:
base64_img
=
id_card_dict
.
pop
(
'base64_img'
)
base64_img
=
license_data
.
pop
(
'base64_img'
)
except
Exception
as
e
:
continue
pass
else
:
card_type
=
-
1
json_data_4
=
{
...
...
@@ -371,7 +466,14 @@ class Command(BaseCommand, LoggerMixin):
ic_id
=
id_card_dict
[
consts
.
IC_KEY_FIELD
[
1
]]
.
strip
()
if
len
(
ic_name
)
>
0
and
len
(
ic_id
)
>
0
:
dda_id_bc_mapping
.
setdefault
(
consts
.
IC_FIELD
,
[])
.
append
((
ic_name
,
ic_id
,
img_path
))
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
license_summary
.
setdefault
(
classify
,
[])
.
append
(
id_card_dict
)
elif
classify
==
consts
.
MVI_CLASSIFY
or
classify
==
consts
.
UCI_CLASSIFY
:
rebuild_data_dict
=
{}
mvi_res
=
license_data
.
pop
(
'result'
,
{})
for
en_key
,
detail_dict
in
mvi_res
.
items
():
rebuild_data_dict
[
detail_dict
.
get
(
'chinese_key'
,
''
)]
=
detail_dict
.
get
(
'words'
,
''
)
license_summary
.
setdefault
(
classify
,
[])
.
append
(
rebuild_data_dict
)
# 其他
else
:
for
res_dict
in
license_data
:
res_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment