Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
3d620b3b
authored
2022-03-14 11:28:11 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add slice
1 parent
84d79e06
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
120 additions
and
11 deletions
src/apps/doc/management/commands/ocr_process.py
src/celery_compare/tasks.py
src/common/electronic_hil_contract/get_char.py
src/apps/doc/management/commands/ocr_process.py
View file @
3d620b3
...
...
@@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin):
page_num_only
=
page_num
rebuild_page_info
=
[]
text_key
=
'words'
position_key
=
'position'
for
key
,
value
in
contract_dict
.
get
(
'page_info'
,
{})
.
items
():
if
value
is
None
:
rebuild_page_info
.
append
((
key
,
))
...
...
@@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin):
contract_result
.
setdefault
(
classify
,
dict
())
.
setdefault
(
page_num_only
,
[])
.
append
(
rebuild_page_info
)
page_compare_dict
=
{}
page_compare_dict
=
{
consts
.
IMG_PATH_KEY
:
img_path
,
consts
.
ALL_POSITION_KEY
:
{},
}
for
key
,
value
in
contract_dict
.
get
(
'page_info'
,
{})
.
items
():
if
not
isinstance
(
value
,
dict
):
continue
elif
text_key
in
value
:
position_list
=
value
.
get
(
position_key
,
[])
page_compare_dict
[
consts
.
ALL_POSITION_KEY
][
key
]
=
position_list
if
isinstance
(
position_list
,
list
)
else
[]
if
value
[
text_key
]
is
None
:
page_compare_dict
[
key
]
=
''
elif
isinstance
(
value
[
text_key
],
str
):
...
...
@@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin):
page_compare_dict
[
key
]
=
value
[
text_key
]
else
:
page_compare_dict
[
key
]
=
{}
page_compare_dict
[
consts
.
ALL_POSITION_KEY
][
key
]
=
{}
for
sub_key
,
sub_value
in
value
.
items
():
position_list
=
sub_value
.
get
(
position_key
,
[])
page_compare_dict
[
consts
.
ALL_POSITION_KEY
][
key
][
sub_key
]
=
position_list
if
isinstance
(
position_list
,
list
)
else
[]
if
sub_value
[
text_key
]
is
None
:
page_compare_dict
[
key
][
sub_key
]
=
''
elif
isinstance
(
sub_value
[
text_key
],
str
):
page_compare_dict
[
key
][
sub_key
]
=
sub_value
[
text_key
]
page_compare_dict
[
consts
.
IMG_PATH_KEY
]
=
img_path
contract_result_compare
.
setdefault
(
classify
,
dict
())[
consts
.
ASP_KEY
]
=
contract_dict
.
get
(
consts
.
ASP_KEY
,
False
)
# "position" = [xmin, ymin, xmax, ymax]
contract_result_compare
.
setdefault
(
classify
,
dict
())[
page_num_only
]
=
page_compare_dict
@staticmethod
def rebuild_position(src_position):
    """Convert an edge-based box into a left/top/width/height box.

    Args:
        src_position: Object exposing ``.get`` (e.g. a dict) with
            'left', 'top', 'right' and 'bottom' entries. Missing
            entries are treated as 0.

    Returns:
        A dict with keys 'left', 'top', 'width' and 'height', where
        width = right - left and height = bottom - top. An all-zero
        box is returned when ``src_position`` has no ``.get`` method
        (e.g. ``None``) or when the coordinates cannot be subtracted.

    Example:
        {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}
        -> {'left': 470, 'top': 671, 'width': 72, 'height': 23}
    """
    try:
        left = src_position.get('left', 0)
        top = src_position.get('top', 0)
        right = src_position.get('right', 0)
        bottom = src_position.get('bottom', 0)
        width = right - left
        height = bottom - top
    except (AttributeError, TypeError):
        # Best effort: a missing or malformed position must not abort
        # processing, so fall back to an empty box. Only the failures
        # that bad input can trigger are caught (no ``.get`` attribute,
        # non-numeric coordinates); anything else should surface.
        return {'left': 0, 'top': 0, 'width': 0, 'height': 0}
    return {
        'left': left,
        'top': top,
        'width': width,
        'height': height,
    }
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
...
...
@@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin):
# 保单
if
classify
==
consts
.
INSURANCE_CLASSIFY
:
product_result
=
[
''
,
''
,
''
]
product_result_position
=
[
dict
(),
dict
(),
dict
()]
min_char_count_1
=
1000
min_char_count_2
=
1000
for
product
in
license_data
.
get
(
'result'
,
{})
.
get
(
'productList'
,
[]):
...
...
@@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin):
min_char_count_1
=
len
(
name
)
product_result
[
0
]
=
product
.
get
(
'coverage'
,
{})
.
get
(
'words'
,
''
)
product_result
[
2
]
=
product
.
get
(
'deductible_franchise'
,
{})
.
get
(
'words'
,
''
)
product_result_position
[
0
]
=
self
.
rebuild_position
(
product
.
get
(
'coverage'
,
{})
.
get
(
'position'
,
{}))
product_result_position
[
2
]
=
self
.
rebuild_position
(
product
.
get
(
'deductible_franchise'
,
{})
.
get
(
'position'
,
{}))
elif
name
.
find
(
'第三者责任'
)
!=
-
1
:
if
len
(
name
)
<
min_char_count_2
:
min_char_count_2
=
len
(
name
)
product_result
[
1
]
=
product
.
get
(
'coverage'
,
{})
.
get
(
'words'
,
''
)
product_result_position
[
1
]
=
self
.
rebuild_position
(
product
.
get
(
'coverage'
,
{})
.
get
(
'position'
,
{}))
special_str
=
license_data
.
get
(
'result'
,
{})
.
get
(
'1stBeneficiary'
,
{})
.
get
(
'words'
,
''
)
special
=
'无'
...
...
@@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin):
consts
.
IMG_PATH_KEY
:
img_path
,
consts
.
SECTION_IMG_PATH_KEY
:
section_img_path
,
}
# 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}
# position_dict = {
# '被保险人姓名': {consts.FIELD_POSITION_KEY: {}}
# }
# insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
position_dict
=
{
'被保险人姓名'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'insured'
,
{})
.
get
(
'name'
,
{})
.
get
(
'position'
,
{}))},
'被保险人证件号码'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'insured'
,
{})
.
get
(
'certiCode'
,
{})
.
get
(
'position'
,
{}))},
'车架号'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'vehicle'
,
{})
.
get
(
'VIN'
,
{})
.
get
(
'position'
,
{}))},
'机动车损失保险金额'
:
{
consts
.
FIELD_POSITION_KEY
:
product_result_position
[
0
]},
'机动车第三者责任保险金额'
:
{
consts
.
FIELD_POSITION_KEY
:
product_result_position
[
1
]},
'机动车损失保险绝对免赔率/绝对免赔额'
:
{
consts
.
FIELD_POSITION_KEY
:
product_result_position
[
2
]},
'保险费合计'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'premiumSum'
,
{})
.
get
(
'position'
,
{}))},
'保险起始日期'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'startDate'
,
{})
.
get
(
'position'
,
{}))},
'保险截止日期'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'endDate'
,
{})
.
get
(
'position'
,
{}))},
'保单章'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'seal'
,
{})
.
get
(
'position'
,
{}))},
'特别约定第一受益人'
:
{
consts
.
FIELD_POSITION_KEY
:
self
.
rebuild_position
(
license_data
.
get
(
'result'
,
{})
.
get
(
'1stBeneficiary'
,
{})
.
get
(
'position'
,
{}))},
}
insurance_ocr_result
[
consts
.
ALL_POSITION_KEY
]
=
position_dict
license_summary
.
setdefault
(
classify
,
[])
.
append
(
insurance_ocr_result
)
# DDA
elif
classify
==
consts
.
DDA_CLASSIFY
:
...
...
@@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin):
res
[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
key1
,
''
)
res
.
setdefault
(
consts
.
IMG_PATH_KEY
,
dict
())[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
consts
.
IMG_PATH_KEY
,
''
)
res
.
setdefault
(
consts
.
ALL_POSITION_KEY
,
dict
())[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
.
get
(
key1
,
[])
else
:
res
[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
key1
,
{})
.
get
(
key2
,
''
)
res
.
setdefault
(
consts
.
IMG_PATH_KEY
,
dict
())[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
consts
.
IMG_PATH_KEY
,
''
)
res
.
setdefault
(
consts
.
ALL_POSITION_KEY
,
dict
())[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
.
get
(
key1
,
{})
.
get
(
key2
,
[])
# res = {
# 'key': 'list or str',
# 'uniq_img_path_key': {
# 'key': 'str',
# },
# 'uniq_all_position_key': {
# 'key': 'list'
# }
# }
license_summary
[
classify
]
=
[
res
]
else
:
res
=
{}
...
...
src/celery_compare/tasks.py
View file @
3d620b3
...
...
@@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
result_field_list
=
[]
field_img_path_dict
=
dict
()
ocr_res
=
dict
()
if
ocr_res_str
is
not
None
:
ocr_res_list
=
json
.
loads
(
ocr_res_str
)
ocr_res
=
ocr_res_list
.
pop
()
for
name
,
value
in
strip_list
:
# 购置税校验
if
name
==
consts
.
SE_AFC_CON_FIELD
[
21
]:
if
len
(
value
)
==
3
:
reason
=
[]
...
...
@@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
result_field_list
.
append
((
name
,
value
,
consts
.
RESULT_N
,
empty_str
,
empty_str
,
ErrorType
.
NF
.
value
,
'{0}未找到'
.
format
(
license_en
)))
if
ocr_res_str
is
not
None
:
img_map
=
{}
for
name
,
_
,
result
,
_
,
img_path
,
_
,
_
in
result_field_list
:
if
result
==
consts
.
RESULT_N
:
img_map
.
setdefault
(
img_path
,
[])
.
append
(
name
)
for
path
,
field_list
in
img_map
.
items
():
if
os
.
path
.
exists
(
path
):
pre
,
suf
=
os
.
path
.
splitext
(
path
)
last_img
=
cv2
.
imread
(
path
)
for
field_idx
,
field
in
enumerate
(
field_list
):
try
:
save_path
=
'{0}_{1}{2}'
.
format
(
pre
,
str
(
field_idx
),
suf
)
section_position_list
=
ocr_res
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
.
get
(
field
,
[])
if
isinstance
(
section_position_list
,
list
)
and
len
(
section_position_list
)
==
4
:
field_img
=
last_img
[
section_position_list
[
1
]:
section_position_list
[
3
],
section_position_list
[
0
]:
section_position_list
[
2
],
:]
cv2
.
imwrite
(
save_path
,
field_img
)
field_img_path_dict
[
field
]
=
save_path
else
:
field_img_path_dict
[
field
]
=
path
except
Exception
as
e
:
field_img_path_dict
[
field
]
=
path
return
result_field_list
,
field_img_path_dict
...
...
src/common/electronic_hil_contract/get_char.py
View file @
3d620b3
...
...
@@ -788,6 +788,7 @@ class Finder:
items
=
[]
start
=
False
page
=
None
greater_equal_v35
=
False
for
pno
in
self
.
pdf_info
:
condition
=
False
for
block
in
self
.
pdf_info
[
f
'{pno}'
][
'blocks'
]:
...
...
@@ -796,6 +797,8 @@ class Finder:
for
line
in
block
[
'lines'
]:
for
span
in
line
[
'spans'
]:
bbox
,
text
=
span
[
'bbox'
],
span
[
'text'
]
if
text
==
'租赁利率'
:
greater_equal_v35
=
True
if
'总计'
in
text
:
start
=
True
if
'注:出租人向承租人购买租赁车辆的对价'
in
text
:
...
...
@@ -804,9 +807,14 @@ class Finder:
if
start
==
True
:
items
.
append
(
text
)
lines
=
[[
'项目'
,
'购买价格'
,
'实际融资金额'
]]
for
i
in
range
(
len
(
items
)
//
3
):
line
=
[
items
[
2
+
i
*
3
+
0
],
items
[
2
+
i
*
3
+
1
],
items
[
2
+
i
*
3
+
2
]]
lines
.
append
(
line
)
if
greater_equal_v35
:
for
i
in
range
(
len
(
items
)
//
4
):
line
=
[
items
[
2
+
i
*
4
+
0
],
items
[
2
+
i
*
4
+
1
],
items
[
2
+
i
*
4
+
2
]]
lines
.
append
(
line
)
else
:
for
i
in
range
(
len
(
items
)
//
3
):
line
=
[
items
[
2
+
i
*
3
+
0
],
items
[
2
+
i
*
3
+
1
],
items
[
2
+
i
*
3
+
2
]]
lines
.
append
(
line
)
if
len
(
items
)
>
0
:
lines
.
append
([
items
[
0
],
''
,
items
[
1
]])
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment