Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
fdb7ca98
authored
2021-11-11 17:43:45 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix format
1 parent
87525e99
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
46 additions
and
14 deletions
src/common/electronic_afc_contract/get_char.py
src/common/electronic_afc_contract/get_char.py
View file @
fdb7ca9
...
...
@@ -18,6 +18,7 @@ class Finder:
self
.
item
=
{
"words"
:
None
,
"position"
:
None
,
}
def
gen_init_result
(
self
,
is_asp
):
# 格式化算法输出
self
.
init_result
=
{
"page_1"
:
{
"合同编号"
:
self
.
item
,
...
...
@@ -108,10 +109,12 @@ class Finder:
"日期"
:
self
.
item
,
},
}
def poly_to_rectangle(self, poly):
    """Convert an 8-value quadrilateral into an axis-aligned bounding box.

    The previous implementation unpacked ``poly`` into repeated target
    names, so only the last value bound to each name survived and four of
    the eight coordinates were ignored. That is only correct when the
    quadrilateral is already a perfect axis-aligned rectangle given in a
    specific corner order. Taking the extremes of all x and y values
    returns the same result for that case and a true enclosing box for
    skewed quadrilaterals.

    Args:
        poly: Iterable of exactly eight numbers laid out as
            ``x0, y0, x1, y1, x2, y2, x3, y3``.

    Returns:
        list: ``[xmin, ymin, xmax, ymax]`` enclosing all four corners.

    Raises:
        ValueError: If ``poly`` does not contain exactly eight values
            (same exception type the original tuple-unpacking raised).
    """
    # Materialize once so generators and other one-shot iterables work.
    coords = list(poly)
    if len(coords) != 8:
        raise ValueError(
            "expected 8 polygon coordinates, got %d" % len(coords)
        )

    xs = coords[0::2]  # x0, x1, x2, x3
    ys = coords[1::2]  # y0, y1, y2, y3
    return [min(xs), min(ys), max(xs), max(ys)]
def
get_contract_no
(
self
,
page_num
):
"""传入页码,查看该页码右上角的编号
...
...
@@ -133,6 +136,7 @@ class Finder:
contract_no
[
'words'
]
=
words
contract_no
[
'position'
]
=
location
return
contract_no
def
get_vehicle_price
(
self
,
page_num
=
'0'
):
vehicle_price
=
self
.
item
.
copy
()
# vehicle_price['words'] = ''
...
...
@@ -145,6 +149,7 @@ class Finder:
vehicle_price
[
'words'
]
=
words
vehicle_price
[
'position'
]
=
location
return
vehicle_price
def
get_vin
(
self
,
page_num
=
'0'
):
vin
=
self
.
item
.
copy
()
# vin['words'] = ''
...
...
@@ -157,6 +162,7 @@ class Finder:
vin
[
'words'
]
=
words
vin
[
'position'
]
=
location
return
vin
def
get_loan_principal
(
self
,
page_num
=
'0'
):
chinese_keywords
=
[
'壹'
,
'贰'
,
'叁'
,
'肆'
,
'伍'
,
'陆'
,
'柒'
,
'捌'
,
'玖'
,
'拾'
,
'佰'
,
'仟'
,
'万'
,
'亿'
,
'元'
,
'角'
,
'分'
,
'零'
,
'整'
]
...
...
@@ -197,6 +203,7 @@ class Finder:
asp_2
[
'position'
]
=
bbox
asp_2
[
'words'
]
=
words
return
upper
,
lower
,
asp_1
,
asp_2
def
get_loan_term
(
self
,
page_num
=
'0'
):
loan_term
=
self
.
item
.
copy
()
all_text
=
''
...
...
@@ -220,6 +227,7 @@ class Finder:
loan_term
[
'position'
]
=
bbox
loan_term
[
'words'
]
=
words
return
loan_term
def
mergelist
(
self
,
text_list
):
pattern
=
re
.
compile
(
"[^
\u4e00
-
\u9fa5
]"
)
# 匹配不是中文的其他字符
mergeindex
=
-
1
...
...
@@ -230,8 +238,10 @@ class Finder:
if
mergeindex
==
-
1
:
return
text_list
else
:
new_text_list
=
text_list
[:
mergeindex
]
+
[
text_list
[
mergeindex
]
+
text_list
[
mergeindex
+
1
]]
+
text_list
[
mergeindex
+
2
:]
new_text_list
=
text_list
[:
mergeindex
]
+
[
text_list
[
mergeindex
]
+
text_list
[
mergeindex
+
1
]]
+
text_list
[
mergeindex
+
2
:]
return
self
.
mergelist
(
new_text_list
)
def
get_asp_details
(
self
,
page_num
):
asp_details_table_term
=
self
.
item
.
copy
()
asp_details_table
=
[]
...
...
@@ -262,6 +272,7 @@ class Finder:
if
len
(
asp_details_table
)
>
0
:
asp_details_table_term
[
'words'
]
=
asp_details_table
return
asp_details_table_term
def
get_signature
(
self
):
signature
=
self
.
item
.
copy
()
for
block
in
self
.
pdf_info
[
'0'
][
'blocks'
]:
...
...
@@ -275,6 +286,7 @@ class Finder:
signature
[
'words'
]
=
words
signature
[
'position'
]
=
bbox
return
signature
def
get_somebody
(
self
,
top
,
bottom
):
# 指定上下边界后,返回上下边界内的客户信息
_name
=
self
.
item
.
copy
()
...
...
@@ -309,6 +321,7 @@ class Finder:
_id
[
'position'
]
=
bbox
_id
[
'words'
]
=
words
return
_name
,
_id
def
get_seller
(
self
):
seller
=
self
.
item
.
copy
()
# 先找到 key
...
...
@@ -330,11 +343,12 @@ class Finder:
for
line
in
block
[
'lines'
]:
for
span
in
line
[
'spans'
]:
bbox
,
text
=
span
[
'bbox'
],
span
[
'text'
]
if
anchor_bbox
[
2
]
<
np
.
mean
(
bbox
[::
2
])
<
half_width
and
\
anchor_bbox
[
1
]
<
np
.
mean
(
bbox
[
1
::
2
])
<
anchor_bbox
[
3
]:
if
anchor_bbox
[
2
]
<
np
.
mean
(
bbox
[::
2
])
<
half_width
and
\
anchor_bbox
[
1
]
<
np
.
mean
(
bbox
[
1
::
2
])
<
anchor_bbox
[
3
]:
seller
[
'position'
]
=
bbox
seller
[
'words'
]
=
text
return
seller
def
get_payback_account
(
self
):
account
=
self
.
item
.
copy
()
account_name
=
self
.
item
.
copy
()
...
...
@@ -387,6 +401,7 @@ class Finder:
account_bank
[
'position'
]
=
bbox
account_bank
[
'words'
]
=
words
return
account
,
account_name
,
account_bank
def
get_repayment_schedule
(
self
):
repayment_schedule
=
self
.
item
.
copy
()
# 只看第二页
...
...
@@ -416,6 +431,7 @@ class Finder:
if
len
(
repayment_schedule_table
)
>
0
:
repayment_schedule
[
'words'
]
=
repayment_schedule_table
return
repayment_schedule
def
get_signature_role_1
(
self
):
signature_role_1
=
self
.
init_item
.
copy
()
# 先定位签字区域
...
...
@@ -445,11 +461,13 @@ class Finder:
else
:
words
=
'无'
boxes
=
np
.
array
(
boxes
)
.
reshape
((
-
1
,
2
))
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
signature_role_1
[
'page_num'
]
=
page_num
signature_role_1
[
'position'
]
=
position
signature_role_1
[
'words'
]
=
words
return
signature_role_1
def
get_signature_role_2
(
self
):
signature_role_2
=
self
.
init_item
.
copy
()
# 先定位签字区域
...
...
@@ -479,11 +497,13 @@ class Finder:
else
:
words
=
'无'
boxes
=
np
.
array
(
boxes
)
.
reshape
((
-
1
,
2
))
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
signature_role_2
[
'page_num'
]
=
page_num
signature_role_2
[
'position'
]
=
position
signature_role_2
[
'words'
]
=
words
return
signature_role_2
def
get_signature_role_3
(
self
):
signature_role_3
=
self
.
init_item
.
copy
()
# 先定位签字区域
...
...
@@ -513,11 +533,13 @@ class Finder:
else
:
words
=
'无'
boxes
=
np
.
array
(
boxes
)
.
reshape
((
-
1
,
2
))
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
signature_role_3
[
'page_num'
]
=
page_num
signature_role_3
[
'position'
]
=
position
signature_role_3
[
'words'
]
=
words
return
signature_role_3
def
get_signature_role_4
(
self
):
signature_role_4
=
self
.
init_item
.
copy
()
# 先定位签字区域
...
...
@@ -547,11 +569,13 @@ class Finder:
else
:
words
=
'无'
boxes
=
np
.
array
(
boxes
)
.
reshape
((
-
1
,
2
))
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
signature_role_4
[
'page_num'
]
=
page_num
signature_role_4
[
'position'
]
=
position
signature_role_4
[
'words'
]
=
words
return
signature_role_4
def
get_signature_role_5
(
self
):
signature_role_5
=
self
.
init_item
.
copy
()
# 先定位签字区域
...
...
@@ -582,11 +606,13 @@ class Finder:
else
:
words
=
'无'
boxes
=
np
.
array
(
boxes
)
.
reshape
((
-
1
,
2
))
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
position
=
[
min
(
boxes
[:,
0
]),
min
(
boxes
[:,
1
]),
max
(
boxes
[:,
0
]),
max
(
boxes
[:,
1
])]
signature_role_5
[
'page_num'
]
=
page_num
signature_role_5
[
'position'
]
=
position
signature_role_5
[
'words'
]
=
words
return
signature_role_5
def
get_last_page_signature
(
self
,
page_num
,
top
,
bottom
):
signature_name
=
self
.
item
.
copy
()
signature_date
=
self
.
item
.
copy
()
...
...
@@ -610,7 +636,7 @@ class Finder:
for
line
in
block
[
'lines'
]:
for
span
in
line
[
'spans'
]:
bbox
,
text
=
span
[
'bbox'
],
span
[
'text'
]
if
'签署日期'
in
text
and
int
(
anchor_top
)
<
np
.
mean
(
bbox
[
1
::
2
])
<
int
(
anchor_bottom
):
if
'签署日期'
in
text
and
int
(
anchor_top
)
<
np
.
mean
(
bbox
[
1
::
2
])
<
int
(
anchor_bottom
):
name
=
text
.
split
(
' '
)[
0
]
date
=
text
.
split
(
':'
)[
-
1
]
signature_name
[
'words'
]
=
name
...
...
@@ -618,6 +644,7 @@ class Finder:
signature_date
[
'words'
]
=
date
signature_date
[
'position'
]
=
bbox
return
signature_name
,
signature_date
def
get_info
(
self
):
"""
block['type'] == 0 : 表示该元素为图片
...
...
@@ -672,22 +699,27 @@ class Finder:
contract_no
=
self
.
get_contract_no
(
page_num
=
'0'
)
self
.
init_result
[
'page_2'
][
'合同编号'
]
=
contract_no
# 找借款人及抵押人(地址字段原本有空格)
borrower_name
,
borrower_id
=
self
.
get_somebody
(
top
=
'借款人及抵押人:'
,
bottom
=
'共同借款人:'
)
borrower_name
,
borrower_id
=
self
.
get_somebody
(
top
=
'借款人及抵押人:'
,
bottom
=
'共同借款人:'
)
# 这是为了同时兼容 8.1 版本
if
borrower_name
[
'words'
]
==
None
:
borrower_name
,
borrower_id
=
self
.
get_somebody
(
top
=
'借款人及抵押人:'
,
bottom
=
'共同借款人及共同抵押人:'
)
borrower_name
,
borrower_id
=
self
.
get_somebody
(
top
=
'借款人及抵押人:'
,
bottom
=
'共同借款人及共同抵押人:'
)
self
.
init_result
[
'page_2'
][
'借款人及抵押人'
][
'name'
]
=
borrower_name
self
.
init_result
[
'page_2'
][
'借款人及抵押人'
][
'id'
]
=
borrower_id
# 找共同借款人及共同抵押人
co_borrower_name
,
co_borrower_id
=
self
.
get_somebody
(
top
=
'共同借款人:'
,
bottom
=
'保证人1:'
)
co_borrower_name
,
co_borrower_id
=
self
.
get_somebody
(
top
=
'共同借款人:'
,
bottom
=
'保证人1:'
)
self
.
init_result
[
'page_2'
][
'共同借款人及共同抵押人'
][
'name'
]
=
co_borrower_name
self
.
init_result
[
'page_2'
][
'共同借款人及共同抵押人'
][
'id'
]
=
co_borrower_id
# 保证人1
first_guarantor_name
,
first_guarantor_id
=
self
.
get_somebody
(
top
=
'保证人1:'
,
bottom
=
'保证人2:'
)
first_guarantor_name
,
first_guarantor_id
=
self
.
get_somebody
(
top
=
'保证人1:'
,
bottom
=
'保证人2:'
)
self
.
init_result
[
'page_2'
][
'保证人1'
][
'name'
]
=
first_guarantor_name
self
.
init_result
[
'page_2'
][
'保证人1'
][
'id'
]
=
first_guarantor_id
# 保证人2
second_guarantor_name
,
second_guarantor_id
=
self
.
get_somebody
(
top
=
'保证人2:'
,
bottom
=
'第一章'
)
second_guarantor_name
,
second_guarantor_id
=
self
.
get_somebody
(
top
=
'保证人2:'
,
bottom
=
'第一章'
)
self
.
init_result
[
'page_2'
][
'保证人2'
][
'name'
]
=
second_guarantor_name
self
.
init_result
[
'page_2'
][
'保证人2'
][
'id'
]
=
second_guarantor_id
# 所购车辆价格
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment