Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
b9e0884e
authored
2023-02-08 17:11:17 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/fsm-contract' into feature/uat-tmp
2 parents
3474c20c
4c67581e
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
126 additions
and
19 deletions
src/common/electronic_afc_contract/get_char_fsm.py
src/common/electronic_hil_contract/get_char_fsm.py
src/common/fsm_econtract/retriever.py
src/common/electronic_afc_contract/get_char_fsm.py
View file @
b9e0884
...
...
@@ -62,6 +62,7 @@ class Finder:
},
"page_3"
:
{
"合同编号"
:
self
.
item
,
"还款计划表"
:
self
.
item
,
"车辆代理商"
:
self
.
item
,
},
"page_4"
:
{
"合同编号"
:
self
.
item
,
"附加产品融资贷款本金总金额明细"
:
self
.
item
,
...
...
@@ -71,6 +72,7 @@ class Finder:
"page_6"
:
{
"合同编号"
:
self
.
item
,
},
}
if
self
.
is_asp
:
self
.
init_result
[
"page_7"
]
=
{
"合同编号"
:
self
.
item
,
}
self
.
init_result
[
"page_8"
]
=
{
"合同编号"
:
self
.
item
,
...
...
@@ -90,6 +92,25 @@ class Finder:
"日期"
:
self
.
item
,
},
}
else
:
self
.
init_result
[
"page_7"
]
=
{
"合同编号"
:
self
.
item
,
"主借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"共借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人1签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人2签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"见证人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
}
def
get_top_iou
(
self
,
poly
,
ocr_result
):
"""传入一个多边形, 找到与之最匹配的多边形
...
...
@@ -398,6 +419,39 @@ class Finder:
seller
[
'words'
]
=
text
return
seller
def get_cldls(self):
    """Find the text span that sits beside the '车辆代理商' label on page '2'.

    Works on ``self.pdf_info['2']`` in two passes over its text blocks
    (blocks whose ``'type'`` is 0):

    1. Locate the span whose stripped text equals '车辆代理商' and keep
       its bbox as the anchor.
    2. Return the first span whose bbox centre lies to the right of the
       anchor, left of the page's horizontal midpoint, and vertically
       inside the anchor's extent.

    NOTE(review): bboxes are presumably (x0, y0, x1, y1) -- confirm
    against whatever produces ``pdf_info``.

    Returns:
        A copy of ``self.item`` with ``'position'`` and ``'words'`` set to
        the matched span's bbox and text; the unmodified copy when either
        the label or a matching value span is not found.
    """
    result = self.item.copy()
    page = self.pdf_info['2']

    # Pass 1: locate the label. Every span of a line is inspected, so the
    # last matching span on the first matching line becomes the anchor.
    label_box = None
    for blk in page['blocks']:
        if blk['type'] != 0:
            continue
        for ln in blk['lines']:
            for sp in ln['spans']:
                box, content = sp['bbox'], sp['text']
                if content.strip() == '车辆代理商':
                    label_box = box
            if label_box is not None:
                break
        if label_box is not None:
            break

    # Without a usable anchor there is nothing to match against.
    if not label_box:
        return result

    # Pass 2: pick the value span positioned relative to the label.
    x_limit = page['width'] * 0.5
    for blk in page['blocks']:
        if blk['type'] != 0:
            continue
        for ln in blk['lines']:
            for sp in ln['spans']:
                box, content = sp['bbox'], sp['text']
                # Horizontal centre: right of the label, left of mid-page.
                if not (label_box[2] < np.mean(box[::2]) < x_limit):
                    continue
                # Vertical centre: within the label's vertical extent.
                if not (label_box[1] < np.mean(box[1::2]) < label_box[3]):
                    continue
                result['position'] = box
                result['words'] = content
                return result
    return result
def
get_borrower_collection_account
(
self
):
account
=
self
.
item
.
copy
()
account_name
=
self
.
item
.
copy
()
...
...
@@ -885,6 +939,9 @@ class Finder:
repayment_schedule_table
=
self
.
get_repayment_schedule
()
# print(repayment_schedule_table)
self
.
init_result
[
'page_3'
][
'还款计划表'
]
=
repayment_schedule_table
# 车辆代理商
cldls
=
self
.
get_cldls
()
self
.
init_result
[
'page_3'
][
'车辆代理商'
]
=
cldls
#######################################
# Page 4
# 找合同编号
...
...
@@ -907,6 +964,8 @@ class Finder:
contract_no
=
self
.
get_contract_no
(
page_num
=
'5'
)
# print(contract_no)
self
.
init_result
[
'page_6'
][
'合同编号'
]
=
contract_no
if
self
.
is_asp
:
# Page 7
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'6'
)
...
...
@@ -944,6 +1003,41 @@ class Finder:
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'见证人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'见证人签字'
][
'日期'
]
=
signature_date
else
:
# Page 7
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'6'
)
self
.
init_result
[
'page_7'
][
'合同编号'
]
=
contract_no
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'合同编号'
,
bottom
=
'共同借款人'
)
if
signature_name
[
'words'
]
==
None
:
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'合同编号'
,
bottom
=
'共同借款人(抵押人)'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_7'
][
'主借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'主借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'共同借款人'
,
bottom
=
'保证人1'
)
if
signature_name
[
'words'
]
==
None
:
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'共同借款人(抵押人)'
,
bottom
=
'保证人1'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_7'
][
'共借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'共借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'保证人1'
,
bottom
=
'保证人2'
)
self
.
init_result
[
'page_7'
][
'保证人1签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'保证人1签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'保证人2'
,
bottom
=
'在本人面前亲笔签署本合同'
)
self
.
init_result
[
'page_7'
][
'保证人2签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'保证人2签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'在本人面前亲笔签署本合同'
,
bottom
=
'以下无正文'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_7'
][
'见证人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'见证人签字'
][
'日期'
]
=
signature_date
# 重新定制输出
new_results
=
{
"is_asp"
:
self
.
is_asp
,
...
...
src/common/electronic_hil_contract/get_char_fsm.py
View file @
b9e0884
...
...
@@ -51,6 +51,7 @@ class Finder:
"合同编号(正文)"
:
self
.
item
,
"车辆识别代码"
:
self
.
item
,
"车辆卖方(经销商)"
:
self
.
item
,
"车辆代理商"
:
self
.
item
,
"车辆原始销售价格(《机动车销售统一发票》所列金额)"
:
self
.
item
,
"车辆附加产品明细表"
:
self
.
item
,
"融资成本总额"
:
self
.
item
,
...
...
@@ -696,11 +697,13 @@ class Finder:
signature_name
[
'position'
]
=
bbox
return
signature_name
,
signature_date
def
get_electronic_signature
(
self
,
top
,
bottom
):
def
get_electronic_signature
(
self
,
top
,
bottom
,
t_pno
=
None
):
signature
=
self
.
item
.
copy
()
anchor_top
=
None
anchor_bottom
=
None
for
pno
in
self
.
pdf_info
:
if
t_pno
is
not
None
and
pno
!=
t_pno
:
continue
for
block
in
self
.
pdf_info
[
pno
][
'blocks'
]:
if
block
[
'type'
]
!=
0
:
continue
...
...
@@ -709,10 +712,15 @@ class Finder:
bbox
,
text
=
span
[
'bbox'
],
span
[
'text'
]
if
top
in
text
:
anchor_top
=
bbox
[
1
]
if
bottom
in
text
:
elif
bottom
in
text
and
anchor_top
is
not
None
and
bbox
[
3
]
>
anchor_top
:
anchor_bottom
=
bbox
[
3
]
if
anchor_top
is
not
None
and
anchor_bottom
is
not
None
:
# print('in')
# print(anchor_top)
# print(anchor_bottom)
for
pno
in
self
.
pdf_info
:
if
t_pno
is
not
None
and
pno
!=
t_pno
:
continue
for
block
in
self
.
pdf_info
[
pno
][
'blocks'
]:
if
block
[
'type'
]
!=
0
:
continue
...
...
@@ -1005,7 +1013,7 @@ class Finder:
words
=
text
.
split
(
':'
)[
-
1
]
_id
[
'position'
]
=
bbox
_id
[
'page'
]
=
pno
_id
[
'words'
]
=
words
_id
[
'words'
]
=
words
.
strip
()
return
name
,
_id
def
get_key_value_position
(
self
,
key
):
...
...
@@ -1243,6 +1251,9 @@ class Finder:
if
seller
[
'words'
]
==
None
:
seller
=
self
.
get_key_value
(
key
=
'车辆卖方:'
)
self
.
init_result
[
'车辆卖方(经销商)'
]
=
seller
# 找到车辆代理商
cldls
=
self
.
get_key_value
(
key
=
'车辆代理商'
,
page_num
=
'4'
)
self
.
init_result
[
'车辆代理商'
]
=
cldls
# 找到 —— 车辆原始销售价格
vehicle_price
=
self
.
get_key_value
(
key
=
'车辆原始销售价格(《机动车销售统一发票》所列金额):'
)
self
.
init_result
[
'车辆原始销售价格(《机动车销售统一发票》所列金额)'
]
=
vehicle_price
...
...
@@ -1264,7 +1275,7 @@ class Finder:
account
=
self
.
get_key_value
(
key
=
'银行账号:'
,
page_num
=
'4'
)
self
.
init_result
[
'收款银行账户-银行账号'
]
=
account
bank
=
self
.
get_key_value
(
key
=
'开户银行:'
,
page_num
=
'4'
)
self
.
init_result
[
'
承租人收款
账户-开户行'
]
=
bank
self
.
init_result
[
'
收款银行
账户-开户行'
]
=
bank
# 找承租人扣款账户户名、银行账号、银行
name
=
self
.
get_key_value
(
key
=
'户名:'
,
page_num
=
'5'
)
self
.
init_result
[
'银行账户-户名'
]
=
name
...
...
@@ -1277,65 +1288,65 @@ class Finder:
# 承租人姓名、签章
if
is_cdfl
==
False
:
name
=
self
.
get_key_value
(
key
=
'承租人姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人姓名:'
,
bottom
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人姓名:'
,
bottom
=
'保证人1姓名:'
,
t_pno
=
'5'
)
if
name
[
"words"
]
==
None
:
name
=
self
.
get_key_value
(
key
=
'承租人一姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-承租人姓名'
]
=
name
self
.
init_result
[
'签字页-承租人签章'
]
=
electronic_signature
# 保证人1姓名、签章
name
=
self
.
get_key_value
(
key
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人1姓名'
]
=
name
self
.
init_result
[
'签字页-保证人1签章'
]
=
electronic_signature
# 这里用的是 name["words"] == ""
if
name
[
"words"
]
==
""
:
name
=
self
.
get_key_value
(
key
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-共同承租人姓名'
]
=
name
self
.
init_result
[
'签字页-共同承租人签章'
]
=
electronic_signature
# 保证人2姓名、签章
name
=
self
.
get_key_value
(
key
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人2姓名'
]
=
name
self
.
init_result
[
'签字页-保证人2签章'
]
=
electronic_signature
# if判断条件对应3_3版本
if
name
[
"words"
]
==
""
:
name
=
self
.
get_key_value
(
key
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人1姓名'
]
=
name
self
.
init_result
[
'签字页-保证人1签章'
]
=
electronic_signature
# 保证人3姓名、签章
name
=
self
.
get_key_value
(
key
=
'保证人3姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人3姓名:'
,
bottom
=
'日期:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人3姓名:'
,
bottom
=
'日期:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人3姓名'
]
=
name
self
.
init_result
[
'签字页-保证人3签章'
]
=
electronic_signature
# if判断条件对应3_3版本
if
name
[
"words"
]
==
None
:
name
=
self
.
get_key_value
(
key
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'日期:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'日期:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人2姓名'
]
=
name
self
.
init_result
[
'签字页-保证人2签章'
]
=
electronic_signature
else
:
name
=
self
.
get_key_value
(
key
=
'承租人一姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-承租人姓名'
]
=
name
self
.
init_result
[
'签字页-承租人签章'
]
=
electronic_signature
name
=
self
.
get_key_value
(
key
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-共同承租人姓名'
]
=
name
self
.
init_result
[
'签字页-共同承租人签章'
]
=
electronic_signature
name
=
self
.
get_key_value
(
key
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人1姓名'
]
=
name
self
.
init_result
[
'签字页-保证人1签章'
]
=
electronic_signature
name
=
self
.
get_key_value
(
key
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人2姓名'
]
=
name
self
.
init_result
[
'签字页-保证人2签章'
]
=
electronic_signature
...
...
@@ -1404,12 +1415,12 @@ class Finder:
self
.
init_result_2
[
'融资租赁期限'
]
=
lease_term
# 签字页抵押人姓名和签章
name
=
self
.
get_key_value
(
key
=
'抵押人姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押权人盖章'
,
bottom
=
'抵押人配偶姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押权人盖章'
,
bottom
=
'抵押人配偶姓名:'
,
t_pno
=
'1'
)
self
.
init_result_2
[
'签字页-抵押人姓名'
]
=
name
self
.
init_result_2
[
'签字页-抵押人签章'
]
=
electronic_signature
# 签字页抵押人配偶姓名和签章
name
=
self
.
get_key_value
(
key
=
'抵押人配偶姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押人配偶姓名:'
,
bottom
=
'日期'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押人配偶姓名:'
,
bottom
=
'日期'
,
t_pno
=
'1'
)
self
.
init_result_2
[
'签字页-抵押人配偶姓名'
]
=
name
self
.
init_result_2
[
'签字页-抵押人配偶签章'
]
=
electronic_signature
return
self
.
init_result_2
\ No newline at end of file
...
...
src/common/fsm_econtract/retriever.py
View file @
b9e0884
...
...
@@ -6,6 +6,7 @@ class HMHRetriever:
def
__init__
(
self
):
self
.
words_str
=
'words'
self
.
position_str
=
'location'
self
.
fix_hava_str
=
'有'
self
.
default_position
=
[
0
,
0
,
0
,
0
]
self
.
search_fields_list
=
[
(
'借款/承租人姓名'
,
''
),
...
...
@@ -51,7 +52,8 @@ class HMHRetriever:
for
name_date_tuple
in
name_date_list
:
if
len
(
name_date_tuple
)
==
2
:
result
[
self
.
search_fields_list
[
4
][
0
]]
=
{
self
.
words_str
:
'{0} {1}'
.
format
(
name_date_tuple
[
0
]
.
replace
(
'
\u3000
'
,
''
)
.
strip
(),
name_date_tuple
[
1
]),
# self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
self
.
words_str
:
self
.
fix_hava_str
,
self
.
position_str
:
bbox
}
is_find_name_date
=
True
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment