Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
1122a082
authored
2022-12-13 15:27:30 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
784ff18a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
22 deletions
src/common/fsm_econtract/retriever.py
src/common/fsm_econtract/retriever.py
View file @
1122a08
...
...
@@ -21,8 +21,7 @@ class HMHRetriever:
for
bbox
,
text
in
pdf_text_list
.
pop
(
str
(
0
),
[]):
# print(text)
if
not
is_find_name_id_company
:
# name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text)
name_id_company_list
=
re
.
findall
(
r'承租人\(姓名(.*)证件号码(.*)与(.*公司)'
,
text
)
name_id_company_list
=
re
.
findall
(
r'姓名(.*)证件号码(.*)与(.*公司)'
,
text
)
for
name_id_company_tuple
in
name_id_company_list
:
if
len
(
name_id_company_tuple
)
==
3
:
result
[
self
.
search_fields_list
[
0
][
0
]]
=
{
...
...
@@ -30,26 +29,7 @@ class HMHRetriever:
self
.
position_str
:
bbox
}
result
[
self
.
search_fields_list
[
1
][
0
]]
=
{
self
.
words_str
:
name_id_company_tuple
[
1
]
.
replace
(
'
\u3000
'
,
''
)
.
strip
(),
self
.
position_str
:
bbox
}
result
[
self
.
search_fields_list
[
2
][
0
]]
=
{
self
.
words_str
:
name_id_company_tuple
[
2
],
self
.
position_str
:
bbox
}
is_find_name_id_company
=
True
break
if
not
is_find_name_id_company
:
name_id_company_list
=
re
.
findall
(
r'借款人\(姓名(.*)证件号码(.*)与(.*公司)'
,
text
)
# name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
for
name_id_company_tuple
in
name_id_company_list
:
if
len
(
name_id_company_tuple
)
==
3
:
result
[
self
.
search_fields_list
[
0
][
0
]]
=
{
self
.
words_str
:
name_id_company_tuple
[
0
]
.
replace
(
'
\u3000
'
,
''
)
.
strip
(),
self
.
position_str
:
bbox
}
result
[
self
.
search_fields_list
[
1
][
0
]]
=
{
self
.
words_str
:
name_id_company_tuple
[
1
]
.
replace
(
'
\u3000
'
,
''
)
.
strip
(),
self
.
words_str
:
name_id_company_tuple
[
1
]
.
replace
(
'
\u3000
'
,
''
)
.
replace
(
')'
,
''
)
.
replace
(
')'
,
''
)
.
strip
(),
self
.
position_str
:
bbox
}
result
[
self
.
search_fields_list
[
2
][
0
]]
=
{
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment