Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
93c7cc0a
authored
2023-07-12 11:14:56 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
KWOM_July
1 parent
b10ff66a
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
1 deletion
src/common/electronic_afc_contract/afc_contract_ocr.py
src/common/electronic_afc_contract/get_char_fsm.py
src/common/electronic_hil_contract/get_char.py
src/common/electronic_afc_contract/afc_contract_ocr.py
View file @
93c7cc0
...
...
@@ -25,6 +25,15 @@ def extract_info(ocr_results):
def
predict
(
pdf_info
,
is_qrs
=
False
,
is_fsm
=
False
):
pop_seceond_page_info
=
{}
if
not
is_fsm
and
not
is_qrs
and
len
(
pdf_info
)
==
9
:
pop_seceond_page_info
=
pdf_info
.
pop
(
'1'
,
{})
for
pno
in
range
(
8
):
if
pno
==
0
:
pdf_info
[
str
(
pno
)][
'blocks'
]
.
extend
(
pop_seceond_page_info
.
get
(
'blocks'
,
[]))
else
:
pdf_info
[
str
(
pno
)]
=
pdf_info
.
pop
(
str
(
pno
+
1
))
ocr_results
=
{}
for
pno
in
pdf_info
:
ocr_results
[
pno
]
=
{}
...
...
src/common/electronic_afc_contract/get_char_fsm.py
View file @
93c7cc0
...
...
@@ -13,6 +13,7 @@ class Finder:
self
.
item
=
{
"words"
:
None
,
"position"
:
None
,
}
self
.
cn_re
=
re
.
compile
(
u'[
\u4e00
-
\u9fa5
]'
)
def
gen_init_result
(
self
,
is_asp
):
# 格式化算法输出
...
...
@@ -187,6 +188,11 @@ class Finder:
vin
[
'position'
]
=
location
return
vin
def cn_char_filter(self, src_str):
    """Return only the characters of ``src_str`` that match ``self.cn_re``.

    Every non-overlapping match of the compiled pattern ``self.cn_re`` is
    collected in order of appearance and the matches are concatenated
    into a single string.

    Args:
        src_str: Text to filter.

    Returns:
        The concatenation of all matches, or an empty string when nothing
        in ``src_str`` matches.
    """
    # self.cn_re is already a compiled pattern, so call its own findall
    # rather than passing it back through the module-level re.findall.
    matched_chars = self.cn_re.findall(src_str)
    return ''.join(matched_chars)
def
get_loan_principal
(
self
,
page_num
=
'0'
):
chinese_keywords
=
[
'壹'
,
'贰'
,
'叁'
,
'肆'
,
'伍'
,
'陆'
,
'柒'
,
'捌'
,
'玖'
,
'拾'
,
'佰'
,
'仟'
,
'万'
,
'亿'
,
'元'
,
'角'
,
'分'
,
'零'
,
'整'
]
...
...
@@ -201,7 +207,7 @@ class Finder:
for
line
in
block
[
'lines'
]:
for
span
in
line
[
'spans'
]:
bbox
,
text
=
span
[
'bbox'
],
span
[
'text'
]
if
fuzz
.
ratio
(
''
.
join
(
chinese_keywords
),
text
)
>
15
:
if
fuzz
.
ratio
(
''
.
join
(
chinese_keywords
),
self
.
cn_char_filter
(
text
))
>=
10
:
text
=
text
.
split
(
':'
)[
-
1
]
.
strip
()
upper
[
'position'
]
=
bbox
upper
[
'words'
]
=
text
...
...
src/common/electronic_hil_contract/get_char.py
View file @
93c7cc0
This diff is collapsed.
Click to expand it.
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment