Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
a473474e
authored
2021-04-25 11:33:15 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix registeredCapital
1 parent
ad575f50
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
125 additions
and
26 deletions
src/common/tools/comparison.py
src/common/tools/rmb_lower.py
src/common/tools/comparison.py
View file @
a473474
import
re
from
datetime
import
datetime
from
.rmb_
upper
import
to_rmb_upp
er
from
.rmb_
lower
import
rmb_handl
er
class
Comparison
:
...
...
@@ -36,26 +36,26 @@ class Comparison:
return
self
.
RESULT_N
def
common_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
ocr_str
==
''
:
return
self
.
RESULT_NA
,
None
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
if
not
isinstance
(
ocr_str
,
str
)
or
not
isinstance
(
input_str
,
str
):
return
self
.
RESULT_NA
,
ocr_str
if
ocr_str
==
''
or
ocr_str
.
strip
()
==
''
:
return
self
.
RESULT_NA
,
None
return
self
.
build_res
(
input_str
==
ocr_str
),
ocr_str
def
company_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
ocr_str
==
''
:
return
self
.
RESULT_NA
,
None
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
if
not
isinstance
(
ocr_str
,
str
)
or
not
isinstance
(
input_str
,
str
):
return
self
.
RESULT_NA
,
ocr_str
if
ocr_str
==
''
or
ocr_str
.
strip
()
==
''
:
return
self
.
RESULT_NA
,
None
input_tmp
=
re
.
sub
(
self
.
re_obj
,
''
,
input_str
)
.
strip
()
ocr_tmp
=
re
.
sub
(
self
.
re_obj
,
''
,
ocr_str
)
.
strip
()
return
self
.
build_res
(
input_tmp
==
ocr_tmp
),
ocr_str
def
name_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
ocr_str
==
''
:
return
self
.
RESULT_NA
,
None
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
if
not
isinstance
(
ocr_str
,
str
)
or
not
isinstance
(
input_str
,
str
):
return
self
.
RESULT_NA
,
ocr_str
if
ocr_str
==
''
or
ocr_str
.
strip
()
==
''
:
return
self
.
RESULT_NA
,
None
if
kwargs
.
get
(
'is_passport'
):
input_tmp
=
input_str
.
upper
()
.
replace
(
' '
,
''
)
ocr_tmp
=
ocr_str
.
upper
()
.
replace
(
' '
,
''
)
...
...
@@ -71,15 +71,16 @@ class Comparison:
return
self
.
build_res
(
input_s
==
ocr_s
),
ocr_str
def
date_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
ocr_str
==
''
:
return
self
.
RESULT_NA
,
None
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
if
not
isinstance
(
ocr_str
,
str
)
or
not
isinstance
(
input_str
,
str
):
return
self
.
RESULT_NA
,
ocr_str
if
kwargs
.
get
(
'long'
,
False
)
and
'长期'
in
ocr_str
:
if
input_str
==
'2099-12-31'
or
input_str
==
'2099-01-01'
:
return
self
.
RESULT_Y
,
'2099-12-31'
else
:
return
self
.
RESULT_N
,
'2099-12-31'
if
ocr_str
==
''
or
ocr_str
.
strip
()
==
''
:
return
self
.
RESULT_NA
,
None
if
kwargs
.
get
(
'long'
,
False
):
if
'长期'
in
ocr_str
or
'永久'
in
ocr_str
:
if
input_str
==
'2099-12-31'
or
input_str
==
'2099-01-01'
:
return
self
.
RESULT_Y
,
'2099-12-31'
else
:
return
self
.
RESULT_N
,
'2099-12-31'
if
kwargs
.
get
(
'ocr_split'
,
False
):
if
'至'
in
ocr_str
:
ocr_str
=
ocr_str
.
split
(
'至'
)[
-
1
]
...
...
@@ -102,26 +103,26 @@ class Comparison:
return
self
.
build_res
(
input_str
==
ocr_str
),
ocr_output
def
rmb_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr
_str
,
str
):
if
not
isinstance
(
ocr_str
,
str
)
or
not
isinstance
(
input
_str
,
str
):
return
self
.
RESULT_NA
,
None
if
ocr_str
==
''
:
if
ocr_str
==
''
or
ocr_str
.
strip
()
==
''
:
return
self
.
RESULT_NA
,
None
try
:
input_rmb_upper
=
to_rmb_upper
(
float
(
input_str
)
)
res
=
self
.
build_res
(
input_rmb_upper
==
ocr_st
r
)
ocr_lower
=
rmb_handler
.
to_rmb_lower
(
)
res
=
self
.
build_res
(
float
(
input_str
)
==
ocr_lowe
r
)
except
Exception
as
e
:
return
self
.
RESULT_N
,
None
else
:
if
res
==
self
.
RESULT_Y
:
return
res
,
input_str
else
:
return
res
,
None
return
res
,
ocr_lower
def
type_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
ocr_str
==
''
:
return
self
.
RESULT_NA
,
None
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
if
not
isinstance
(
ocr_str
,
str
)
or
not
isinstance
(
input_str
,
str
):
return
self
.
RESULT_NA
,
ocr_str
if
ocr_str
==
''
or
ocr_str
.
strip
()
==
''
:
return
self
.
RESULT_NA
,
None
for
map_tuple
in
self
.
TYPE_MAPPING
:
if
re
.
search
(
map_tuple
[
0
],
ocr_str
)
is
not
None
:
compare_str
=
map_tuple
[
1
]
...
...
src/common/tools/rmb_lower.py
0 → 100644
View file @
a473474
import
re
class RMBHandler:
    """Convert RMB amounts written as text into a numeric value in yuan.

    Two spellings are understood:

    * formal Chinese capital numerals, e.g. ``'人民币伍佰万圆整'``;
    * Arabic digits with an optional 万/亿 suffix, e.g. ``'100万元整'``.
    """

    # '元' is the everyday spelling of the formal '圆'.  It is resolved only
    # while parsing and is deliberately NOT part of ``unit_mapping``, so
    # ``pre_process`` keeps trimming exactly the same characters as before.
    _UNIT_ALIASES = {'元': '圆'}

    def __init__(self):
        # Capital numeral -> digit value.
        self.num_mapping = {
            '零': 0,
            '壹': 1,
            '贰': 2,
            '叁': 3,
            '肆': 4,
            '伍': 5,
            '陆': 6,
            '柒': 7,
            '捌': 8,
            '玖': 9,
        }
        # Unit character -> (rank, multiplier expressed in yuan).
        self.unit_mapping = {
            '厘': (0, 0.001),
            '分': (1, 0.01),
            '角': (2, 0.1),
            '圆': (3, 1),
            '拾': (4, 10),
            '佰': (5, 100),
            '仟': (6, 1000),
            '万': (7, 10000),
            '亿': (8, 100000000),
        }

    def upper_to_lower(self, price):
        """Evaluate a string of capital numerals and units.

        The amount is accumulated section by section: 拾/佰/仟 (and 圆) add
        to the current section, 万 closes the section and scales only that
        section, 亿 scales everything accumulated so far, and 角/分/厘 are
        summed separately as the fractional part.  This keeps amounts that
        contain both 亿 and 万 (e.g. ``'壹亿伍仟万'``) correct.

        Characters that are neither numerals nor units are skipped.  A unit
        that appears before any numeral gets an implicit leading one
        (``'拾伍万'`` is read as 150000).  A trailing numeral that is not
        followed by a unit is ignored.

        :param price: text made of capital numerals and unit characters.
        :return: the amount in yuan; an ``int`` when no 角/分/厘 occurs,
            otherwise a ``float`` (not rounded).
        """
        total = 0       # closed 万 / 亿 sections
        section = 0     # value of the section currently being read
        fraction = 0    # 角 / 分 / 厘
        digit = 0       # numeral waiting for its unit
        seen_amount_char = False

        for char in price:
            if char in self.num_mapping:
                digit = self.num_mapping[char]
                seen_amount_char = True
                continue

            char = self._UNIT_ALIASES.get(char, char)
            if char not in self.unit_mapping:
                # Noise such as '整', brackets or blanks must not disturb
                # the running state.
                continue

            if not seen_amount_char:
                digit = 1
            seen_amount_char = True

            multiplier = self.unit_mapping[char][1]
            if multiplier < 1:
                fraction += digit * multiplier
            elif multiplier < 10000:
                # 圆 (x1) as well as 拾 / 佰 / 仟.
                section += digit * multiplier
            elif multiplier == 10000:
                total += (section + digit) * multiplier
                section = 0
            else:
                total = (total + section + digit) * multiplier
                section = 0
            digit = 0

        return total + section + fraction

    def pre_process(self, price, upper=True):
        """Trim *price* to the span between its first and last amount char.

        An amount char is any key of ``num_mapping`` or ``unit_mapping``.

        :param price: raw text, e.g. ``'人民币伍佰万圆整'``.
        :param upper: only ``True`` is implemented; any falsy value yields
            ``None``.
        :return: the trimmed substring, or ``None`` when *price* contains no
            amount char.
        """
        if not upper:
            return None

        positions = [
            idx for idx, char in enumerate(price)
            if char in self.num_mapping or char in self.unit_mapping
        ]
        if not positions:
            return None
        return price[positions[0]:positions[-1] + 1]

    def to_rmb_lower(self, price):
        """Parse *price* into a float amount in yuan.

        Text without any digit is treated as capital numerals and rounded to
        three decimals (厘).  Text containing digits is read as the first
        number found, multiplied by a directly following 万 or 亿; thousands
        separators inside the number and blanks before the unit are
        tolerated.

        :param price: text to parse.
        :return: the amount as ``float``, or ``None`` when *price* is not a
            string or no amount can be extracted.
        """
        if not isinstance(price, str):
            return None

        try:
            if re.search(r'\d', price) is None:
                trimmed = self.pre_process(price)
                if not trimmed:
                    return None
                return float(round(self.upper_to_lower(trimmed), 3))

            # Drop ',' / '，' only where they group exactly three digits, so
            # '1,000,000' is read as one number instead of stopping at '1'.
            normalized = re.sub(r'(?<=\d)[,，](?=\d{3}(?!\d))', '', price)
            match = re.search(r'(\d+\.?\d*)\s*([万亿]?)', normalized)
            if match is None:
                return None
            digit = float(match.group(1))
            unit = match.group(2)
            if unit in self.unit_mapping:
                digit = digit * self.unit_mapping[unit][1]
            return digit
        except (TypeError, ValueError):
            # Best-effort parser: unparsable input is reported as None.
            return None
# Shared converter instance; comparison.py imports this name directly.
rmb_handler = RMBHandler()


if __name__ == '__main__':
    # Manual smoke test: print the parsed value for a handful of samples.
    # Amounts with a fractional (角/分/厘) part or with both 亿 and 万.
    test_2 = [
        '壹万伍仟肆佰壹拾圆叁角伍分肆厘',
        '捌万陆仟肆佰壹拾圆整',
        '壹万伍仟肆佰壹拾元贰角捌分肆厘',
        '拾壹亿壹仟万伍仟肆佰壹拾元贰角捌分肆厘',
        '拾伍万圆',
    ]
    # Registered-capital style strings, including noise and digit spellings.
    test_1 = [
        'sfdds',
        '柒佰玖拾万元整',
        '100万元整',
        '人民币伍佰万圆整',
        '人民币壹仟万元',
        '100万元',
        '贰佰壹拾捌万圆整',
        '(人民币)壹仟万元',
        '壹佰壹拾万圆整',
        '人民币30.0000万元整',
        '伍拾万元人民币',
    ]

    # Only test_1 is exercised; switch the assignment to run test_2.
    input_list = test_1
    for i in input_list:
        parsed = rmb_handler.to_rmb_lower(i)
        print('{0}={1}'.format(i, parsed))
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment