Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
b198d2cf
authored
2021-04-13 12:40:45 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
aeefa795
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
80 additions
and
66 deletions
src/apps/doc/consts.py
src/apps/doc/ocr/gcap.py
src/celery_compare/tasks.py
src/common/tools/comparison.py
src/apps/doc/consts.py
View file @
b198d2c
...
...
@@ -1044,36 +1044,36 @@ COMPARE_FIELDS = (IC_OCR_FIELD, RP_OCR_FIELD, BL_OCR_FIELD, EEP_OCR_FIELD, DL_OC
# 身份证
ITPRC
=
[
(
'customerChineseName'
,
'姓名'
,
'name_compare'
,
{}),
(
'idNum'
,
'公民身份号码'
,
'common_compare'
,
{}),
(
'customerChineseName'
,
'姓名'
,
'name_compare'
,
{}
,
'customerChineseNameResult'
),
(
'idNum'
,
'公民身份号码'
,
'common_compare'
,
{}
,
'idNumResult'
),
# 20200410-20250410 OCR识别为长期,向GCAP发送:2099-12-31 00:00:00.0
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_split'
:
True
,
'input_replace'
:
''
}),
(
'dateOfBirth'
,
'出生年月'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20231023
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_split'
:
True
,
'input_replace'
:
''
}
,
'idExpiryDateResult'
),
(
'dateOfBirth'
,
'出生年月'
,
'date_compare'
,
{
'input_replace'
:
''
}
,
'dateOfBirthResult'
),
# 20231023
]
# 护照
ITPSP
=
[
(
'customerChineseName'
,
'英文姓名'
,
'name_compare'
,
{
'is_passport'
:
True
}),
(
'idNum'
,
'护照号码'
,
'common_compare'
,
{}),
(
'idExpiryDate'
,
'有效期至'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20250410
(
'dateOfBirth'
,
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20250410
(
'customerChineseName'
,
'英文姓名'
,
'name_compare'
,
{
'is_passport'
:
True
}
,
'customerChineseNameResult'
),
(
'idNum'
,
'护照号码'
,
'common_compare'
,
{}
,
'idNumResult'
),
(
'idExpiryDate'
,
'有效期至'
,
'date_compare'
,
{
'input_replace'
:
''
}
,
'idExpiryDateResult'
),
# 20250410
(
'dateOfBirth'
,
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
''
}
,
'dateOfBirthResult'
),
# 20250410
]
# 港澳台通行证
ITHKM_ITTID
=
[
(
'customerChineseName'
,
'中文名'
,
'common_compare'
,
{}),
(
'idNum'
,
'证件号码'
,
'common_compare'
,
{}),
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
'.'
}),
# 2013.10.24-2023.10.23
(
'dateOfBirth'
,
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
'.'
}),
# 2023.10.23
(
'customerChineseName'
,
'中文名'
,
'common_compare'
,
{}
,
'customerChineseNameResult'
),
(
'idNum'
,
'证件号码'
,
'common_compare'
,
{}
,
'idNumResult'
),
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
'.'
}
,
'idExpiryDateResult'
),
# 2013.10.24-2023.10.23
(
'dateOfBirth'
,
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
'.'
}
,
'dateOfBirthResult'
),
# 2023.10.23
]
# 居住证
ITRES
=
[
(
'customerChineseName'
,
'姓名'
,
'common_compare'
,
{}),
(
'idNum'
,
'公民身份号码'
,
'common_compare'
,
{}),
(
'secondIdNum'
,
'通行证号码'
,
'common_compare'
,
{}),
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
''
}),
# 20200410-20250410
(
'dateOfBirth'
,
'出生年月'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20231023
(
'customerChineseName'
,
'姓名'
,
'common_compare'
,
{}
,
'customerChineseName'
),
(
'idNum'
,
'公民身份号码'
,
'common_compare'
,
{}
,
'idNum'
),
(
'secondIdNum'
,
'通行证号码'
,
'common_compare'
,
{}
,
'secondIdNum'
),
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
''
}
,
'idExpiryDate'
),
# 20200410-20250410
(
'dateOfBirth'
,
'出生年月'
,
'date_compare'
,
{
'input_replace'
:
''
}
,
'dateOfBirth'
),
# 20231023
]
ID_TYPE_COMPARE
=
{
...
...
@@ -1090,33 +1090,33 @@ ID_TYPE_COMPARE = {
# c)两中比对 均不一致:N,向GCAP发送:车辆登记证 & 行驶证识别结果
# 车辆登记证
PCUSD_MVC
=
[
(
'vinNo'
,
'9.车辆识别代号/车架号'
,
'common_compare'
,
{}),
(
'manufactureDate'
,
'32.车辆出厂日期'
,
'common_compare'
,
{}),
(
'firstRegistrationDate'
,
'3.登记日期'
,
'common_compare'
,
{}),
(
'vinNo'
,
'9.车辆识别代号/车架号'
,
'common_compare'
,
{}
,
'vinNoResult'
),
(
'manufactureDate'
,
'32.车辆出厂日期'
,
'common_compare'
,
{}
,
'manufactureDateResult'
),
(
'firstRegistrationDate'
,
'3.登记日期'
,
'common_compare'
,
{}
,
'firstRegistrationDateResult'
),
]
# 行驶证
PCUSD_DL
=
[
(
'vinNo'
,
'车辆识别代码'
,
'common_compare'
,
{}),
(
'vinNo'
,
'车辆识别代码'
,
'common_compare'
,
{}
,
'vinNoResult'
),
]
# 营业执照
TCCOR
=
[
(
'customerChineseName'
,
'企业名称'
,
'common_compare'
,
{}),
(
'legalRepName'
,
'经营者姓名'
,
'name_compare'
,
{}),
(
'idNum'
,
'注册号'
,
'common_compare'
,
{}),
(
'businessLicenseNo'
,
'注册号'
,
'common_compare'
,
{}),
(
'taxRegistrationCode'
,
'注册号'
,
'common_compare'
,
{}),
(
'incorporationDate'
,
'成立日期'
,
'date_compare'
,
{
'ocr_replace'
:
True
}),
# 2017年07月11日
(
'customerChineseName'
,
'企业名称'
,
'common_compare'
,
{}
,
'customerChinessNameResult'
),
(
'legalRepName'
,
'经营者姓名'
,
'name_compare'
,
{}
,
'legalRepNameResult'
),
(
'idNum'
,
'注册号'
,
'common_compare'
,
{}
,
'idNumResult'
),
(
'businessLicenseNo'
,
'注册号'
,
'common_compare'
,
{}
,
'businessLicenseNoResult'
),
(
'taxRegistrationCode'
,
'注册号'
,
'common_compare'
,
{}
,
'taxRegistrationCodeResult'
),
(
'incorporationDate'
,
'成立日期'
,
'date_compare'
,
{
'ocr_replace'
:
True
}
,
'incorporationDateResult'
),
# 2017年07月11日
# 2017年07月11日至长期 1. OCR识别为长期,向GCAP发送:2099-12-31 00:00:00.0
(
'businessLicenseDueDate'
,
'营业期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_replace'
:
True
}),
(
'capitalRegAmount'
,
'注册资本'
,
'rmb_compare'
,
{}),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
(
'businessLicenseDueDate'
,
'营业期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_replace'
:
True
}
,
'businessLicenseDueDateResult'
),
(
'capitalRegAmount'
,
'注册资本'
,
'rmb_compare'
,
{}
,
'capitalRegAmountResult'
),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
]
TCSEP
=
[
(
'companyName'
,
'企业名称'
,
'common_compare'
,
{}),
(
'registeredCapital'
,
'注册资本'
,
'rmb_compare'
,
{}),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
(
'selfEmployedSubType'
,
'企业类型'
,
'type_compare'
,
{})
# 有限责任公司
(
'companyName'
,
'企业名称'
,
'common_compare'
,
{}
,
'companyNameResult'
),
(
'registeredCapital'
,
'注册资本'
,
'rmb_compare'
,
{}
,
'registeredCapitalResult'
),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
(
'selfEmployedSubType'
,
'企业类型'
,
'type_compare'
,
{}
,
'selfEmployedSubTypeResult'
)
# 有限责任公司
]
# 1. POS数据OCR识别结果对应关系如下:
...
...
@@ -1131,7 +1131,7 @@ RESULT_NA = 'NA'
IN_ORDER
=
(
'applicantType'
,
'idType'
,
'secondIdType'
,
'customerType'
,
'customerChineseName'
,
'idNum'
,
'secondIdNum'
,
"idExpiryDate"
,
"dateOfBirth"
,
'companyName'
,
"registeredCapital"
,
'selfEmployedSubType'
,)
UC_ORDER
=
(
'vinNo'
,
'manufactureDate'
,
'firstRegistrationDate'
)
CO_ORDER
=
(
'customerChineseName'
,
'legalRepName'
,
'idNum'
,
'businessLicenseNo'
,
'taxRegistrationCode'
,
CO_ORDER
=
(
'customerChineseName'
,
'legalRepName'
,
'idNum'
,
'
customerType'
,
'
businessLicenseNo'
,
'taxRegistrationCode'
,
'incorporationDate'
,
'businessLicenseDueDate'
,
'capitalRegAmount'
)
...
...
src/apps/doc/ocr/gcap.py
View file @
b198d2c
...
...
@@ -25,7 +25,7 @@ class GCAP:
response
=
requests
.
post
(
self
.
url
,
headers
=
self
.
headers
,
data
=
data
,
verify
=
False
,
auth
=
self
.
auth
)
if
response
.
status_code
!=
200
:
raise
GCAPException
(
'GCAP response with code: {0}'
.
format
(
response
.
status_code
))
return
response
.
status_code
return
response
.
text
def
test_send
(
self
):
test_res
=
OrderedDict
({
...
...
src/celery_compare/tasks.py
View file @
b198d2c
...
...
@@ -35,22 +35,25 @@ def field_compare(info_dict, ocr_res_dict, ocr_field, compare_list, res_set):
break
for
idx
,
compare_tuple
in
enumerate
(
compare_list
):
input_str
=
info_dict
.
get
(
compare_tuple
[
0
])
ocr_str
=
ocr_res
.
get
(
compare_tuple
[
1
]
,
''
)
compare_res
=
getattr
(
cp
,
compare_tuple
[
2
])(
info_dict
.
get
(
compare_tuple
[
0
]),
ocr_res
.
get
(
compare_tuple
[
1
]),
**
compare_tuple
[
3
])
ocr_str
=
ocr_res
.
get
(
compare_tuple
[
1
])
compare_res
,
ocr_output
=
getattr
(
cp
,
compare_tuple
[
2
])(
info_dict
.
get
(
compare_tuple
[
0
]),
ocr_res
.
get
(
compare_tuple
[
1
]),
idx
,
**
compare_tuple
[
3
])
if
idx
==
0
and
compare_res
==
consts
.
RESULT_N
:
break
is_find
=
True
info_dict
[
compare_tuple
[
0
]
+
'Result'
]
=
compare_res
info_dict
[
compare_tuple
[
4
]
]
=
compare_res
if
input_str
is
not
None
:
info_dict
[
compare_tuple
[
0
]]
=
ocr_str
if
ocr_str
is
None
or
ocr_output
is
None
:
del
info_dict
[
compare_tuple
[
0
]]
else
:
info_dict
[
compare_tuple
[
0
]]
=
ocr_output
res_set
.
add
(
compare_res
)
if
not
is_find
:
res_set
.
add
(
consts
.
RESULT_N
)
for
compare_tuple
in
compare_list
:
info_dict
[
compare_tuple
[
0
]
+
'Result'
]
=
consts
.
RESULT_NA
info_dict
[
compare_tuple
[
4
]
]
=
consts
.
RESULT_NA
if
compare_tuple
[
0
]
in
info_dict
:
info_dict
[
compare_tuple
[
0
]]
=
''
del
info_dict
[
compare_tuple
[
0
]]
def
usedcar_info_compare
(
info_dict
,
ocr_res_dict
,
ocr_field
,
compare_list
,
res_set
):
...
...
@@ -66,21 +69,21 @@ def usedcar_info_compare(info_dict, ocr_res_dict, ocr_field, compare_list, res_s
input_str
=
info_dict
.
get
(
compare_tuple
[
0
])
ocr_str
=
ocr_res
.
get
(
compare_tuple
[
1
],
''
)
compare_res
=
getattr
(
cp
,
compare_tuple
[
2
])(
info_dict
.
get
(
compare_tuple
[
0
]),
ocr_res
.
get
(
compare_tuple
[
1
]),
**
compare_tuple
[
3
])
info_dict
.
get
(
compare_tuple
[
0
]),
ocr_res
.
get
(
compare_tuple
[
1
]),
idx
,
**
compare_tuple
[
3
])
if
idx
==
0
and
compare_res
==
consts
.
RESULT_N
:
no_match_vino
.
append
(
ocr_str
)
break
is_find
=
True
info_dict
[
compare_tuple
[
0
]
+
'Result'
]
=
compare_res
info_dict
[
compare_tuple
[
4
]
]
=
compare_res
if
input_str
is
not
None
:
info_dict
[
compare_tuple
[
0
]]
=
ocr_str
res_set
.
add
(
compare_res
)
if
not
is_find
:
res_set
.
add
(
consts
.
RESULT_N
)
for
compare_tuple
in
compare_list
:
info_dict
[
compare_tuple
[
0
]
+
'Result'
]
=
consts
.
RESULT_N
info_dict
[
compare_tuple
[
4
]
]
=
consts
.
RESULT_N
if
compare_tuple
[
0
]
in
info_dict
:
info_dict
[
compare_tuple
[
0
]]
=
''
del
info_dict
[
compare_tuple
[
0
]]
return
is_find
,
no_match_vino
...
...
@@ -123,7 +126,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
'vehicleStatus'
:
last_obj
.
vehicle_status
,
'wholeResult'
:
'Y'
,
'wholeResultMessage'
:
''
,
'applicationLink'
:
'
'
,
'applicationLink'
:
r'\\china.bmw.corp\WINFS\SF-CN-data\SF3-CN-S\SF3-CN-S-1-New
'
,
}
})
...
...
@@ -165,7 +168,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
elif
mvc_find
is
False
and
dl_find
is
True
:
vino
=
mvc_vinos
[
0
]
if
len
(
mvc_vinos
)
>
0
else
''
order_usedcar_info
[
consts
.
PCUSD_MVC
[
0
][
0
]]
=
vino
order_usedcar_info
[
consts
.
PCUSD_MVC
[
0
][
0
]
+
'Result'
]
=
consts
.
RESULT_N
order_usedcar_info
[
consts
.
PCUSD_MVC
[
0
][
4
]
]
=
consts
.
RESULT_N
elif
mvc_find
is
False
and
dl_find
is
False
:
vino_list
=
[]
if
len
(
mvc_vinos
)
>
0
:
...
...
@@ -213,7 +216,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
try
:
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
gcap
.
send
(
data
)
res_text
=
gcap
.
send
(
data
)
except
Exception
as
e
:
gcap_exc
=
str
(
e
)
else
:
...
...
@@ -225,5 +228,8 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
'[error={5}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
traceback
.
format_exc
()))
else
:
compare_log
.
info
(
'{0} [gcap success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[response={5}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
res_text
))
compare_log
.
info
(
'{0} [task success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
))
...
...
src/common/tools/comparison.py
View file @
b198d2c
import
re
from
datetime
import
datetime
from
.rmb_upper
import
to_rmb_upper
...
...
@@ -33,14 +34,14 @@ class Comparison:
else
:
return
self
.
RESULT_N
def
common_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
def
common_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
return
self
.
build_res
(
input_str
==
ocr_str
)
return
self
.
build_res
(
input_str
==
ocr_str
)
,
ocr_str
def
name_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
def
name_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
return
self
.
RESULT_N
,
ocr_str
if
kwargs
.
get
(
'is_passport'
):
input_obj
=
re
.
search
(
r'[a-zA-Z]]!'
,
input_str
)
if
input_obj
:
...
...
@@ -48,39 +49,46 @@ class Comparison:
ocr_obj
=
re
.
search
(
r'[a-zA-Z]]!'
,
ocr_str
)
if
ocr_obj
:
ocr_s
=
ocr_obj
.
group
()
return
self
.
build_res
(
input_s
==
ocr_s
)
return
self
.
RESULT_N
return
self
.
build_res
(
input_s
==
ocr_s
)
,
ocr_str
return
self
.
RESULT_N
,
ocr_str
else
:
return
self
.
build_res
((
input_str
==
ocr_str
))
return
self
.
build_res
((
input_str
==
ocr_str
))
,
ocr_str
else
:
if
re
.
search
(
r'[a-zA-Z]]'
,
input_str
):
return
self
.
RESULT_NA
return
self
.
RESULT_NA
,
ocr_str
input_s
=
input_str
.
translate
(
self
.
TRANS
)
ocr_s
=
ocr_str
.
translate
(
self
.
TRANS
)
return
self
.
build_res
(
input_s
==
ocr_s
)
return
self
.
build_res
(
input_s
==
ocr_s
)
,
ocr_str
def
date_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
def
date_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
return
self
.
RESULT_N
,
ocr_str
if
kwargs
.
get
(
'long'
,
False
)
and
'长期'
in
ocr_str
:
return
'2099-12-31'
return
self
.
RESULT_Y
,
'2099-12-31'
if
kwargs
.
get
(
'ocr_split'
,
False
):
ocr_str
=
ocr_str
.
split
(
'-'
)[
-
1
]
if
kwargs
.
get
(
'ocr_replace'
,
False
):
ocr_str
=
ocr_str
.
replace
(
'年'
,
'-'
)
.
replace
(
'月'
,
'-'
)
.
replace
(
'日'
,
''
)
if
kwargs
.
get
(
'input_replace'
)
is
not
None
:
input_str
=
input_str
.
replace
(
'-'
,
kwargs
.
get
(
'input_replace'
))
return
self
.
build_res
(
input_str
==
ocr_str
)
try
:
ocr_output
=
datetime
.
strptime
(
ocr_str
,
'
%
Y{0}
%
m{0}
%
d'
.
format
(
kwargs
.
get
(
'input_replace'
)))
.
strftime
(
'
%
Y-
%
m-
%
d'
)
except
Exception
as
e
:
ocr_output
=
None
else
:
ocr_output
=
ocr_str
return
self
.
build_res
(
input_str
==
ocr_str
),
ocr_output
def
rmb_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
def
rmb_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
input_rmb_upper
=
to_rmb_upper
(
float
(
input_str
))
return
self
.
build_res
(
input_rmb_upper
==
ocr_str
)
return
self
.
build_res
(
input_rmb_upper
==
ocr_str
)
,
input_str
def
type_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
def
type_compare
(
self
,
input_str
,
ocr_str
,
idx
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
not
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
return
self
.
RESULT_N
,
ocr_str
for
map_tuple
in
self
.
TYPE_MAPPING
:
if
re
.
search
(
map_tuple
[
0
],
ocr_str
)
is
not
None
:
compare_str
=
map_tuple
[
1
]
...
...
@@ -88,7 +96,7 @@ class Comparison:
else
:
compare_str
=
self
.
CSOTH
return
self
.
build_res
(
input_str
==
compare_str
)
return
self
.
build_res
(
input_str
==
compare_str
)
,
ocr_str
cp
=
Comparison
()
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment