Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
89149364
authored
2021-03-30 16:30:10 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
compare update
1 parent
4e022eb3
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
171 additions
and
184 deletions
src/apps/doc/consts.py
src/apps/doc/views.py
src/celery_compare/tasks.py
src/common/tools/comparison.py
src/apps/doc/consts.py
View file @
8914936
...
...
@@ -1018,61 +1018,69 @@ BASE_XML_TEXT = """<?xml version="1.0" encoding="utf-8"?>
CDATA_TEXT
=
"""<![CDATA[<Exec xmlns="http://tempuri.org/"><strXMLParm><Request><Framework><UserName>SFCHINA
\
qqcout0</UserName><GUID>70d0efcb-3bc2-4018-ac4e-681c8f3131b6</GUID><DetailedTracingEnabled>False</DetailedTracingEnabled><ServiceName>AMSWebService</ServiceName><SupportsRedirection>true</SupportsRedirection><ServiceType>Service</ServiceType></Framework><Parms><InputXML type="string">&lt;?xml version="1.0" encoding="utf-16"?&gt;&lt;InputXML&gt; &lt;Result&gt; {0} &lt;/Result&gt;&lt;AuthorizationData&gt;&lt;ServiceComponent&gt;OCR&lt;/ServiceComponent&gt;&lt;RoleId/&gt;&lt;CompanyId/&gt;&lt;/AuthorizationData&gt;&lt;/InputXML&gt;</InputXML></Parms></Request></strXMLParm></Exec>]]>"""
IC_OCR_FIELD
=
'ic_ocr'
RP_OCR_FIELD
=
'rp_ocr'
BL_OCR_FIELD
=
'bl_ocr'
EEP_OCR_FIELD
=
'eep_ocr'
DL_OCR_FIELD
=
'dl_ocr'
PP_OCR_FIELD
=
'pp_ocr'
MVC_OCR_FIELD
=
'mvc_ocr'
RESULT_MAPPING
=
{
# MVI_CLASSIFY: 'mvi_ocr',
IC_CLASSIFY
:
'ic_ocr'
,
RP_CLASSIFY
:
'rp_ocr'
,
IC_CLASSIFY
:
IC_OCR_FIELD
,
RP_CLASSIFY
:
RP_OCR_FIELD
,
# BC_CLASSIFY: 'bc_ocr',
BL_CLASSIFY
:
'bl_ocr'
,
BL_CLASSIFY
:
BL_OCR_FIELD
,
# UCI_CLASSIFY: 'uci_ocr',
EEP_CLASSIFY
:
'eep_ocr'
,
DL_CLASSIFY
:
'dl_ocr'
,
PP_CLASSIFY
:
'pp_ocr'
,
MVC_CLASSIFY
:
'mvc_ocr'
,
EEP_CLASSIFY
:
EEP_OCR_FIELD
,
DL_CLASSIFY
:
DL_OCR_FIELD
,
PP_CLASSIFY
:
PP_OCR_FIELD
,
MVC_CLASSIFY
:
MVC_OCR_FIELD
,
# VAT_CLASSIFY: 'vat_ocr',
}
COMPARE_FIELDS
=
(
'ic_ocr'
,
'rp_ocr'
,
'bl_ocr'
,
'eep_ocr'
,
'dl_ocr'
,
'pp_ocr'
,
'mvc_ocr'
)
COMPARE_FIELDS
=
(
IC_OCR_FIELD
,
RP_OCR_FIELD
,
BL_OCR_FIELD
,
EEP_OCR_FIELD
,
DL_OCR_FIELD
,
PP_OCR_FIELD
,
MVC_OCR_FIELD
)
# 身份证
ITPRC
=
{
'customerChineseName'
:
(
'姓名'
,
'common
_compare'
,
{}),
'idNum'
:
(
'公民身份号码'
,
'common_compare'
,
{}),
ITPRC
=
[
(
'customerChineseName'
,
'姓名'
,
'name
_compare'
,
{}),
(
'idNum'
,
'公民身份号码'
,
'common_compare'
,
{}),
# 20200410-20250410 OCR识别为长期,向GCAP发送:2099-12-31 00:00:00.0
'idExpiryDate'
:
(
'有效期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_split'
:
True
,
'input_replace'
:
''
}),
}
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_split'
:
True
,
'input_replace'
:
''
}),
]
# 护照
ITPSP
=
{
'customerChineseName'
:
(
'英文姓名'
,
'common_compare'
,
{
}),
'idNum'
:
(
'护照号码'
,
'common_compare'
,
{}),
'idExpiryDate'
:
(
'有效期至'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20250410
'dateOfBirth'
:
(
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20250410
}
ITPSP
=
[
(
'customerChineseName'
,
'英文姓名'
,
'name_compare'
,
{
'is_passport'
:
True
}),
(
'idNum'
,
'护照号码'
,
'common_compare'
,
{}),
(
'idExpiryDate'
,
'有效期至'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20250410
(
'dateOfBirth'
,
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
''
}),
# 20250410
]
# 港澳台通行证
ITHKM_ITTID
=
{
'customerChineseName'
:
(
'中文名'
,
'common_compare'
,
{}),
'idNum'
:
(
'证件号码'
,
'common_compare'
,
{}),
'idExpiryDate'
:
(
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
'.'
}),
# 2013.10.24-2023.10.23
'dateOfBirth'
:
(
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
'.'
}),
# 2023.10.23
ITHKM_ITTID
=
[
(
'customerChineseName'
,
'中文名'
,
'common_compare'
,
{}),
(
'idNum'
,
'证件号码'
,
'common_compare'
,
{}),
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
'.'
}),
# 2013.10.24-2023.10.23
(
'dateOfBirth'
,
'出生日期'
,
'date_compare'
,
{
'input_replace'
:
'.'
}),
# 2023.10.23
# 'secondIdNum': ''
}
]
# 居住证
ITRES
=
{
'customerChineseName'
:
(
'姓名'
,
'common_compare'
,
{}),
'idNum'
:
(
'公民身份号码'
,
'common_compare'
,
{}),
'idExpiryDate'
:
(
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
''
}),
# 20200410-20250410
'secondIdNum'
:
(
'通行证号码'
,
'common_compare'
,
{})
}
ITRES
=
[
(
'customerChineseName'
,
'姓名'
,
'common_compare'
,
{}),
(
'idNum'
,
'公民身份号码'
,
'common_compare'
,
{}),
(
'idExpiryDate'
,
'有效期限'
,
'date_compare'
,
{
'ocr_split'
:
True
,
'input_replace'
:
''
}),
# 20200410-20250410
(
'secondIdNum'
,
'通行证号码'
,
'common_compare'
,
{})
]
ID_TYPE_COMPARE
=
{
'ITPRC'
:
{
'model_field'
:
'ic_ocr'
,
'compare_field'
:
ITPRC
}
,
'ITPSP'
:
{
'model_field'
:
'pp_ocr'
,
'compare_field'
:
ITPSP
}
,
'ITHKM'
:
{
'model_field'
:
'eep_ocr'
,
'compare_field'
:
ITHKM_ITTID
}
,
'ITTID'
:
{
'model_field'
:
'eep_ocr'
,
'compare_field'
:
ITHKM_ITTID
}
,
'ITRES'
:
{
'model_field'
:
'rp_ocr'
,
'compare_field'
:
ITRES
}
,
'ITPRC'
:
[
IC_OCR_FIELD
,
ITPRC
]
,
'ITPSP'
:
[
PP_OCR_FIELD
,
ITPSP
]
,
'ITHKM'
:
[
EEP_OCR_FIELD
,
ITHKM_ITTID
]
,
'ITTID'
:
[
EEP_OCR_FIELD
,
ITHKM_ITTID
]
,
'ITRES'
:
[
RP_OCR_FIELD
,
ITRES
]
,
}
# 1. 分别对比 POS车架号 vs 车辆登记正,POS车架号 vs 行驶证
...
...
@@ -1080,37 +1088,35 @@ ID_TYPE_COMPARE = {
# b)其中一个 不一致:N,向GCAP发送:不一致的OCR识别结果
# c)两中比对 均不一致:N,向GCAP发送:车辆登记证 & 行驶证识别结果
# 车辆登记证
PCUSD_MVC
=
{
'vinNo'
:
(
'9.车辆识别代号/车架号'
,
'common_compare'
,
{}),
'manufactureDate'
:
(
'32.车辆出厂日期'
,
'common_compare'
,
{}),
'firstRegistrationDate'
:
(
'3.登记日期'
,
'common_compare'
,
{}),
}
PCUSD_MVC
=
[
(
'vinNo'
,
'9.车辆识别代号/车架号'
,
'common_compare'
,
{}),
(
'manufactureDate'
,
'32.车辆出厂日期'
,
'common_compare'
,
{}),
(
'firstRegistrationDate'
,
'3.登记日期'
,
'common_compare'
,
{}),
]
# 行驶证
PCUSD_DL
=
{
'vinNo'
:
(
'车辆识别代码'
,
'common_compare'
,
{}),
# 'manufactureDate': '',
# 'firstRegistrationDate': '',
}
PCUSD_DL
=
[
(
'vinNo'
,
'车辆识别代码'
,
'common_compare'
,
{}),
]
# 营业执照
TCCOR
=
{
'customerChineseName'
:
(
'企业名称'
,
'common_compare'
,
{}),
'legalRepName'
:
(
'经营者姓名'
,
'common
_compare'
,
{}),
'idNum'
:
(
'注册号'
,
'common_compare'
,
{}),
'businessLicenseNo'
:
(
'注册号'
,
'common_compare'
,
{}),
'taxRegistrationCode'
:
(
'注册号'
,
'common_compare'
,
{}),
'incorporationDate'
:
(
'成立日期'
,
'date_compare'
,
{
'ocr_replace'
:
True
}),
# 2017年07月11日
TCCOR
=
[
(
'customerChineseName'
,
'企业名称'
,
'common_compare'
,
{}),
(
'legalRepName'
,
'经营者姓名'
,
'name
_compare'
,
{}),
(
'idNum'
,
'注册号'
,
'common_compare'
,
{}),
(
'businessLicenseNo'
,
'注册号'
,
'common_compare'
,
{}),
(
'taxRegistrationCode'
,
'注册号'
,
'common_compare'
,
{}),
(
'incorporationDate'
,
'成立日期'
,
'date_compare'
,
{
'ocr_replace'
:
True
}),
# 2017年07月11日
# 2017年07月11日至长期 1. OCR识别为长期,向GCAP发送:2099-12-31 00:00:00.0
'businessLicenseDueDate'
:
(
'营业期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_replace'
:
True
}),
'capitalRegAmount'
:
(
'注册资本'
,
'rmb_compare'
,
{}),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
}
(
'businessLicenseDueDate'
,
'营业期限'
,
'date_compare'
,
{
'long'
:
True
,
'ocr_replace'
:
True
}),
(
'capitalRegAmount'
,
'注册资本'
,
'rmb_compare'
,
{}),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
]
TCSEP
=
{
'companyName'
:
(
'企业名称'
,
'common_compare'
,
{}),
'registeredCapital'
:
(
'注册资本'
,
'rmb_compare'
,
{}),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
'selfEmployedSubType'
:
(
'企业类型'
,
'type_compare'
,
{}),
# 有限责任公司
}
TCSEP
=
[
(
'companyName'
,
'企业名称'
,
'common_compare'
,
{}),
(
'registeredCapital'
,
'注册资本'
,
'rmb_compare'
,
{}),
# 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
(
'selfEmployedSubType'
,
'企业类型'
,
'type_compare'
,
{})
# 有限责任公司
]
# 1. POS数据OCR识别结果对应关系如下:
# a)Individual Businessman CSIBM => 个体工商户
...
...
src/apps/doc/views.py
View file @
8914936
...
...
@@ -444,7 +444,7 @@ class DocView(GenericView, DocHandler):
file
.
close
()
# 1. 上传信息记录
# application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id)
application_id
=
'CH-S2000127
72
'
application_id
=
'CH-S2000127
27
'
upload_finish_time
=
timezone
.
now
()
document_scheme
=
random
.
choice
(
consts
.
DOC_SCHEME_LIST
)
data_source
=
random
.
choice
(
consts
.
DATA_SOURCE_LIST
)
...
...
src/celery_compare/tasks.py
View file @
8914936
...
...
@@ -12,6 +12,29 @@ compare_log = logging.getLogger('compare')
log_base
=
'[CA Compare]'
def field_compare(info_dict, ocr_res_dict, ocr_field, compare_list, res_set):
    """Compare the fields of ``info_dict`` against one family of OCR results.

    ``ocr_res_dict[ocr_field]`` is expected to hold a JSON string that decodes
    to a sequence of dict-like OCR results. Each entry of ``compare_list`` is
    indexed as ``(input key, OCR key, comparator method name on ``cp``,
    keyword arguments for that comparator)``.

    OCR results are tried in order. When more than one OCR result exists, a
    result whose first rule yields ``consts.RESULT_N`` is skipped. The first
    result that is not skipped has every rule evaluated against it; each
    outcome is stored in ``info_dict[<input key> + 'Result']`` and added to
    ``res_set``, and no later OCR result is examined.

    If nothing was accepted (missing OCR data, empty list, or every result
    skipped), ``consts.RESULT_N`` is added to ``res_set`` and written to every
    ``<input key> + 'Result'`` slot.

    Args:
        info_dict: Mapping holding the input values; mutated in place.
        ocr_res_dict: Mapping from OCR field name to a JSON string (or None).
        ocr_field: Key to read from ``ocr_res_dict``.
        compare_list: Sequence of 4-item rule entries as described above.
        res_set: Set collecting every comparison outcome; mutated in place.
    """
    raw_ocr = ocr_res_dict.get(ocr_field)
    candidates = json.loads(raw_ocr) if raw_ocr is not None else []
    total = len(candidates)

    matched = False
    for candidate in candidates:
        for position, rule in enumerate(compare_list):
            comparer = getattr(cp, rule[2])
            verdict = comparer(info_dict.get(rule[0]), candidate.get(rule[1]), **rule[3])
            # With several OCR results, a miss on the first rule means this
            # result is not the one to compare against; try the next one.
            if position == 0 and verdict == consts.RESULT_N and total != 1:
                break
            matched = True
            info_dict[rule[0] + 'Result'] = verdict
            res_set.add(verdict)
        if matched:
            break

    if matched:
        return

    # Nothing usable was found: mark every field as a mismatch.
    res_set.add(consts.RESULT_N)
    for rule in compare_list:
        info_dict[rule[0] + 'Result'] = consts.RESULT_N
@app.task
def
compare
(
application_id
,
application_entity
,
uniq_seq
,
ocr_res_id
):
# POS: application_id, application_entity, uniq_seq, None
...
...
@@ -25,8 +48,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
last_obj
=
comparison_class
.
objects
.
filter
(
application_id
=
application_id
)
.
last
()
if
last_obj
is
None
:
compare_log
.
info
(
'{0} [comparison info empty] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
))
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
))
return
# 根据application_id查找OCR累计结果指定license字段,如果没有,结束
...
...
@@ -42,6 +64,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
return
# 比对信息
try
:
comparison_res
=
{
'OCR_Input'
:
{
'uniqSeq'
:
last_obj
.
uniq_seq
,
...
...
@@ -64,143 +87,66 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
individual_cus_info
[
'customerType'
]
=
last_obj
.
customer_type
# sep营业执照
if
is_sep
and
individual_cus_info
.
get
(
'companyName'
)
is
not
None
:
sep_is_find
=
False
sep_ocr_res_str
=
ocr_res_dict
.
get
(
'bl_ocr'
)
if
sep_ocr_res_str
is
not
None
:
sep_ocr_list
=
json
.
loads
(
sep_ocr_res_str
)
for
sep_ocr
in
sep_ocr_list
:
company_name
=
sep_ocr
.
get
(
consts
.
TCSEP
.
get
(
'companyName'
)[
0
])
if
company_name
is
None
or
company_name
!=
individual_cus_info
.
get
(
'companyName'
):
continue
sep_is_find
=
True
for
sep_field
,
sep_tuple
in
consts
.
TCSEP
.
items
():
sep_res
=
getattr
(
cp
,
sep_tuple
[
1
])(
individual_cus_info
.
get
(
sep_field
),
sep_ocr
.
get
(
sep_tuple
[
0
]),
**
sep_tuple
[
2
])
individual_cus_info
[
sep_field
+
'Result'
]
=
sep_res
res_set
.
add
(
sep_res
)
break
if
not
sep_is_find
:
res_set
.
add
(
consts
.
RESULT_N
)
for
field
in
consts
.
TCSEP
.
keys
():
individual_cus_info
[
field
+
'Result'
]
=
consts
.
RESULT_N
field_compare
(
individual_cus_info
,
ocr_res_dict
,
consts
.
BL_OCR_FIELD
,
consts
.
TCSEP
,
res_set
)
# 个人信息证件
id_type
=
individual_cus_info
.
get
(
'idType'
)
compare_target
=
consts
.
ID_TYPE_COMPARE
.
get
(
id_type
)
if
compare_target
is
None
:
continue
is_find
=
False
ocr_res_str
=
ocr_res_dict
.
get
(
compare_target
.
get
(
'model_field'
))
if
ocr_res_str
is
not
None
:
ocr_res_list
=
json
.
loads
(
ocr_res_str
)
compare_target_dict
=
compare_target
.
get
(
'compare_field'
)
for
ocr_res
in
ocr_res_list
:
base_name
=
ocr_res
.
get
(
compare_target_dict
.
get
(
'customerChineseName'
)[
0
])
if
base_name
is
None
or
base_name
!=
individual_cus_info
.
get
(
'customerChineseName'
):
# TODO 特殊姓名比对
compare_info_list
=
consts
.
ID_TYPE_COMPARE
.
get
(
id_type
)
if
compare_info_list
is
None
:
continue
is_find
=
True
for
compare_field
,
compare_tuple
in
compare_target
.
get
(
'compare_field'
)
.
items
():
compare_res
=
getattr
(
cp
,
compare_tuple
[
1
])(
individual_cus_info
.
get
(
compare_field
),
ocr_res
.
get
(
compare_tuple
[
0
]),
**
compare_tuple
[
2
])
individual_cus_info
[
compare_field
+
'Result'
]
=
compare_res
res_set
.
add
(
compare_res
)
break
if
not
is_find
:
res_set
.
add
(
consts
.
RESULT_N
)
for
field
in
compare_target
.
get
(
'compare_field'
)
.
keys
():
individual_cus_info
[
field
+
'Result'
]
=
consts
.
RESULT_N
field_compare
(
individual_cus_info
,
ocr_res_dict
,
compare_info_list
[
0
],
compare_info_list
[
1
],
res_set
)
comparison_res
[
'OCR_Input'
][
'individualCusInfo'
]
=
individual_cus_info_list
if
last_obj
.
corporate_cus_info
is
not
None
:
corporate_cus_info
=
json
.
loads
(
last_obj
.
corporate_cus_info
)
corporate_cus_info
[
'customerType'
]
=
last_obj
.
customer_type
is_bl_find
=
False
bl_ocr_res_str
=
ocr_res_dict
.
get
(
'bl_ocr'
)
if
bl_ocr_res_str
is
not
None
:
bl_ocr_list
=
json
.
loads
(
bl_ocr_res_str
)
for
bl_ocr
in
bl_ocr_list
:
company_name
=
bl_ocr
.
get
(
consts
.
TCCOR
.
get
(
'customerChineseName'
)[
0
])
if
company_name
is
None
or
company_name
!=
corporate_cus_info
.
get
(
'customerChineseName'
):
continue
is_bl_find
=
True
for
bl_field
,
bl_tuple
in
consts
.
TCCOR
.
items
():
bl_res
=
getattr
(
cp
,
bl_tuple
[
1
])(
corporate_cus_info
.
get
(
bl_field
),
bl_ocr
.
get
(
bl_tuple
[
0
]),
**
bl_tuple
[
2
])
corporate_cus_info
[
bl_field
+
'Result'
]
=
bl_res
res_set
.
add
(
bl_res
)
break
if
not
is_bl_find
:
res_set
.
add
(
consts
.
RESULT_N
)
for
field
in
consts
.
TCCOR
.
keys
():
corporate_cus_info
[
field
+
'Result'
]
=
consts
.
RESULT_N
field_compare
(
corporate_cus_info
,
ocr_res_dict
,
consts
.
BL_OCR_FIELD
,
consts
.
TCCOR
,
res_set
)
comparison_res
[
'OCR_Input'
][
'corporateCusInfo'
]
=
corporate_cus_info
if
last_obj
.
vehicle_status
==
consts
.
VEHICLE_STATUS
[
0
]
and
last_obj
.
usedcar_info
is
not
None
:
usedcar_info
=
json
.
loads
(
last_obj
.
usedcar_info
)
is_usedcar_find
=
False
mvc_ocr_res_str
=
ocr_res_dict
.
get
(
'mvc_ocr'
)
if
mvc_ocr_res_str
is
not
None
:
mvc_ocr_list
=
json
.
loads
(
mvc_ocr_res_str
)
for
mvc_ocr
in
mvc_ocr_list
:
vin_no
=
mvc_ocr
.
get
(
consts
.
PCUSD_MVC
.
get
(
'vinNo'
)[
0
])
if
vin_no
is
None
or
vin_no
!=
usedcar_info
.
get
(
'vinNo'
):
continue
is_usedcar_find
=
True
for
mvc_field
,
mvc_tuple
in
consts
.
PCUSD_MVC
.
items
():
mvc_res
=
getattr
(
cp
,
mvc_tuple
[
1
])(
usedcar_info
.
get
(
mvc_field
),
mvc_ocr
.
get
(
mvc_tuple
[
0
]),
**
mvc_tuple
[
2
])
usedcar_info
[
mvc_field
+
'Result'
]
=
mvc_res
res_set
.
add
(
mvc_res
)
dl_find
=
False
dl_ocr_res_str
=
ocr_res_dict
.
get
(
'dl_ocr'
)
if
dl_ocr_res_str
is
not
None
:
dl_ocr_list
=
json
.
loads
(
dl_ocr_res_str
)
for
dl_ocr
in
dl_ocr_list
:
dl_vin_no
=
dl_ocr
.
get
(
consts
.
PCUSD_DL
.
get
(
'vinNo'
)[
0
])
if
dl_vin_no
is
None
or
dl_vin_no
!=
usedcar_info
.
get
(
'vinNo'
):
continue
dl_find
=
True
break
if
not
dl_find
:
res_set
.
add
(
consts
.
RESULT_N
)
usedcar_info
[
'vinNo'
+
'Result'
]
=
consts
.
RESULT_N
break
if
not
is_usedcar_find
:
res_set
.
add
(
consts
.
RESULT_N
)
for
field
in
consts
.
PCUSD_MVC
.
keys
():
usedcar_info
[
field
+
'Result'
]
=
consts
.
RESULT_N
field_compare
(
usedcar_info
,
ocr_res_dict
,
consts
.
MVC_OCR_FIELD
,
consts
.
PCUSD_MVC
,
res_set
)
if
usedcar_info
[
consts
.
PCUSD_MVC
[
0
][
0
]
+
'Result'
]
==
consts
.
RESULT_Y
:
field_compare
(
usedcar_info
,
ocr_res_dict
,
consts
.
DL_OCR_FIELD
,
consts
.
PCUSD_DL
,
res_set
)
comparison_res
[
'OCR_Input'
][
'usedCarInfo'
]
=
usedcar_info
comparison_res
[
'OCR_Input'
][
'wholeResult'
]
=
consts
.
RESULT_N
if
consts
.
RESULT_N
in
res_set
else
consts
.
RESULT_Y
print
(
comparison_res
)
except
Exception
as
e
:
compare_log
.
error
(
'{0} [compare error] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[error={5}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
traceback
.
format_exc
()))
else
:
compare_log
.
info
(
'{0} [compare success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
))
# 将比对结果发送GCAP
#
try:
#
data = gcap.dict_to_xml(comparison_res)
#
except Exception as e:
#
compare_log.error('{0} [dict to xml failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
#
'[error={5}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
#
traceback.format_exc()))
#
else:
#
try:
#
for times in range(consts.RETRY_TIMES):
#
try:
#
gcap.send(data)
#
except Exception as e:
#
gcap_exc = str(e)
#
else:
#
break
#
else:
#
raise GCAPException(gcap_exc)
#
except Exception as e:
#
compare_log.error('{0} [gcap failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
#
'[error={5}]'.format(log_base, application_entity, application_id, uniq_seq,
#
ocr_res_id, traceback.format_exc()))
#
else:
#
compare_log.info('{0} [task success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'.format(
#
log_base, application_entity, application_id, uniq_seq, ocr_res_id))
try
:
data
=
gcap
.
dict_to_xml
(
comparison_res
)
except
Exception
as
e
:
compare_log
.
error
(
'{0} [dict to xml failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[error={5}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
traceback
.
format_exc
()))
else
:
try
:
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
gcap
.
send
(
data
)
except
Exception
as
e
:
gcap_exc
=
str
(
e
)
else
:
break
else
:
raise
GCAPException
(
gcap_exc
)
except
Exception
as
e
:
compare_log
.
error
(
'{0} [gcap failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[error={5}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
traceback
.
format_exc
()))
else
:
compare_log
.
info
(
'{0} [task success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
))
...
...
src/common/tools/comparison.py
View file @
8914936
...
...
@@ -19,7 +19,13 @@ class Comparison:
self
.
RESULT_Y
=
'Y'
self
.
RESULT_N
=
'N'
self
.
RESULT_NA
=
'NA'
# TODO NA情况
self
.
RESULT_NA
=
'NA'
self
.
TRANS_MAP
=
{
' '
:
''
,
'·'
:
''
,
}
self
.
TRANS
=
str
.
maketrans
(
self
.
TRANS_MAP
)
def
build_res
(
self
,
result
):
if
result
:
...
...
@@ -28,9 +34,34 @@ class Comparison:
return
self
.
RESULT_N
def common_compare(self, input_str, ocr_str, **kwargs):
    """Compare two values by exact string equality.

    Args:
        input_str: Reference value to check.
        ocr_str: Value to compare it against.
        **kwargs: Accepted so all comparators share one signature; unused.

    Returns:
        ``self.RESULT_N`` when either argument is not a ``str`` (for example
        ``None``); otherwise the value ``self.build_res`` returns for
        ``input_str == ocr_str``.
    """
    # Both operands must be strings. Each isinstance check needs its own
    # ``not``: ``not a or b`` would reject every call where ``ocr_str`` IS a
    # string, making a match impossible.
    if not isinstance(input_str, str) or not isinstance(ocr_str, str):
        return self.RESULT_N
    return self.build_res(input_str == ocr_str)
def name_compare(self, input_str, ocr_str, **kwargs):
    """Compare two name strings, with a dedicated branch for passports.

    Args:
        input_str: Reference name to check.
        ocr_str: Name to compare it against.
        **kwargs: ``is_passport`` (truthy) selects the passport branch.

    Returns:
        * ``self.RESULT_N`` when either argument is not a ``str``.
        * Passport branch: if the pattern matches ``input_str``, the matched
          substrings of both values are compared (``self.RESULT_N`` when the
          pattern does not match ``ocr_str``); if it does not match
          ``input_str``, the raw strings are compared for equality.
        * Otherwise: ``self.RESULT_NA`` when the pattern matches
          ``input_str``; else both strings are passed through
          ``str.translate(self.TRANS)`` and compared for equality.
        Equality outcomes are converted with ``self.build_res``.
    """
    # Both operands must be strings. Each isinstance check needs its own
    # ``not``: ``not a or b`` would reject every call where ``ocr_str`` IS a
    # string, making a match impossible.
    if not isinstance(input_str, str) or not isinstance(ocr_str, str):
        return self.RESULT_N

    if kwargs.get('is_passport'):
        # NOTE(review): the pattern '[a-zA-Z]]!' only matches a letter
        # followed by the literal characters ']!'. This looks like a typo
        # (perhaps '[a-zA-Z]+' was intended) -- confirm before changing.
        input_obj = re.search(r'[a-zA-Z]]!', input_str)
        if not input_obj:
            return self.build_res(input_str == ocr_str)
        ocr_obj = re.search(r'[a-zA-Z]]!', ocr_str)
        if not ocr_obj:
            return self.RESULT_N
        return self.build_res(input_obj.group() == ocr_obj.group())

    # NOTE(review): '[a-zA-Z]]' requires a letter followed by a literal ']'.
    # Presumably '[a-zA-Z]' was intended (names containing Latin letters are
    # reported as NA) -- confirm before changing.
    if re.search(r'[a-zA-Z]]', input_str):
        return self.RESULT_NA

    # Remove the characters mapped in self.TRANS before comparing.
    input_s = input_str.translate(self.TRANS)
    ocr_s = ocr_str.translate(self.TRANS)
    return self.build_res(input_s == ocr_s)
def
date_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
if
kwargs
.
get
(
'long'
,
False
)
and
'长期'
in
ocr_str
:
return
'2099-12-31'
if
kwargs
.
get
(
'ocr_split'
,
False
):
...
...
@@ -42,10 +73,14 @@ class Comparison:
return
self
.
build_res
(
input_str
==
ocr_str
)
def rmb_compare(self, input_str, ocr_str, **kwargs):
    """Compare a numeric amount with its OCR text representation.

    ``input_str`` is parsed with ``float`` and passed to ``to_rmb_upper``
    (presumably producing the upper-case Chinese RMB wording -- verify against
    that helper); the result is compared with ``ocr_str`` for equality.

    Args:
        input_str: Amount as a string accepted by ``float``.
        ocr_str: Text to compare the converted amount against.
        **kwargs: Accepted so all comparators share one signature; unused.

    Returns:
        ``self.RESULT_N`` when either argument is not a ``str``; otherwise the
        value ``self.build_res`` returns for the equality test.

    Raises:
        ValueError: If ``input_str`` is a string that ``float`` cannot parse.
    """
    # Both operands must be strings. Each isinstance check needs its own
    # ``not``: ``not a or b`` would reject every call where ``ocr_str`` IS a
    # string and would let a non-string ``ocr_str`` through to the conversion.
    if not isinstance(input_str, str) or not isinstance(ocr_str, str):
        return self.RESULT_N
    input_rmb_upper = to_rmb_upper(float(input_str))
    return self.build_res(input_rmb_upper == ocr_str)
def
type_compare
(
self
,
input_str
,
ocr_str
,
**
kwargs
):
if
not
isinstance
(
input_str
,
str
)
or
isinstance
(
ocr_str
,
str
):
return
self
.
RESULT_N
for
map_tuple
in
self
.
TYPE_MAPPING
:
if
re
.
search
(
map_tuple
[
0
],
ocr_str
)
is
not
None
:
compare_str
=
map_tuple
[
1
]
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment