89149364 by 周伟奇

compare update

1 parent 4e022eb3
......@@ -1018,61 +1018,69 @@ BASE_XML_TEXT = """<?xml version="1.0" encoding="utf-8"?>
CDATA_TEXT = """<![CDATA[<Exec xmlns="http://tempuri.org/"><strXMLParm>&lt;Request&gt;&lt;Framework&gt;&lt;UserName&gt;SFCHINA\qqcout0&lt;/UserName&gt;&lt;GUID&gt;70d0efcb-3bc2-4018-ac4e-681c8f3131b6&lt;/GUID&gt;&lt;DetailedTracingEnabled&gt;False&lt;/DetailedTracingEnabled&gt;&lt;ServiceName&gt;AMSWebService&lt;/ServiceName&gt;&lt;SupportsRedirection&gt;true&lt;/SupportsRedirection&gt;&lt;ServiceType&gt;Service&lt;/ServiceType&gt;&lt;/Framework&gt;&lt;Parms&gt;&lt;InputXML type="string"&gt;&amp;lt;?xml version="1.0" encoding="utf-16"?&amp;gt;&amp;lt;InputXML&amp;gt; &amp;lt;Result&amp;gt; {0} &amp;lt;/Result&amp;gt;&amp;lt;AuthorizationData&amp;gt;&amp;lt;ServiceComponent&amp;gt;OCR&amp;lt;/ServiceComponent&amp;gt;&amp;lt;RoleId/&amp;gt;&amp;lt;CompanyId/&amp;gt;&amp;lt;/AuthorizationData&amp;gt;&amp;lt;/InputXML&amp;gt;&lt;/InputXML&gt;&lt;/Parms&gt;&lt;/Request&gt;</strXMLParm></Exec>]]>"""
IC_OCR_FIELD = 'ic_ocr'
RP_OCR_FIELD = 'rp_ocr'
BL_OCR_FIELD = 'bl_ocr'
EEP_OCR_FIELD = 'eep_ocr'
DL_OCR_FIELD = 'dl_ocr'
PP_OCR_FIELD = 'pp_ocr'
MVC_OCR_FIELD = 'mvc_ocr'
RESULT_MAPPING = {
# MVI_CLASSIFY: 'mvi_ocr',
IC_CLASSIFY: 'ic_ocr',
RP_CLASSIFY: 'rp_ocr',
IC_CLASSIFY: IC_OCR_FIELD,
RP_CLASSIFY: RP_OCR_FIELD,
# BC_CLASSIFY: 'bc_ocr',
BL_CLASSIFY: 'bl_ocr',
BL_CLASSIFY: BL_OCR_FIELD,
# UCI_CLASSIFY: 'uci_ocr',
EEP_CLASSIFY: 'eep_ocr',
DL_CLASSIFY: 'dl_ocr',
PP_CLASSIFY: 'pp_ocr',
MVC_CLASSIFY: 'mvc_ocr',
EEP_CLASSIFY: EEP_OCR_FIELD,
DL_CLASSIFY: DL_OCR_FIELD,
PP_CLASSIFY: PP_OCR_FIELD,
MVC_CLASSIFY: MVC_OCR_FIELD,
# VAT_CLASSIFY: 'vat_ocr',
}
COMPARE_FIELDS = ('ic_ocr', 'rp_ocr', 'bl_ocr', 'eep_ocr', 'dl_ocr', 'pp_ocr', 'mvc_ocr')
COMPARE_FIELDS = (IC_OCR_FIELD, RP_OCR_FIELD, BL_OCR_FIELD, EEP_OCR_FIELD, DL_OCR_FIELD, PP_OCR_FIELD, MVC_OCR_FIELD)
# 身份证
ITPRC = {
'customerChineseName': ('姓名', 'common_compare', {}),
'idNum': ('公民身份号码', 'common_compare', {}),
ITPRC = [
('customerChineseName', '姓名', 'name_compare', {}),
('idNum', '公民身份号码', 'common_compare', {}),
# 20200410-20250410 OCR识别为长期,向GCAP发送:2099-12-31 00:00:00.0
'idExpiryDate': ('有效期限', 'date_compare', {'long': True, 'ocr_split': True, 'input_replace': ''}),
}
('idExpiryDate', '有效期限', 'date_compare', {'long': True, 'ocr_split': True, 'input_replace': ''}),
]
# 护照
ITPSP = {
'customerChineseName': ('英文姓名', 'common_compare', {}),
'idNum': ('护照号码', 'common_compare', {}),
'idExpiryDate': ('有效期至', 'date_compare', {'input_replace': ''}), # 20250410
'dateOfBirth': ('出生日期', 'date_compare', {'input_replace': ''}), # 20250410
}
ITPSP = [
('customerChineseName', '英文姓名', 'name_compare', {'is_passport': True}),
('idNum', '护照号码', 'common_compare', {}),
('idExpiryDate', '有效期至', 'date_compare', {'input_replace': ''}), # 20250410
('dateOfBirth', '出生日期', 'date_compare', {'input_replace': ''}), # 20250410
]
# 港澳台通行证
ITHKM_ITTID = {
'customerChineseName': ('中文名', 'common_compare', {}),
'idNum': ('证件号码', 'common_compare', {}),
'idExpiryDate': ('有效期限', 'date_compare', {'ocr_split': True, 'input_replace': '.'}), # 2013.10.24-2023.10.23
'dateOfBirth': ('出生日期', 'date_compare', {'input_replace': '.'}), # 2023.10.23
ITHKM_ITTID = [
('customerChineseName', '中文名', 'common_compare', {}),
('idNum', '证件号码', 'common_compare', {}),
('idExpiryDate', '有效期限', 'date_compare', {'ocr_split': True, 'input_replace': '.'}), # 2013.10.24-2023.10.23
('dateOfBirth', '出生日期', 'date_compare', {'input_replace': '.'}), # 2023.10.23
# 'secondIdNum': ''
}
]
# 居住证
ITRES = {
'customerChineseName': ('姓名', 'common_compare', {}),
'idNum': ('公民身份号码', 'common_compare', {}),
'idExpiryDate': ('有效期限', 'date_compare', {'ocr_split': True, 'input_replace': ''}), # 20200410-20250410
'secondIdNum': ('通行证号码', 'common_compare', {})
}
ITRES = [
('customerChineseName', '姓名', 'common_compare', {}),
('idNum', '公民身份号码', 'common_compare', {}),
('idExpiryDate', '有效期限', 'date_compare', {'ocr_split': True, 'input_replace': ''}), # 20200410-20250410
('secondIdNum', '通行证号码', 'common_compare', {})
]
ID_TYPE_COMPARE = {
'ITPRC': {'model_field': 'ic_ocr', 'compare_field': ITPRC},
'ITPSP': {'model_field': 'pp_ocr', 'compare_field': ITPSP},
'ITHKM': {'model_field': 'eep_ocr', 'compare_field': ITHKM_ITTID},
'ITTID': {'model_field': 'eep_ocr', 'compare_field': ITHKM_ITTID},
'ITRES': {'model_field': 'rp_ocr', 'compare_field': ITRES},
'ITPRC': [IC_OCR_FIELD, ITPRC],
'ITPSP': [PP_OCR_FIELD, ITPSP],
'ITHKM': [EEP_OCR_FIELD, ITHKM_ITTID],
'ITTID': [EEP_OCR_FIELD, ITHKM_ITTID],
'ITRES': [RP_OCR_FIELD, ITRES],
}
# 1. 分别对比 POS车架号 vs 车辆登记正,POS车架号 vs 行驶证
......@@ -1080,37 +1088,35 @@ ID_TYPE_COMPARE = {
# b)其中一个 不一致:N,向GCAP发送:不一致的OCR识别结果
# c)两种比对 均不一致:N,向GCAP发送:车辆登记证 & 行驶证识别结果
# 车辆登记证
PCUSD_MVC = {
'vinNo': ('9.车辆识别代号/车架号', 'common_compare', {}),
'manufactureDate': ('32.车辆出厂日期', 'common_compare', {}),
'firstRegistrationDate': ('3.登记日期', 'common_compare', {}),
}
PCUSD_MVC = [
('vinNo', '9.车辆识别代号/车架号', 'common_compare', {}),
('manufactureDate', '32.车辆出厂日期', 'common_compare', {}),
('firstRegistrationDate', '3.登记日期', 'common_compare', {}),
]
# 行驶证
PCUSD_DL = {
'vinNo': ('车辆识别代码', 'common_compare', {}),
# 'manufactureDate': '',
# 'firstRegistrationDate': '',
}
PCUSD_DL = [
('vinNo', '车辆识别代码', 'common_compare', {}),
]
# 营业执照
TCCOR = {
'customerChineseName': ('企业名称', 'common_compare', {}),
'legalRepName': ('经营者姓名', 'common_compare', {}),
'idNum': ('注册号', 'common_compare', {}),
'businessLicenseNo': ('注册号', 'common_compare', {}),
'taxRegistrationCode': ('注册号', 'common_compare', {}),
'incorporationDate': ('成立日期', 'date_compare', {'ocr_replace': True}), # 2017年07月11日
TCCOR = [
('customerChineseName', '企业名称', 'common_compare', {}),
('legalRepName', '经营者姓名', 'name_compare', {}),
('idNum', '注册号', 'common_compare', {}),
('businessLicenseNo', '注册号', 'common_compare', {}),
('taxRegistrationCode', '注册号', 'common_compare', {}),
('incorporationDate', '成立日期', 'date_compare', {'ocr_replace': True}), # 2017年07月11日
# 2017年07月11日至长期 1. OCR识别为长期,向GCAP发送:2099-12-31 00:00:00.0
'businessLicenseDueDate': ('营业期限', 'date_compare', {'long': True, 'ocr_replace': True}),
'capitalRegAmount': ('注册资本', 'rmb_compare', {}), # 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
}
('businessLicenseDueDate', '营业期限', 'date_compare', {'long': True, 'ocr_replace': True}),
('capitalRegAmount', '注册资本', 'rmb_compare', {}), # 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
]
TCSEP = {
'companyName': ('企业名称', 'common_compare', {}),
'registeredCapital': ('注册资本', 'rmb_compare', {}), # 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
'selfEmployedSubType': ('企业类型', 'type_compare', {}), # 有限责任公司
}
TCSEP = [
('companyName', '企业名称', 'common_compare', {}),
('registeredCapital', '注册资本', 'rmb_compare', {}), # 壹拾万元整 将OCR识别结果(人民币大写)转化为数字
('selfEmployedSubType', '企业类型', 'type_compare', {}) # 有限责任公司
]
# 1. POS数据OCR识别结果对应关系如下:
# a)Individual Businessman CSIBM => 个体工商户
......
......@@ -444,7 +444,7 @@ class DocView(GenericView, DocHandler):
file.close()
# 1. 上传信息记录
# application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id)
application_id = 'CH-S200012772'
application_id = 'CH-S200012727'
upload_finish_time = timezone.now()
document_scheme = random.choice(consts.DOC_SCHEME_LIST)
data_source = random.choice(consts.DATA_SOURCE_LIST)
......
......@@ -12,6 +12,29 @@ compare_log = logging.getLogger('compare')
log_base = '[CA Compare]'
def field_compare(info_dict, ocr_res_dict, ocr_field, compare_list, res_set):
    """Compare one block of submitted info against the OCR records of one field.

    Args:
        info_dict: Submitted values; ``<key>Result`` entries are written into it.
        ocr_res_dict: Mapping whose ``ocr_field`` entry, when not None, is a
            JSON string that decodes to a sequence of OCR records.
        ocr_field: Key to look up in ``ocr_res_dict``.
        compare_list: Sequence of rules indexed as
            ``(info key, OCR key, comparator method name on cp, kwargs)``.
        res_set: Set collecting every comparison result produced.

    The first rule acts as a gate: when there is more than one OCR record and
    the first rule yields ``consts.RESULT_N``, that record is skipped without
    writing anything. The first record that passes the gate has all rules
    applied and ends the search. If no record is accepted, every rule's
    ``<key>Result`` is set to ``consts.RESULT_N``.
    """
    matched = False
    raw_ocr = ocr_res_dict.get(ocr_field)
    if raw_ocr is not None:
        candidates = json.loads(raw_ocr)
        single_candidate = len(candidates) == 1
        for candidate in candidates:
            for position, rule in enumerate(compare_list):
                comparer = getattr(cp, rule[2])
                outcome = comparer(info_dict.get(rule[0]), candidate.get(rule[1]), **rule[3])
                # Only the first rule may reject a record, and only when there
                # are several records to choose from.
                if position == 0 and not single_candidate and outcome == consts.RESULT_N:
                    break
                matched = True
                info_dict[rule[0] + 'Result'] = outcome
                res_set.add(outcome)
            if matched:
                # One accepted record is enough; ignore the remaining ones.
                break
    if matched:
        return
    # Nothing usable was found: flag every compared field as a mismatch.
    res_set.add(consts.RESULT_N)
    for rule in compare_list:
        info_dict[rule[0] + 'Result'] = consts.RESULT_N
@app.task
def compare(application_id, application_entity, uniq_seq, ocr_res_id):
# POS: application_id, application_entity, uniq_seq, None
......@@ -25,8 +48,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
last_obj = comparison_class.objects.filter(application_id=application_id).last()
if last_obj is None:
compare_log.info('{0} [comparison info empty] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'.format(
log_base, application_entity, application_id, uniq_seq, ocr_res_id
))
log_base, application_entity, application_id, uniq_seq, ocr_res_id))
return
# 根据application_id查找OCR累计结果指定license字段,如果没有,结束
......@@ -42,6 +64,7 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
return
# 比对信息
try:
comparison_res = {
'OCR_Input': {
'uniqSeq': last_obj.uniq_seq,
......@@ -64,143 +87,66 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id):
individual_cus_info['customerType'] = last_obj.customer_type
# sep营业执照
if is_sep and individual_cus_info.get('companyName') is not None:
sep_is_find = False
sep_ocr_res_str = ocr_res_dict.get('bl_ocr')
if sep_ocr_res_str is not None:
sep_ocr_list = json.loads(sep_ocr_res_str)
for sep_ocr in sep_ocr_list:
company_name = sep_ocr.get(consts.TCSEP.get('companyName')[0])
if company_name is None or company_name != individual_cus_info.get('companyName'):
continue
sep_is_find = True
for sep_field, sep_tuple in consts.TCSEP.items():
sep_res = getattr(cp, sep_tuple[1])(
individual_cus_info.get(sep_field), sep_ocr.get(sep_tuple[0]), **sep_tuple[2])
individual_cus_info[sep_field + 'Result'] = sep_res
res_set.add(sep_res)
break
if not sep_is_find:
res_set.add(consts.RESULT_N)
for field in consts.TCSEP.keys():
individual_cus_info[field + 'Result'] = consts.RESULT_N
field_compare(individual_cus_info, ocr_res_dict, consts.BL_OCR_FIELD, consts.TCSEP, res_set)
# 个人信息证件
id_type = individual_cus_info.get('idType')
compare_target = consts.ID_TYPE_COMPARE.get(id_type)
if compare_target is None:
continue
is_find = False
ocr_res_str = ocr_res_dict.get(compare_target.get('model_field'))
if ocr_res_str is not None:
ocr_res_list = json.loads(ocr_res_str)
compare_target_dict = compare_target.get('compare_field')
for ocr_res in ocr_res_list:
base_name = ocr_res.get(compare_target_dict.get('customerChineseName')[0])
if base_name is None or base_name != individual_cus_info.get('customerChineseName'): # TODO 特殊姓名比对
compare_info_list = consts.ID_TYPE_COMPARE.get(id_type)
if compare_info_list is None:
continue
is_find = True
for compare_field, compare_tuple in compare_target.get('compare_field').items():
compare_res = getattr(cp, compare_tuple[1])(
individual_cus_info.get(compare_field), ocr_res.get(compare_tuple[0]), **compare_tuple[2])
individual_cus_info[compare_field + 'Result'] = compare_res
res_set.add(compare_res)
break
if not is_find:
res_set.add(consts.RESULT_N)
for field in compare_target.get('compare_field').keys():
individual_cus_info[field + 'Result'] = consts.RESULT_N
field_compare(individual_cus_info, ocr_res_dict, compare_info_list[0], compare_info_list[1], res_set)
comparison_res['OCR_Input']['individualCusInfo'] = individual_cus_info_list
if last_obj.corporate_cus_info is not None:
corporate_cus_info = json.loads(last_obj.corporate_cus_info)
corporate_cus_info['customerType'] = last_obj.customer_type
is_bl_find = False
bl_ocr_res_str = ocr_res_dict.get('bl_ocr')
if bl_ocr_res_str is not None:
bl_ocr_list = json.loads(bl_ocr_res_str)
for bl_ocr in bl_ocr_list:
company_name = bl_ocr.get(consts.TCCOR.get('customerChineseName')[0])
if company_name is None or company_name != corporate_cus_info.get('customerChineseName'):
continue
is_bl_find = True
for bl_field, bl_tuple in consts.TCCOR.items():
bl_res = getattr(cp, bl_tuple[1])(
corporate_cus_info.get(bl_field), bl_ocr.get(bl_tuple[0]), **bl_tuple[2])
corporate_cus_info[bl_field + 'Result'] = bl_res
res_set.add(bl_res)
break
if not is_bl_find:
res_set.add(consts.RESULT_N)
for field in consts.TCCOR.keys():
corporate_cus_info[field + 'Result'] = consts.RESULT_N
field_compare(corporate_cus_info, ocr_res_dict, consts.BL_OCR_FIELD, consts.TCCOR, res_set)
comparison_res['OCR_Input']['corporateCusInfo'] = corporate_cus_info
if last_obj.vehicle_status == consts.VEHICLE_STATUS[0] and last_obj.usedcar_info is not None:
usedcar_info = json.loads(last_obj.usedcar_info)
is_usedcar_find = False
mvc_ocr_res_str = ocr_res_dict.get('mvc_ocr')
if mvc_ocr_res_str is not None:
mvc_ocr_list = json.loads(mvc_ocr_res_str)
for mvc_ocr in mvc_ocr_list:
vin_no = mvc_ocr.get(consts.PCUSD_MVC.get('vinNo')[0])
if vin_no is None or vin_no != usedcar_info.get('vinNo'):
continue
is_usedcar_find = True
for mvc_field, mvc_tuple in consts.PCUSD_MVC.items():
mvc_res = getattr(cp, mvc_tuple[1])(
usedcar_info.get(mvc_field), mvc_ocr.get(mvc_tuple[0]), **mvc_tuple[2])
usedcar_info[mvc_field + 'Result'] = mvc_res
res_set.add(mvc_res)
dl_find = False
dl_ocr_res_str = ocr_res_dict.get('dl_ocr')
if dl_ocr_res_str is not None:
dl_ocr_list = json.loads(dl_ocr_res_str)
for dl_ocr in dl_ocr_list:
dl_vin_no = dl_ocr.get(consts.PCUSD_DL.get('vinNo')[0])
if dl_vin_no is None or dl_vin_no != usedcar_info.get('vinNo'):
continue
dl_find = True
break
if not dl_find:
res_set.add(consts.RESULT_N)
usedcar_info['vinNo' + 'Result'] = consts.RESULT_N
break
if not is_usedcar_find:
res_set.add(consts.RESULT_N)
for field in consts.PCUSD_MVC.keys():
usedcar_info[field + 'Result'] = consts.RESULT_N
field_compare(usedcar_info, ocr_res_dict, consts.MVC_OCR_FIELD, consts.PCUSD_MVC, res_set)
if usedcar_info[consts.PCUSD_MVC[0][0] + 'Result'] == consts.RESULT_Y:
field_compare(usedcar_info, ocr_res_dict, consts.DL_OCR_FIELD, consts.PCUSD_DL, res_set)
comparison_res['OCR_Input']['usedCarInfo'] = usedcar_info
comparison_res['OCR_Input']['wholeResult'] = consts.RESULT_N if consts.RESULT_N in res_set else consts.RESULT_Y
print(comparison_res)
except Exception as e:
compare_log.error('{0} [compare error] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[error={5}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
traceback.format_exc()))
else:
compare_log.info('{0} [compare success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'.format(
log_base, application_entity, application_id, uniq_seq, ocr_res_id))
# 将比对结果发送GCAP
# try:
# data = gcap.dict_to_xml(comparison_res)
# except Exception as e:
# compare_log.error('{0} [dict to xml failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
# '[error={5}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
# traceback.format_exc()))
# else:
# try:
# for times in range(consts.RETRY_TIMES):
# try:
# gcap.send(data)
# except Exception as e:
# gcap_exc = str(e)
# else:
# break
# else:
# raise GCAPException(gcap_exc)
# except Exception as e:
# compare_log.error('{0} [gcap failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
# '[error={5}]'.format(log_base, application_entity, application_id, uniq_seq,
# ocr_res_id, traceback.format_exc()))
# else:
# compare_log.info('{0} [task success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'.format(
# log_base, application_entity, application_id, uniq_seq, ocr_res_id))
try:
data = gcap.dict_to_xml(comparison_res)
except Exception as e:
compare_log.error('{0} [dict to xml failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[error={5}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
traceback.format_exc()))
else:
try:
for times in range(consts.RETRY_TIMES):
try:
gcap.send(data)
except Exception as e:
gcap_exc = str(e)
else:
break
else:
raise GCAPException(gcap_exc)
except Exception as e:
compare_log.error('{0} [gcap failed] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] '
'[error={5}]'.format(log_base, application_entity, application_id, uniq_seq,
ocr_res_id, traceback.format_exc()))
else:
compare_log.info('{0} [task success] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}]'.format(
log_base, application_entity, application_id, uniq_seq, ocr_res_id))
......
......@@ -19,7 +19,13 @@ class Comparison:
self.RESULT_Y = 'Y'
self.RESULT_N = 'N'
self.RESULT_NA = 'NA' # TODO NA情况
self.RESULT_NA = 'NA'
self.TRANS_MAP = {
' ': '',
'·': '',
}
self.TRANS = str.maketrans(self.TRANS_MAP)
def build_res(self, result):
if result:
......@@ -28,9 +34,34 @@ class Comparison:
return self.RESULT_N
def common_compare(self, input_str, ocr_str, **kwargs):
    """Return RESULT_Y when both values are strings and exactly equal.

    Args:
        input_str: Submitted value.
        ocr_str: Value read from the OCR record.
        **kwargs: Accepted for a uniform comparator signature; unused here.

    Returns:
        ``self.RESULT_N`` when either value is not a ``str``; otherwise the
        result of ``self.build_res`` applied to the equality test.
    """
    # Both sides must be strings; a missing value on either side is a mismatch.
    if not isinstance(input_str, str) or not isinstance(ocr_str, str):
        return self.RESULT_N
    return self.build_res(input_str == ocr_str)
def name_compare(self, input_str, ocr_str, **kwargs):
    """Compare a submitted name with the name read by OCR.

    Args:
        input_str: Submitted name.
        ocr_str: Name read from the OCR record.
        **kwargs: ``is_passport`` (truthy) selects the passport rules.

    Returns:
        ``self.RESULT_N`` when either value is not a ``str``.

        Passport mode: if the submitted name contains ASCII letters, only the
        ASCII letters of both names are compared (spacing and punctuation are
        ignored); an OCR name without any ASCII letter is a mismatch. If the
        submitted name has no ASCII letters, the names are compared exactly.

        Otherwise: ``self.RESULT_NA`` when the submitted name contains an
        ASCII letter; else the names are compared after applying
        ``self.TRANS`` to both.
    """
    if not isinstance(input_str, str) or not isinstance(ocr_str, str):
        return self.RESULT_N

    if kwargs.get('is_passport'):
        input_letters = ''.join(re.findall(r'[a-zA-Z]+', input_str))
        if not input_letters:
            # No Latin letters in the submitted name: fall back to exact match.
            return self.build_res(input_str == ocr_str)
        ocr_letters = ''.join(re.findall(r'[a-zA-Z]+', ocr_str))
        if not ocr_letters:
            return self.RESULT_N
        return self.build_res(input_letters == ocr_letters)

    # A Latin letter in a non-passport name cannot be judged here.
    if re.search(r'[a-zA-Z]', input_str):
        return self.RESULT_NA
    input_s = input_str.translate(self.TRANS)
    ocr_s = ocr_str.translate(self.TRANS)
    return self.build_res(input_s == ocr_s)
def date_compare(self, input_str, ocr_str, **kwargs):
if not isinstance(input_str, str) or isinstance(ocr_str, str):
return self.RESULT_N
if kwargs.get('long', False) and '长期' in ocr_str:
return '2099-12-31'
if kwargs.get('ocr_split', False):
......@@ -42,10 +73,14 @@ class Comparison:
return self.build_res(input_str == ocr_str)
def rmb_compare(self, input_str, ocr_str, **kwargs):
    """Compare a submitted numeric amount with the amount text read by OCR.

    Args:
        input_str: Submitted amount as a numeric string.
        ocr_str: Amount text read from the OCR record.
        **kwargs: Accepted for a uniform comparator signature; unused here.

    Returns:
        ``self.RESULT_N`` when either value is not a ``str`` or when
        ``input_str`` cannot be parsed as a float; otherwise the result of
        ``self.build_res`` applied to comparing ``to_rmb_upper(amount)`` with
        ``ocr_str``.
    """
    if not isinstance(input_str, str) or not isinstance(ocr_str, str):
        return self.RESULT_N
    try:
        amount = float(input_str)
    except ValueError:
        # An unparsable amount is a mismatch, not a reason to abort the run.
        return self.RESULT_N
    # NOTE(review): to_rmb_upper presumably renders the number as an
    # upper-case Chinese RMB amount so it can be matched against the OCR
    # text - confirm against its definition.
    input_rmb_upper = to_rmb_upper(amount)
    return self.build_res(input_rmb_upper == ocr_str)
def type_compare(self, input_str, ocr_str, **kwargs):
if not isinstance(input_str, str) or isinstance(ocr_str, str):
return self.RESULT_N
for map_tuple in self.TYPE_MAPPING:
if re.search(map_tuple[0], ocr_str) is not None:
compare_str = map_tuple[1]
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!