import re
from datetime import datetime

from .rmb_upper import to_rmb_upper


class Comparison:
    """Compare a user-supplied field value against the value read by OCR.

    Every ``*_compare`` method shares the signature
    ``(input_str, ocr_str, idx, **kwargs)`` and returns a 2-tuple
    ``(result, output)`` where ``result`` is one of ``RESULT_Y``,
    ``RESULT_N`` or ``RESULT_NA`` and ``output`` is the (possibly
    normalised) OCR value, or ``None`` when no usable value exists.
    The ``idx`` argument is accepted for interface uniformity and is not
    used by any method in this class.
    """

    def __init__(self):
        # Type codes produced by type_compare.
        self.CSIBM = 'CSIBM'
        self.CSSME = 'CSSME'
        self.CSOTH = 'CSOTH'
        # (regex pattern, type code) pairs, tried in order against the OCR
        # text; the first pattern that matches wins.
        self.TYPE_MAPPING = (
            (r'个体工商户', self.CSIBM),    # individually-owned business
            (r'有限责任公司', self.CSSME),  # limited liability company
            (r'个人独资企业', self.CSSME),  # sole proprietorship
            (r'有限合伙企业', self.CSSME),  # limited partnership
            (r'股份合作制', self.CSSME),    # joint-stock cooperative
        )

        # Comparison outcomes.
        self.RESULT_Y = 'Y'
        self.RESULT_N = 'N'
        self.RESULT_NA = 'NA'

        # Characters removed from names before comparing (space, middle dot).
        self.TRANS_MAP = {
            ' ': '',
            '·': '',
        }
        self.TRANS = str.maketrans(self.TRANS_MAP)

    def build_res(self, result):
        """Map a truthy/falsy value to ``RESULT_Y`` / ``RESULT_N``."""
        return self.RESULT_Y if result else self.RESULT_N

    def common_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare two strings for exact equality.

        Returns:
            ``(RESULT_Y or RESULT_N, ocr_str)``. If either argument is not
            a ``str`` the result is ``RESULT_N``.
        """
        if not isinstance(input_str, str) or not isinstance(ocr_str, str):
            # Return a tuple here as well, so callers can always unpack
            # ``result, output`` like they do for every other comparator.
            return self.RESULT_N, ocr_str
        return self.build_res(input_str == ocr_str), ocr_str

    def name_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare a person/company name against the OCR text.

        Keyword Args:
            is_passport: when truthy, use the passport branch: if the
                pattern below matches the input, the matched fragments of
                input and OCR text are compared; otherwise the raw strings
                are compared.

        Without ``is_passport``: if the pattern matches the input the
        result is ``RESULT_NA``; otherwise both strings are compared after
        removing the characters listed in ``TRANS_MAP``.

        Returns:
            ``(result, ocr_str)``; ``RESULT_N`` if either argument is not
            a ``str``.
        """
        if not isinstance(input_str, str) or not isinstance(ocr_str, str):
            return self.RESULT_N, ocr_str

        if kwargs.get('is_passport'):
            # NOTE(review): this pattern requires a letter followed by the
            # literal characters "]!" -- it looks like a typo for a plain
            # letter-run pattern. Kept as-is because the intended pattern
            # cannot be confirmed from this file; please verify.
            input_obj = re.search(r'[a-zA-Z]]!', input_str)
            if not input_obj:
                return self.build_res(input_str == ocr_str), ocr_str
            ocr_obj = re.search(r'[a-zA-Z]]!', ocr_str)
            if not ocr_obj:
                return self.RESULT_N, ocr_str
            return self.build_res(input_obj.group() == ocr_obj.group()), ocr_str

        # NOTE(review): same concern -- the pattern demands a literal "]"
        # after a letter, so this branch (the only producer of RESULT_NA)
        # fires only for inputs such as "a]". Confirm the intended pattern.
        if re.search(r'[a-zA-Z]]', input_str):
            return self.RESULT_NA, ocr_str
        input_s = input_str.translate(self.TRANS)
        ocr_s = ocr_str.translate(self.TRANS)
        return self.build_res(input_s == ocr_s), ocr_str

    def date_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare a date string against the OCR date text.

        Keyword Args:
            long: when truthy and the OCR text contains '长期'
                ("long-term"), return ``(RESULT_Y, '2099-12-31')``
                without looking at ``input_str``.
            ocr_split: when truthy, keep only the part of the OCR text
                after its last ``'-'``.
            ocr_replace: when truthy, rewrite '年'/'月' to ``'-'`` and
                drop '日' in the OCR text.
            input_replace: when not ``None``, the separator that replaces
                ``'-'`` in ``input_str``; the OCR text is then parsed with
                that separator and re-emitted as ``YYYY-MM-DD``.

        Returns:
            ``(result, ocr_output)``. ``result`` is the string equality of
            the (rewritten) input and OCR texts. ``ocr_output`` is the OCR
            text, its ``YYYY-MM-DD`` form when ``input_replace`` is given,
            or ``None`` if that parse fails. ``RESULT_N`` with the raw
            ``ocr_str`` if either argument is not a ``str``.
        """
        if not isinstance(input_str, str) or not isinstance(ocr_str, str):
            return self.RESULT_N, ocr_str

        if kwargs.get('long', False) and '长期' in ocr_str:
            return self.RESULT_Y, '2099-12-31'

        if kwargs.get('ocr_split', False):
            ocr_str = ocr_str.split('-')[-1]
        if kwargs.get('ocr_replace', False):
            ocr_str = ocr_str.replace('年', '-').replace('月', '-').replace('日', '')

        input_replace = kwargs.get('input_replace')
        if input_replace is not None:
            input_str = input_str.replace('-', input_replace)
            date_format = '%Y{0}%m{0}%d'.format(input_replace)
            try:
                ocr_output = datetime.strptime(ocr_str, date_format).strftime('%Y-%m-%d')
            except ValueError:
                # strptime signals unparseable data (or format) with
                # ValueError; report "no usable date" instead of failing.
                ocr_output = None
        else:
            ocr_output = ocr_str

        return self.build_res(input_str == ocr_str), ocr_output

    def rmb_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare a numeric amount against OCR text via ``to_rmb_upper``.

        ``input_str`` is converted with ``float`` and passed to
        ``to_rmb_upper``; the outcome is compared with ``ocr_str`` for
        exact equality.

        Returns:
            ``(RESULT_Y, input_str)`` on a match, otherwise
            ``(RESULT_N, None)`` -- including when either argument is not a
            ``str`` or ``input_str`` is not a valid number.
        """
        if not isinstance(input_str, str) or not isinstance(ocr_str, str):
            return self.RESULT_N, None

        try:
            amount = float(input_str)
        except ValueError:
            # A non-numeric amount (e.g. an empty string) cannot match.
            return self.RESULT_N, None

        if to_rmb_upper(amount) == ocr_str:
            return self.RESULT_Y, input_str
        return self.RESULT_N, None

    def type_compare(self, input_str, ocr_str, idx, **kwargs):
        """Classify the OCR text into a type code and compare with the input.

        The OCR text is searched with each ``TYPE_MAPPING`` pattern in
        order; the code of the first match is used, or ``CSOTH`` when none
        matches.

        Returns:
            ``(result, compare_str)`` where ``compare_str`` is the derived
            type code; ``(RESULT_N, ocr_str)`` if either argument is not a
            ``str``.
        """
        if not isinstance(input_str, str) or not isinstance(ocr_str, str):
            return self.RESULT_N, ocr_str

        compare_str = next(
            (code for pattern, code in self.TYPE_MAPPING
             if re.search(pattern, ocr_str)),
            self.CSOTH,
        )
        return self.build_res(input_str == compare_str), compare_str


cp = Comparison()