# comparison.py 3.84 KB
import re
from datetime import datetime
from .rmb_upper import to_rmb_upper


class Comparison:
    """Comparators between a user-entered value and an OCR-extracted value.

    Every ``*_compare`` method shares the signature
    ``(input_str, ocr_str, idx, **kwargs)`` and returns a two-item tuple
    ``(result, output)`` where ``result`` is one of ``RESULT_Y``,
    ``RESULT_N`` or ``RESULT_NA``. ``idx`` is accepted so all comparators can
    be called uniformly; none of the comparators in this class reads it.
    """

    # Codes produced by ``type_compare``.
    CSIBM = 'CSIBM'
    CSSME = 'CSSME'
    CSOTH = 'CSOTH'

    # (regex pattern, code) pairs, checked in order against the OCR text.
    TYPE_MAPPING = (
        (r'个体工商户', CSIBM),
        (r'有限责任公司', CSSME),
        (r'个人独资企业', CSSME),
        (r'有限合伙企业', CSSME),
        (r'股份合作制', CSSME),
    )

    RESULT_Y = 'Y'
    RESULT_N = 'N'
    RESULT_NA = 'NA'

    # Characters removed from both names before a non-passport comparison.
    TRANS_MAP = {
        ' ': '',
        '·': '',
    }
    TRANS = str.maketrans(TRANS_MAP)

    @staticmethod
    def _both_str(input_str, ocr_str):
        """Return True only when both operands are ``str`` instances."""
        return isinstance(input_str, str) and isinstance(ocr_str, str)

    def build_res(self, result):
        """Map a truthy/falsy ``result`` to ``RESULT_Y`` / ``RESULT_N``."""
        return self.RESULT_Y if result else self.RESULT_N

    def common_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare the two strings for exact equality.

        Returns:
            ``(result, ocr_str)``; ``result`` is ``RESULT_N`` when either
            operand is not a string.
        """
        if not self._both_str(input_str, ocr_str):
            return self.RESULT_N, ocr_str
        return self.build_res(input_str == ocr_str), ocr_str

    def name_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare two names.

        Keyword Args:
            is_passport: when truthy, compare the first regex match found in
                each string; if ``input_str`` has no match, fall back to
                exact equality, and if only ``ocr_str`` has no match the
                result is ``RESULT_N``.

        Without ``is_passport``, a regex hit on ``input_str`` yields
        ``RESULT_NA``; otherwise both strings are compared after removing the
        characters listed in ``TRANS_MAP``.

        Returns:
            ``(result, ocr_str)``; ``result`` is ``RESULT_N`` when either
            operand is not a string.
        """
        # NOTE(review): as written, both patterns below require a literal ']'
        # (and, for the passport pattern, a following '!') right after an
        # ASCII letter. That looks like a typo -- possibly r'[a-zA-Z]+' and
        # r'[a-zA-Z]' were intended -- but correcting it changes comparison
        # results, so the patterns are left untouched until confirmed.
        if not self._both_str(input_str, ocr_str):
            return self.RESULT_N, ocr_str

        if kwargs.get('is_passport'):
            input_obj = re.search(r'[a-zA-Z]]!', input_str)
            if input_obj is None:
                return self.build_res(input_str == ocr_str), ocr_str
            ocr_obj = re.search(r'[a-zA-Z]]!', ocr_str)
            if ocr_obj is None:
                return self.RESULT_N, ocr_str
            return self.build_res(input_obj.group() == ocr_obj.group()), ocr_str

        if re.search(r'[a-zA-Z]]', input_str):
            return self.RESULT_NA, ocr_str
        input_s = input_str.translate(self.TRANS)
        ocr_s = ocr_str.translate(self.TRANS)
        return self.build_res(input_s == ocr_s), ocr_str

    def date_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare a ``-``-separated input date with an OCR date string.

        Keyword Args:
            long: when truthy and ``ocr_str`` contains ``'长期'``, return
                ``(RESULT_Y, '2099-12-31')`` without looking at ``input_str``.
            ocr_split: when truthy, keep only the part of ``ocr_str`` after
                its last ``'-'``.
            ocr_replace: when truthy, rewrite ``年``/``月`` to ``'-'`` and drop
                ``日`` in ``ocr_str``.
            input_replace: when not ``None``, the separator that replaces
                ``'-'`` in ``input_str`` before comparing; the OCR value is
                then parsed with that separator and returned as
                ``YYYY-MM-DD``, or ``None`` if it does not parse.

        Returns:
            ``(result, ocr_output)``. The comparison itself is a plain string
            equality between the adjusted input and the adjusted OCR text.
        """
        if not self._both_str(input_str, ocr_str):
            return self.RESULT_N, ocr_str

        if kwargs.get('long', False) and '长期' in ocr_str:
            return self.RESULT_Y, '2099-12-31'

        if kwargs.get('ocr_split', False):
            ocr_str = ocr_str.split('-')[-1]
        if kwargs.get('ocr_replace', False):
            ocr_str = ocr_str.replace('年', '-').replace('月', '-').replace('日', '')

        separator = kwargs.get('input_replace')
        if separator is None:
            ocr_output = ocr_str
        else:
            input_str = input_str.replace('-', separator)
            date_format = '%Y{0}%m{0}%d'.format(separator)
            try:
                ocr_output = datetime.strptime(ocr_str, date_format).strftime('%Y-%m-%d')
            except ValueError:
                # OCR text is not a date in the expected layout.
                ocr_output = None

        return self.build_res(input_str == ocr_str), ocr_output

    def rmb_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare a numeric amount with its upper-case RMB form from OCR.

        ``input_str`` is converted with ``float`` and passed to
        ``to_rmb_upper``; the result is compared with ``ocr_str``.

        Returns:
            ``(RESULT_Y, input_str)`` on a match, otherwise
            ``(RESULT_N, None)``. Non-string operands and an ``input_str``
            that is not a valid number also yield ``(RESULT_N, None)``.
        """
        if not self._both_str(input_str, ocr_str):
            return self.RESULT_N, None
        try:
            amount = float(input_str)
        except ValueError:
            # Empty or non-numeric input cannot match any amount.
            return self.RESULT_N, None
        if to_rmb_upper(amount) == ocr_str:
            return self.RESULT_Y, input_str
        return self.RESULT_N, None

    def type_compare(self, input_str, ocr_str, idx, **kwargs):
        """Compare ``input_str`` with the code derived from ``ocr_str``.

        The code is that of the first ``TYPE_MAPPING`` pattern found in
        ``ocr_str``, or ``CSOTH`` when none matches.

        Returns:
            ``(result, code)``; when either operand is not a string the
            return value is ``(RESULT_N, ocr_str)``.
        """
        if not self._both_str(input_str, ocr_str):
            return self.RESULT_N, ocr_str
        compare_str = next(
            (code for pattern, code in self.TYPE_MAPPING
             if re.search(pattern, ocr_str) is not None),
            self.CSOTH,
        )
        return self.build_res(input_str == compare_str), compare_str


# Module-level Comparison instance, created once when this module is imported.
cp = Comparison()