# fix_pred.py 1.42 KB
import re

class FixText:
    """Collection of stateless helpers that clean up a predicted text string.

    Every public method takes the raw predicted text and returns a cleaned
    string. When the pattern a method looks for is absent, the input is
    returned unchanged.
    """

    @staticmethod
    def _first_match(pattern: str, text: str, group: int = 0) -> str:
        """Return ``group`` of the first ``pattern`` match in ``text``.

        Falls back to ``text`` itself when the pattern does not match.
        """
        found = re.search(pattern, text)
        return found.group(group) if found else text

    @staticmethod
    def do_nothing(pred_text_src: str) -> str:
        """Return the text unchanged (identity fixer)."""
        return pred_text_src

    @staticmethod
    def only_date(pred_text_src: str) -> str:
        """Keep everything from the first ``20`` up to the end of that line.

        Returns the input unchanged when it contains no ``20``.
        """
        return FixText._first_match(r'20.*', pred_text_src)

    @staticmethod
    def only_digit(pred_text_src: str) -> str:
        """Return the first run of digits, or the input if it has none."""
        return FixText._first_match(r'\d+', pred_text_src)

    @staticmethod
    def remove_start(pred_text_src: str, start_char: str = '电话') -> str:
        """Strip ``start_char`` from the beginning of the text, if present.

        Only the leading occurrence is removed; later occurrences of
        ``start_char`` inside the text are preserved.
        """
        if pred_text_src.startswith(start_char):
            # Slice instead of str.replace(): replace() would also delete
            # every later occurrence of start_char, not just the prefix.
            return pred_text_src[len(start_char):]
        return pred_text_src

    @staticmethod
    def only_digit_alpha(pred_text_src: str) -> str:
        r"""Return the first run of word characters, or the input if none.

        Note: ``\w`` also matches underscores and non-ASCII letters, not
        only ASCII digits and letters.
        """
        return FixText._first_match(r'\w+', pred_text_src)

    @staticmethod
    def remove_bank(pred_text_src: str) -> str:
        """Return the text following the first ``户银行`` on the same line.

        Returns the input unchanged when ``户银行`` does not occur.
        """
        return FixText._first_match(r'户银行(.*)', pred_text_src, group=1)

    @staticmethod
    def only_amount(pred_text_src: str) -> str:
        """Extract the first ``digits<sep>digits`` amount, using ``.`` as separator.

        ``<sep>`` may be ``-``, ``,`` or ``.``; whichever was found is
        normalized to ``.``. Returns the input unchanged when no such
        pattern is present.
        """
        found = re.search(r'\d+[-,\.]\d+', pred_text_src)
        if not found:
            return pred_text_src
        return found.group().replace('-', '.').replace(',', '.')

# Module-level FixText instance, created at import time. All FixText methods
# are static, so this object carries no state of its own.
# NOTE(review): presumably importers use this as a ready-made handle - confirm.
fix_text_obj = FixText()