fix_pred.py
1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import re
class FixText:
    """Stateless post-processing helpers for predicted text strings.

    Every method takes the raw predicted string and returns a cleaned
    string.  When the pattern a method looks for is absent, the input is
    returned unchanged, so the fixers are always safe to apply.
    """

    @staticmethod
    def do_nothing(pred_text_src):
        """Return ``pred_text_src`` unchanged (identity fixer)."""
        return pred_text_src

    @staticmethod
    def only_date(pred_text_src):
        """Return the text from the first ``'20'`` up to the end of that line.

        The search is purely textual: the first ``'20'`` found anywhere
        (even inside a longer digit run) is where the result starts.
        Returns the input unchanged when it contains no ``'20'``.
        """
        found = re.search(r'20.*', pred_text_src)
        if found is None:
            return pred_text_src
        return found.group()

    @staticmethod
    def only_digit(pred_text_src):
        """Return the first run of consecutive digits in the text.

        Returns the input unchanged when it contains no digit.
        """
        found = re.search(r'\d+', pred_text_src)
        if found is None:
            return pred_text_src
        return found.group()

    @staticmethod
    def remove_start(pred_text_src, start_char='电话'):
        """Strip a leading ``start_char`` marker from the text.

        Only the prefix is removed; later occurrences of ``start_char``
        inside the text are preserved.  Returns the input unchanged when
        it does not start with ``start_char``.
        """
        if pred_text_src.startswith(start_char):
            # Slice instead of str.replace(): replace() would also delete
            # every later occurrence of the marker, corrupting the value.
            return pred_text_src[len(start_char):]
        return pred_text_src

    @staticmethod
    def only_digit_alpha(pred_text_src):
        """Return the first run of word characters (regex ``\\w+``).

        NOTE: for ``str`` patterns ``\\w`` also matches ``_`` and non-ASCII
        letters/digits (e.g. CJK characters), not just ASCII letters and
        digits.  Returns the input unchanged when nothing matches.
        """
        found = re.search(r'\w+', pred_text_src)
        if found is None:
            return pred_text_src
        return found.group()

    @staticmethod
    def remove_bank(pred_text_src):
        """Return the text following the first ``'户银行'`` on that line.

        Everything up to and including the marker is dropped.  Returns the
        input unchanged when the marker is absent.
        """
        found = re.search(r'户银行(.*)', pred_text_src)
        if found is None:
            return pred_text_src
        return found.group(1)

    @staticmethod
    def only_amount(pred_text_src):
        """Return the first ``digits<sep>digits`` group with ``.`` as separator.

        ``<sep>`` is a single ``-``, ``,`` or ``.``; it is normalised to
        ``.`` in the result (e.g. ``'12,50'`` -> ``'12.50'``).  Returns the
        input unchanged when no such group exists.
        """
        found = re.search(r'\d+[-,\.]\d+', pred_text_src)
        if found is None:
            return pred_text_src
        # The match contains exactly one separator character, so these
        # replacements only ever touch that separator.
        return found.group().replace('-', '.').replace(',', '.')
# Shared module-level FixText instance; the class defines only static
# methods, so this object carries no state of its own.
fix_text_obj = FixText()