a473474e by 周伟奇

fix registeredCapital

1 parent ad575f50
import re
from datetime import datetime
from .rmb_upper import to_rmb_upper
from .rmb_lower import rmb_handler
class Comparison:
......@@ -36,26 +36,26 @@ class Comparison:
return self.RESULT_N
def common_compare(self, input_str, ocr_str, idx, **kwargs):
    """Compare an input value with its OCR counterpart by exact equality.

    Returns a ``(result, ocr_value)`` tuple:

    * ``(self.RESULT_NA, ocr_str)`` when either value is not a ``str``;
    * ``(self.RESULT_NA, None)`` when the OCR text is empty or only
      whitespace;
    * ``(self.build_res(input_str == ocr_str), ocr_str)`` otherwise.

    ``idx`` and ``kwargs`` are accepted so that every ``*_compare`` method
    shares one call signature; this method does not read them.
    """
    # The type check must run first: ``str.strip`` below is only safe on str.
    if not isinstance(ocr_str, str) or not isinstance(input_str, str):
        return self.RESULT_NA, ocr_str
    # ``not ocr_str.strip()`` covers both '' and whitespace-only OCR output.
    if not ocr_str.strip():
        return self.RESULT_NA, None
    return self.build_res(input_str == ocr_str), ocr_str
def company_compare(self, input_str, ocr_str, idx, **kwargs):
    """Compare two company names after removing ``self.re_obj`` matches.

    Both strings have every match of ``self.re_obj`` deleted and are then
    stripped of surrounding whitespace before being tested for equality.

    Returns a ``(result, ocr_value)`` tuple:

    * ``(self.RESULT_NA, ocr_str)`` when either value is not a ``str``;
    * ``(self.RESULT_NA, None)`` when the OCR text is empty or only
      whitespace;
    * ``(self.build_res(<normalised equality>), ocr_str)`` otherwise. The
      original, un-normalised OCR text is what gets returned.

    ``idx`` and ``kwargs`` are accepted so that every ``*_compare`` method
    shares one call signature; this method does not read them.
    """
    # The type check must run first: ``str.strip`` below is only safe on str.
    if not isinstance(ocr_str, str) or not isinstance(input_str, str):
        return self.RESULT_NA, ocr_str
    # ``not ocr_str.strip()`` covers both '' and whitespace-only OCR output.
    if not ocr_str.strip():
        return self.RESULT_NA, None
    input_tmp = re.sub(self.re_obj, '', input_str).strip()
    ocr_tmp = re.sub(self.re_obj, '', ocr_str).strip()
    return self.build_res(input_tmp == ocr_tmp), ocr_str
def name_compare(self, input_str, ocr_str, idx, **kwargs):
if ocr_str == '':
return self.RESULT_NA, None
if not isinstance(input_str, str) or not isinstance(ocr_str, str):
if not isinstance(ocr_str, str) or not isinstance(input_str, str):
return self.RESULT_NA, ocr_str
if ocr_str == '' or ocr_str.strip() == '':
return self.RESULT_NA, None
if kwargs.get('is_passport'):
input_tmp = input_str.upper().replace(' ', '')
ocr_tmp = ocr_str.upper().replace(' ', '')
......@@ -71,15 +71,16 @@ class Comparison:
return self.build_res(input_s == ocr_s), ocr_str
def date_compare(self, input_str, ocr_str, idx, **kwargs):
if ocr_str == '':
return self.RESULT_NA, None
if not isinstance(input_str, str) or not isinstance(ocr_str, str):
if not isinstance(ocr_str, str) or not isinstance(input_str, str):
return self.RESULT_NA, ocr_str
if kwargs.get('long', False) and '长期' in ocr_str:
if input_str == '2099-12-31' or input_str == '2099-01-01':
return self.RESULT_Y, '2099-12-31'
else:
return self.RESULT_N, '2099-12-31'
if ocr_str == '' or ocr_str.strip() == '':
return self.RESULT_NA, None
if kwargs.get('long', False):
if '长期' in ocr_str or '永久' in ocr_str:
if input_str == '2099-12-31' or input_str == '2099-01-01':
return self.RESULT_Y, '2099-12-31'
else:
return self.RESULT_N, '2099-12-31'
if kwargs.get('ocr_split', False):
if '至' in ocr_str:
ocr_str = ocr_str.split('至')[-1]
......@@ -102,26 +103,26 @@ class Comparison:
return self.build_res(input_str == ocr_str), ocr_output
def rmb_compare(self, input_str, ocr_str, idx, **kwargs):
    """Compare a numeric input amount with an OCR'd RMB amount.

    The OCR text is converted to a number with
    ``rmb_handler.to_rmb_lower`` and tested for equality against
    ``float(input_str)``.

    Returns a ``(result, value)`` tuple:

    * ``(self.RESULT_NA, None)`` when either argument is not a ``str`` or
      the OCR text is empty or only whitespace;
    * ``(self.RESULT_N, None)`` when the conversion or comparison raises
      (for example ``input_str`` is not a valid number);
    * ``(res, input_str)`` when ``res`` equals ``self.RESULT_Y``;
    * ``(res, ocr_lower)`` otherwise, where ``ocr_lower`` is the converted
      OCR value (it may be ``None`` if the text could not be converted).

    ``idx`` and ``kwargs`` are accepted so that every ``*_compare`` method
    shares one call signature; this method does not read them.
    """
    if not isinstance(ocr_str, str) or not isinstance(input_str, str):
        return self.RESULT_NA, None
    # ``not ocr_str.strip()`` covers both '' and whitespace-only OCR output.
    if not ocr_str.strip():
        return self.RESULT_NA, None
    try:
        # The OCR text has to be passed in: calling ``to_rmb_lower()`` with
        # no argument raises TypeError, which the handler below would turn
        # into RESULT_N for every single comparison.
        ocr_lower = rmb_handler.to_rmb_lower(ocr_str)
        res = self.build_res(float(input_str) == ocr_lower)
    except Exception:
        # Deliberate best effort: any failure counts as "does not match".
        return self.RESULT_N, None
    if res == self.RESULT_Y:
        return res, input_str
    return res, ocr_lower
def type_compare(self, input_str, ocr_str, idx, **kwargs):
if ocr_str == '':
return self.RESULT_NA, None
if not isinstance(input_str, str) or not isinstance(ocr_str, str):
if not isinstance(ocr_str, str) or not isinstance(input_str, str):
return self.RESULT_NA, ocr_str
if ocr_str == '' or ocr_str.strip() == '':
return self.RESULT_NA, None
for map_tuple in self.TYPE_MAPPING:
if re.search(map_tuple[0], ocr_str) is not None:
compare_str = map_tuple[1]
......
import re
class RMBHandler:
    """Convert RMB amount text (as produced by OCR) into a number of yuan.

    Two input styles are handled by ``to_rmb_lower``:

    * formal upper-case Chinese numerals, e.g. ``'壹万伍仟肆佰壹拾圆叁角'``;
    * digits with an optional ``万``/``亿`` suffix, e.g. ``'100万元整'``.
    """

    def __init__(self):
        # Upper-case Chinese digit -> numeric value.
        self.num_mapping = {
            '零': 0,
            '壹': 1,
            '贰': 2,
            '叁': 3,
            '肆': 4,
            '伍': 5,
            '陆': 6,
            '柒': 7,
            '捌': 8,
            '玖': 9
        }
        # Unit character -> (rank, multiplier). The rank orders the units
        # from smallest to largest; the multiplier is the value in yuan.
        # '元' is registered next to '圆' because both spellings are used
        # for the yuan unit and must parse identically.
        self.unit_mapping = {
            '厘': (0, 0.001),
            '分': (1, 0.01),
            '角': (2, 0.1),
            '圆': (3, 1),
            '元': (3, 1),
            '拾': (4, 10),
            '佰': (5, 100),
            '仟': (6, 1000),
            '万': (7, 10000),
            '亿': (8, 100000000)
        }

    def upper_to_lower(self, price):
        """Return the numeric value of an upper-case Chinese amount.

        The text is read left to right. Each unit character closes a group
        whose value is ``(pending digit + every earlier group of strictly
        lower rank) * multiplier``. Folding only the lower-ranked groups is
        what keeps ``'壹亿伍仟万'`` at 150000000: the ``万`` scales the
        ``伍仟`` in front of it but leaves the ``壹亿`` group alone.

        Characters that are neither a digit nor a unit are skipped. If the
        first recognised character is a unit, a digit of 1 is implied
        (``'拾伍万'`` is 150000). A trailing digit with no unit after it is
        ignored.

        Args:
            price: the amount text, normally the output of ``pre_process``.

        Returns:
            The amount as an ``int`` or ``float``; 0 for empty text.
        """
        groups = []        # (rank, value) of each closed group, in reading order
        pending = 0        # digit waiting for its unit
        at_start = True    # no recognised character consumed yet
        for char in price:
            if char in self.num_mapping:
                pending = self.num_mapping[char]
                at_start = False
                continue
            if char not in self.unit_mapping:
                # Unknown characters must not disturb the parser state.
                continue
            if at_start:
                pending = 1
                at_start = False
            rank, multiplier = self.unit_mapping[char]
            group_value = pending
            # Absorb the lower-ranked groups this unit applies to.
            while groups and groups[-1][0] < rank:
                group_value += groups.pop()[1]
            groups.append((rank, group_value * multiplier))
            pending = 0
        return sum(value for _, value in groups)

    def pre_process(self, price, upper=True):
        """Trim ``price`` to the span between its first and last amount character.

        An amount character is any key of ``num_mapping`` or
        ``unit_mapping``; text before the first one and after the last one
        (currency labels, brackets, '整', ...) is dropped.

        Args:
            price: raw amount text.
            upper: only ``True`` is implemented; with ``False`` the method
                does nothing and returns ``None``.

        Returns:
            The trimmed substring, or ``None`` when ``price`` contains no
            amount character (or ``upper`` is false).
        """
        if not upper:
            return None
        positions = [
            idx for idx, char in enumerate(price)
            if char in self.num_mapping or char in self.unit_mapping
        ]
        if not positions:
            return None
        return price[positions[0]: positions[-1] + 1]

    def to_rmb_lower(self, price):
        """Convert amount text to a ``float`` number of yuan.

        Text without any digit character is treated as upper-case Chinese
        numerals: it is trimmed with ``pre_process`` and parsed with
        ``upper_to_lower``. Text containing digits is parsed as the first
        number found, scaled by an immediately following ``万`` or ``亿``.
        Results are rounded to 3 decimal places, so float noise from the
        multiplications does not break equality checks by callers.

        Args:
            price: the amount text.

        Returns:
            The amount as a ``float``, or ``None`` when ``price`` is not a
            string or no amount can be extracted. This method never raises.
        """
        if not isinstance(price, str):
            return None
        if re.search(r'[\d]', price) is None:
            trimmed = self.pre_process(price)
            if not trimmed:
                return None
            return float(round(self.upper_to_lower(trimmed), 3))
        # A digit is present, so this pattern is guaranteed to match.
        re_obj = re.search(r'(\d+\.?\d*)([万亿]?)', price)
        try:
            digit = float(re_obj.group(1))
            unit = re_obj.group(2)
            if unit in self.unit_mapping:
                digit = round(digit * self.unit_mapping[unit][1], 3)
        except (ValueError, OverflowError):
            return None
        return digit
# Shared module-level converter instance.
rmb_handler = RMBHandler()

if __name__ == '__main__':
    # Manual smoke run: print "<text>=<converted value>" for each sample.
    # Samples with fractional units and large amounts (not run by default).
    test_2 = [
        '壹万伍仟肆佰壹拾圆叁角伍分肆厘',
        '捌万陆仟肆佰壹拾圆整',
        '壹万伍仟肆佰壹拾元贰角捌分肆厘',
        '拾壹亿壹仟万伍仟肆佰壹拾元贰角捌分肆厘',
        '拾伍万圆',
    ]
    # Samples mixing labels, digits and upper-case numerals.
    test_1 = [
        'sfdds',
        '柒佰玖拾万元整',
        '100万元整',
        '人民币伍佰万圆整',
        '人民币壹仟万元',
        '100万元',
        '贰佰壹拾捌万圆整',
        '(人民币)壹仟万元',
        '壹佰壹拾万圆整',
        '人民币30.0000万元整',
        '伍拾万元人民币',
    ]
    input_list = test_1
    for sample in input_list:
        converted = rmb_handler.to_rmb_lower(sample)
        print('{0}={1}'.format(sample, converted))
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!