1122a082 by 周伟奇

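Fix bug: match the name / ID number / company pattern without requiring the 承租人 or 借款人 prefix (removing the separate 借款人 fallback pass), and strip closing parentheses from the extracted ID number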

1 parent 784ff18a
......@@ -21,8 +21,7 @@ class HMHRetriever:
for bbox, text in pdf_text_list.pop(str(0), []):
# print(text)
if not is_find_name_id_company:
# name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text)
name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
name_id_company_list = re.findall(r'姓名(.*)证件号码(.*)与(.*公司)', text)
for name_id_company_tuple in name_id_company_list:
if len(name_id_company_tuple) == 3:
result[self.search_fields_list[0][0]] = {
......@@ -30,26 +29,7 @@ class HMHRetriever:
self.position_str: bbox
}
result[self.search_fields_list[1][0]] = {
self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(),
self.position_str: bbox
}
result[self.search_fields_list[2][0]] = {
self.words_str: name_id_company_tuple[2],
self.position_str: bbox
}
is_find_name_id_company = True
break
if not is_find_name_id_company:
name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text)
# name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
for name_id_company_tuple in name_id_company_list:
if len(name_id_company_tuple) == 3:
result[self.search_fields_list[0][0]] = {
self.words_str: name_id_company_tuple[0].replace('\u3000', '').strip(),
self.position_str: bbox
}
result[self.search_fields_list[1][0]] = {
self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(),
self.words_str: name_id_company_tuple[1].replace('\u3000', '').replace(')', '').replace(')', '').strip(),
self.position_str: bbox
}
result[self.search_fields_list[2][0]] = {
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!