1122a082 by 周伟奇

fix bug

1 parent 784ff18a
...@@ -21,8 +21,7 @@ class HMHRetriever: ...@@ -21,8 +21,7 @@ class HMHRetriever:
21 for bbox, text in pdf_text_list.pop(str(0), []): 21 for bbox, text in pdf_text_list.pop(str(0), []):
22 # print(text) 22 # print(text)
23 if not is_find_name_id_company: 23 if not is_find_name_id_company:
24 # name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text) 24 name_id_company_list = re.findall(r'姓名(.*)证件号码(.*)与(.*公司)', text)
25 name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
26 for name_id_company_tuple in name_id_company_list: 25 for name_id_company_tuple in name_id_company_list:
27 if len(name_id_company_tuple) == 3: 26 if len(name_id_company_tuple) == 3:
28 result[self.search_fields_list[0][0]] = { 27 result[self.search_fields_list[0][0]] = {
...@@ -30,26 +29,7 @@ class HMHRetriever: ...@@ -30,26 +29,7 @@ class HMHRetriever:
30 self.position_str: bbox 29 self.position_str: bbox
31 } 30 }
32 result[self.search_fields_list[1][0]] = { 31 result[self.search_fields_list[1][0]] = {
33 self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(), 32 self.words_str: name_id_company_tuple[1].replace('\u3000', '').replace(')', '').replace(')', '').strip(),
34 self.position_str: bbox
35 }
36 result[self.search_fields_list[2][0]] = {
37 self.words_str: name_id_company_tuple[2],
38 self.position_str: bbox
39 }
40 is_find_name_id_company = True
41 break
42 if not is_find_name_id_company:
43 name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text)
44 # name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
45 for name_id_company_tuple in name_id_company_list:
46 if len(name_id_company_tuple) == 3:
47 result[self.search_fields_list[0][0]] = {
48 self.words_str: name_id_company_tuple[0].replace('\u3000', '').strip(),
49 self.position_str: bbox
50 }
51 result[self.search_fields_list[1][0]] = {
52 self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(),
53 self.position_str: bbox 33 self.position_str: bbox
54 } 34 }
55 result[self.search_fields_list[2][0]] = { 35 result[self.search_fields_list[2][0]] = {
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!