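Fix bug: match the name / ID number / company clause without requiring a role prefix (承租人 / 借款人), drop the duplicated fallback block, and strip closing parentheses from the extracted ID number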
Showing 1 changed file with 2 additions and 22 deletions
... | @@ -21,8 +21,7 @@ class HMHRetriever: | ... | @@ -21,8 +21,7 @@ class HMHRetriever: |
21 | for bbox, text in pdf_text_list.pop(str(0), []): | 21 | for bbox, text in pdf_text_list.pop(str(0), []): |
22 | # print(text) | 22 | # print(text) |
23 | if not is_find_name_id_company: | 23 | if not is_find_name_id_company: |
24 | # name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text) | 24 | name_id_company_list = re.findall(r'姓名(.*)证件号码(.*)与(.*公司)', text) |
25 | name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text) | ||
26 | for name_id_company_tuple in name_id_company_list: | 25 | for name_id_company_tuple in name_id_company_list: |
27 | if len(name_id_company_tuple) == 3: | 26 | if len(name_id_company_tuple) == 3: |
28 | result[self.search_fields_list[0][0]] = { | 27 | result[self.search_fields_list[0][0]] = { |
... | @@ -30,26 +29,7 @@ class HMHRetriever: | ... | @@ -30,26 +29,7 @@ class HMHRetriever: |
30 | self.position_str: bbox | 29 | self.position_str: bbox |
31 | } | 30 | } |
32 | result[self.search_fields_list[1][0]] = { | 31 | result[self.search_fields_list[1][0]] = { |
33 | self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(), | 32 | self.words_str: name_id_company_tuple[1].replace('\u3000', '').replace(')', '').replace(')', '').strip(), |
34 | self.position_str: bbox | ||
35 | } | ||
36 | result[self.search_fields_list[2][0]] = { | ||
37 | self.words_str: name_id_company_tuple[2], | ||
38 | self.position_str: bbox | ||
39 | } | ||
40 | is_find_name_id_company = True | ||
41 | break | ||
42 | if not is_find_name_id_company: | ||
43 | name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text) | ||
44 | # name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text) | ||
45 | for name_id_company_tuple in name_id_company_list: | ||
46 | if len(name_id_company_tuple) == 3: | ||
47 | result[self.search_fields_list[0][0]] = { | ||
48 | self.words_str: name_id_company_tuple[0].replace('\u3000', '').strip(), | ||
49 | self.position_str: bbox | ||
50 | } | ||
51 | result[self.search_fields_list[1][0]] = { | ||
52 | self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(), | ||
53 | self.position_str: bbox | 33 | self.position_str: bbox |
54 | } | 34 | } |
55 | result[self.search_fields_list[2][0]] = { | 35 | result[self.search_fields_list[2][0]] = { | ... | ... |
-
Please register or sign in to post a comment