fix new hil contract

周伟奇
Showing 1 changed file with 104 additions and 29 deletions
src/common/electronic_hil_contract/get_char_fsm.py
--- a/src/common/electronic_hil_contract/get_char_fsm.py
View file @f2cd1c7
+++ b/src/common/electronic_hil_contract/get_char_fsm.py
View file @f2cd1c7
@@ -753,71 +753,146 @@ class Finder:
                    if re.match('保证人3', text) is not None:
                        anchor = [bbox[0], bbox[1]]
+        need_bbox_find_keys_bbox = [None, None, None]
        if anchor is not None:
            for block in self.pdf_info[page_num]['blocks']:
+                if all(need_bbox_find_keys_bbox):
+                    break
                if block['type'] != 0:
                    continue
                for line in block['lines']:
+                    if all(need_bbox_find_keys_bbox):
+                        break
                    for span in line['spans']:
+                        if all(need_bbox_find_keys_bbox):
+                            break
                        bbox, text = span['bbox'], span['text']
                        # 找到角色姓名
                        if re.match(role_key, text) is not None:
                            words = text.split('：')[-1]
-                            name['words'] = words
+                            if len(words) == 0:
-                            name['page'] = page_num
+                                need_bbox_find_keys_bbox[0] = bbox
-                            name['position'] = bbox
+                            else:
+                                name['words'] = words
+                                name['page'] = page_num
+                                name['position'] = bbox
+                            continue
                        if role_key == '承租人：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
-                                id_num['words'] = words
+                                if len(words) == 0:
-                                id_num['page'] = page_num
+                                    need_bbox_find_keys_bbox[1] = bbox
-                                id_num['position'] = bbox
+                                else:
+                                    id_num['words'] = words
+                                    id_num['page'] = page_num
+                                    id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
-                                representative['words'] = words
+                                if len(words) == 0:
-                                representative['page'] = page_num
+                                    need_bbox_find_keys_bbox[2] = bbox
-                                representative['position'] = bbox
+                                else:
-                        if role_key == '保证人1：':
+                                    representative['words'] = words
+                                    representative['page'] = page_num
+                                    representative['position'] = bbox
+                        elif role_key == '保证人1：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
-                                id_num['words'] = words
+                                if len(words) == 0:
-                                id_num['page'] = page_num
+                                    need_bbox_find_keys_bbox[1] = bbox
-                                id_num['position'] = bbox
+                                else:
+                                    id_num['words'] = words
+                                    id_num['page'] = page_num
+                                    id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
-                                representative['words'] = words
+                                if len(words) == 0:
-                                representative['page'] = page_num
+                                    need_bbox_find_keys_bbox[2] = bbox
-                                representative['position'] = bbox
+                                else:
-                        if role_key == '保证人2：':
+                                    representative['words'] = words
+                                    representative['page'] = page_num
+                                    representative['position'] = bbox
+                        elif role_key == '保证人2：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
-                                id_num['words'] = words
+                                if len(words) == 0:
-                                id_num['page'] = page_num
+                                    need_bbox_find_keys_bbox[1] = bbox
-                                id_num['position'] = bbox
+                                else:
+                                    id_num['words'] = words
+                                    id_num['page'] = page_num
+                                    id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
-                                representative['words'] = words
+                                if len(words) == 0:
-                                representative['page'] = page_num
+                                    need_bbox_find_keys_bbox[2] = bbox
-                                representative['position'] = bbox
+                                else:
-                        if role_key == '保证人3：':
+                                    representative['words'] = words
+                                    representative['page'] = page_num
+                                    representative['position'] = bbox
+                        elif role_key == '保证人3：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
-                                id_num['words'] = words
+                                if len(words) == 0:
-                                id_num['page'] = page_num
+                                    need_bbox_find_keys_bbox[1] = bbox
-                                id_num['position'] = bbox
+                                else:
+                                    id_num['words'] = words
+                                    id_num['page'] = page_num
+                                    id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
-                                representative['words'] = words
+                                if len(words) == 0:
+                                    need_bbox_find_keys_bbox[2] = bbox
+                                else:
+                                    representative['words'] = words
+                                    representative['page'] = page_num
+                                    representative['position'] = bbox
+        for idx, bbox in enumerate(need_bbox_find_keys_bbox):
+            if bbox is None:
+                continue
+            is_find = False
+            if idx == 1:
+                width_rate = 3
+            else:
+                width_rate = 1
+            minx = bbox[2]
+            maxx = bbox[2] + (width_rate * (bbox[2]-bbox[0]))
+            miny = bbox[1]
+            maxy = bbox[3]
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                if is_find:
+                    break
+                for line in block['lines']:
+                    if is_find:
+                        break
+                    for span in line['spans']:
+                        if is_find:
+                            break
+                        value_bbox, text = span['bbox'], span['text']
+                        if minx < np.mean(value_bbox[::2]) < maxx and miny < np.mean(value_bbox[1::2]) < maxy:
+                            if idx == 0:
+                                name['words'] = text
+                                name['page'] = page_num
+                                name['position'] = value_bbox
+                            elif idx == 1:
+                                id_num['words'] = text
+                                id_num['page'] = page_num
+                                id_num['position'] = value_bbox 
+                            elif idx == 2:
+                                representative['words'] = text
                                representative['page'] = page_num
-                                representative['position'] = bbox
+                                representative['position'] = value_bbox
+                            is_find = True
+                            break
        return name, id_num, representative
    def get_table_add_product(self):