fix HIL contract

周伟奇
Showing 2 changed files with 199 additions and 76 deletions
src/common/electronic_hil_contract/get_char.py
src/common/electronic_hil_contract/hil_contract_ocr.py
--- a/src/common/electronic_hil_contract/get_char.py
View file @d3de42e
+++ b/src/common/electronic_hil_contract/get_char.py
View file @d3de42e
@@ -6,14 +6,11 @@
 # @Description   : 
 import re
-import cv2
-import base64
 import numpy as np
 from fuzzywuzzy import fuzz
 class Finder:
    def __init__(self, pdf_info):
        self.pdf_info = pdf_info
        self.item = {"words": None,
@@ -25,6 +22,9 @@ class Finder:
                            "承租人-姓名": self.item,
                            "承租人-证件号码": self.item,
                            "承租人-法定代表人或授权代表": self.item,
+                            "共同承租人-姓名": self.item,
+                            "共同承租人-证件号码": self.item,
+                            "共同承租人-法定代表人或授权代表": self.item,
                            "保证人1-姓名": self.item,
                            "保证人1-证件号码": self.item,
                            "保证人1-法定代表人或授权代表": self.item,
@@ -47,6 +47,8 @@ class Finder:
                            "银行账户-开户行": self.item,
                            "签字页-承租人姓名": self.item,
                            "签字页-承租人签章": self.item,
+                            "签字页-共同承租人姓名": self.item,
+                            "签字页-共同承租人签章": self.item,
                            "签字页-保证人1姓名": self.item,
                            "签字页-保证人1签章": self.item,
                            "签字页-保证人2姓名": self.item,
@@ -54,7 +56,6 @@ class Finder:
                            "签字页-保证人3姓名": self.item,
                            "签字页-保证人3签章": self.item,
                            }
        # 格式化输出 车辆处置协议 要是别的字段
        self.init_result_1 = {"合同编号": self.item,
                              "承租人-姓名": self.item,
@@ -66,9 +67,7 @@ class Finder:
                              "签字页-承租人签章": self.item,
                              "签字页-销售经销商": self.item,
                              "签字页-销售经销商签章": self.item,
                              }
        # 格式化输出 车辆租赁抵押合同
        self.init_result_2 = {"合同编号": self.item,
                              "合同编号（正文）": self.item,
@@ -150,23 +149,24 @@ class Finder:
                words = matchObj.group(1)
                contract_no['position'] = None
                contract_no['page'] = pno
-                contract_no['words'] = words
+                # contract_no['words'] = words
+                contract_no['words'] = re.sub("\s", "", words).replace("）", "")
                return contract_no
            matchObj = re.search(r'编号为(.*?)的', all_text)
            if matchObj:
                words = matchObj.group(1).strip()
                contract_no['position'] = None
                contract_no['page'] = pno
-                contract_no['words'] = words
+                # contract_no['words'] = words
+                contract_no['words'] = re.sub("\s", "", words).replace("）", "")
                return contract_no
            matchObj = re.search(r'编号为(.*?)）的', all_text)
            if matchObj:
                words = matchObj.group(1).strip()
                contract_no['position'] = None
                contract_no['page'] = pno
-                contract_no['words'] = words
+                # contract_no['words'] = words
+                contract_no['words'] = re.sub("\s", "", words)
        return contract_no
    def get_key_value(self, key, page_num=None):
@@ -180,10 +180,11 @@ class Finder:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if key in text:
-                            words = text.split('：')[-1]
+                            words = text.split('：')[-1].replace("。", "")
                            value['position'] = bbox
                            value['page'] = pno
-                            value['words'] = words
+                            # value['words'] = words
+                            value['words'] = re.sub("\s", "", words)
        else:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
@@ -194,10 +195,11 @@ class Finder:
                            bbox, text = span['bbox'], span['text']
                            if key in text:
                                # print(self.pdf_info[pno])
-                                words = text.split('：')[-1]
+                                words = text.split('：')[-1].replace("。", "")
                                value['position'] = bbox
                                value['page'] = pno
-                                value['words'] = words
+                                # value['words'] = words
+                                value['words'] = re.sub("\s", "", words)
        return value
    def get_loan_principal(self, page_num='0'):
@@ -267,7 +269,6 @@ class Finder:
    def get_asp_details(self, page_num):
        asp_details_table_term = self.item.copy()
        asp_details_table = []
        asp_details_text_list = []
        table = False
@@ -283,25 +284,20 @@ class Finder:
                        table = False
                    if table == True:
                        asp_details_text_list.append(text)
+        for i in range((len(asp_details_text_list) + 2) // 3):
-        for i in range((len(asp_details_text_list)+2)//3):
            line = []
            if i == 0:
                line = [asp_details_text_list[0]]
            else:
                for j in range(3):
-                    line.append(asp_details_text_list[i*3-2+j])
+                    line.append(asp_details_text_list[i * 3 - 2 + j])
            asp_details_table.append(line)
        if len(asp_details_table) > 0:
            asp_details_table_term['words'] = asp_details_table
        return asp_details_table_term
    def get_signature(self):
        signature = self.item.copy()
        for block in self.pdf_info['0']['blocks']:
            if block['type'] != 0:
                continue
@@ -369,8 +365,8 @@ class Finder:
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
-                        if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \
+                        if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
-                            anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]:
+                                anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
                            seller['position'] = bbox
                            seller['words'] = text
        return seller
@@ -430,7 +426,6 @@ class Finder:
    def get_repayment_schedule(self):
        repayment_schedule = self.item.copy()
        repayment_schedule_text_list = []
        table = False
        page = None
@@ -444,20 +439,25 @@ class Finder:
                        if '以上表格中所列序号' in text:
                            table = False
                        if table == True:
+                            # 过滤汉字
+                            if re.compile(r'[\u4e00-\u9fff]').search(text):
+                                continue
+                            # 过滤 1. - 61. 这些标题
+                            if re.findall("\d+", text):
+                                if len(re.findall("\d+", text)) == 1:
+                                    continue
                            repayment_schedule_text_list.append(text)
                        if '61.' in text:
                            page = pno
                            table = True
+        # print("repayment_schedule_text_list = ", repayment_schedule_text_list)
        repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
-        for i in range(len(repayment_schedule_text_list)//4):
+        for i in range(len(repayment_schedule_text_list) // 4):
-            line = [f'{i+1}.']
+            line = [f'{i + 1}.']
            # 4表示4列的意思
            for j in range(4):
-                line.append(repayment_schedule_text_list[i*4+j])
+                line.append(repayment_schedule_text_list[i * 4 + j])
            repayment_schedule_table.append(line)
        repayment_schedule['words'] = repayment_schedule_table
        repayment_schedule['page'] = page
        return repayment_schedule
@@ -506,7 +506,7 @@ class Finder:
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
-        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_2['page_num'] = page_num
        signature_role_2['position'] = position
        signature_role_2['words'] = words
@@ -541,7 +541,7 @@ class Finder:
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
-        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_3['page_num'] = page_num
        signature_role_3['position'] = position
        signature_role_3['words'] = words
@@ -576,7 +576,7 @@ class Finder:
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
-        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_4['page_num'] = page_num
        signature_role_4['position'] = position
        signature_role_4['words'] = words
@@ -612,7 +612,7 @@ class Finder:
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
-        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_5['page_num'] = page_num
        signature_role_5['position'] = position
        signature_role_5['words'] = words
@@ -640,7 +640,7 @@ class Finder:
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
-                        if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
+                        if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                            name = text.split(' ')[0]
                            date = text.split(':')[-1]
                            signature_name['words'] = name
@@ -663,7 +663,7 @@ class Finder:
                        if top in text:
                            anchor_top = bbox[1]
                        if bottom in text:
-                            anchor_bottom = bbox[1]
+                            anchor_bottom = bbox[3]
        if anchor_top is not None and anchor_bottom is not None:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
@@ -672,7 +672,9 @@ class Finder:
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
-                            if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
+                            # ------------ #
+                            # print("--text = ", text)
+                            if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                                words = text
                                signature['words'] = words
                                signature['page'] = pno
@@ -683,7 +685,6 @@ class Finder:
        name = self.item.copy()
        id_num = self.item.copy()
        representative = self.item.copy()
        # 以保证人3 的左上角为定位点
        anchor = None
        for block in self.pdf_info[page_num]['blocks']:
@@ -695,7 +696,6 @@ class Finder:
                    # 找到角色姓名
                    if re.match('保证人3', text) is not None:
                        anchor = [bbox[0], bbox[1]]
        if anchor is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
@@ -711,52 +711,60 @@ class Finder:
                            name['position'] = bbox
                        if role_key == '承租人：':
                            # 找到证件号码且确定位置
-                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
-                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人1：':
                            # 找到证件号码且确定位置
-                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
-                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人2：':
                            # 找到证件号码且确定位置
-                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
-                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人3：':
                            # 找到证件号码且确定位置
-                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
-                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
@@ -783,12 +791,10 @@ class Finder:
                            start = False
                        if start == True:
                            items.append(text)
        lines = [['项目', '购买价格', '实际融资金额']]
-        for i in range(len(items)//3):
+        for i in range(len(items) // 3):
-            line = [items[2+i*3+0], items[2+i*3+1], items[2+i*3+2]]
+            line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
            lines.append(line)
        if len(items) > 0:
            lines.append([items[0], '', items[1]])
@@ -800,7 +806,6 @@ class Finder:
    def get_contract_no_dy(self):
        # 查找抵押合同编号
        contract_no = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
@@ -811,7 +816,6 @@ class Finder:
                        bbox, text = span['bbox'], span['text']
                        if '抵押合同编号' in text:
                            key_box = bbox
        if key_box is not None:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
@@ -829,7 +833,6 @@ class Finder:
    def get_dyr_name_id(self):
        name = self.item.copy()
        _id = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
@@ -842,7 +845,7 @@ class Finder:
                            key_box = bbox
        if key_box is not None:
-            rh = abs(key_box[1]-key_box[3])
+            rh = abs(key_box[1] - key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
@@ -850,12 +853,12 @@ class Finder:
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
-                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text:
                                words = text.split('：')[-1]
                                name['position'] = bbox
                                name['page'] = pno
                                name['words'] = words
-                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text:
                                words = text.split('：')[-1]
                                _id['position'] = bbox
                                _id['page'] = pno
@@ -864,7 +867,6 @@ class Finder:
    def get_key_value_position(self, key):
        value = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
@@ -875,9 +877,8 @@ class Finder:
                        bbox, text = span['bbox'], span['text']
                        if text == key:
                            key_box = bbox
        if key_box is not None:
-            rh = abs(key_box[1]-key_box[3])
+            rh = abs(key_box[1] - key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
@@ -885,13 +886,104 @@ class Finder:
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
-                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10:
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(
+                                    key_box[2] - bbox[0]) < rh * 10:
                                words = text
                                value['position'] = bbox
                                value['page'] = pno
                                value['words'] = words
        return value
+    def get_role_info_3_3(self, role_key, page_num='0'):  # "3_3" layout: find name / ID number / representative for role_key on page page_num
+        name = self.item.copy()  # each result starts as its own copy of the self.item template
+        id_num = self.item.copy()
+        representative = self.item.copy()
+        # Use the top-left corner of the span that starts with '保证人2' as the anchor point
+        anchor = None
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    # Locate the anchor span; the loop does not break, so the last matching span wins
+                    if re.match('保证人2', text) is not None:
+                        anchor = [bbox[0], bbox[1]]  # presumably (x0, y0) of an (x0, y0, x1, y1) box — TODO confirm
+        if anchor is not None:  # without an anchor nothing below runs and all three results stay as copied
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        # Role name: text must start with role_key (used as a regex pattern); keep what follows the last '：'
+                        if re.match(role_key, text) is not None:
+                            words = text.split('：')[-1]
+                            name['words'] = words
+                            name['page'] = page_num
+                            name['position'] = bbox
+                        if role_key == '承租人一：':
+                            # ID number: span starts with '证件号码：'; mean(bbox[::2]) < anchor[0] and mean(bbox[1::2]) < anchor[1]
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # Representative: span starts with '法定代表人或授权代表：'; same two comparisons as the ID number above
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '共同承租人：':
+                            # ID number: span starts with '证件号码：'; mean(bbox[::2]) < anchor[0] and mean(bbox[1::2]) > anchor[1]
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # Representative: span starts with '法定代表人或授权代表：'; same two comparisons as the ID number above
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人1：':
+                            # ID number: span starts with '证件号码：'; mean(bbox[::2]) > anchor[0] and mean(bbox[1::2]) < anchor[1]
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # Representative: span starts with '法定代表人或授权代表：'; same two comparisons as the ID number above
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人2：':
+                            # ID number: span starts with '证件号码：'; mean(bbox[::2]) > anchor[0] and mean(bbox[1::2]) > anchor[1]
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # Representative: span starts with '法定代表人或授权代表：'; same two comparisons as the ID number above
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
+                                    bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+        return name, id_num, representative  # three dicts; 'words' / 'page' / 'position' are set only where a span matched
    def get_info(self):
        """
            block['type'] == 0 : 表示该元素为图片
@@ -905,6 +997,8 @@ class Finder:
            self.init_result['合同编号'] = contract_no
            # 从第一页上取四个角色的姓名和证件号码
            name, id_num, representative = self.get_role_info(role_key='承租人：', page_num='0')
+            if name["words"] == None:
+                name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
            self.init_result['承租人-姓名'] = name
            self.init_result['承租人-证件号码'] = id_num
            self.init_result['承租人-法定代表人或授权代表'] = representative
@@ -912,14 +1006,31 @@ class Finder:
            self.init_result['保证人1-姓名'] = name
            self.init_result['保证人1-证件号码'] = id_num
            self.init_result['保证人1-法定代表人或授权代表'] = representative
+            # if条件判别 对应3_3版本
+            if name["words"] == None:
+                name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
+                self.init_result['共同承租人-姓名'] = name
+                self.init_result['共同承租人-证件号码'] = id_num
+                self.init_result['共同承租人-法定代表人或授权代表'] = representative
            name, id_num, representative = self.get_role_info(role_key='保证人2：', page_num='0')
            self.init_result['保证人2-姓名'] = name
            self.init_result['保证人2-证件号码'] = id_num
            self.init_result['保证人2-法定代表人或授权代表'] = representative
+            # if条件判别 对应3_3版本
+            if name["words"] == None:
+                name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
+                self.init_result['保证人2-姓名'] = name
+                self.init_result['保证人2-证件号码'] = id_num
+                self.init_result['保证人2-法定代表人或授权代表'] = representative
            name, id_num, representative = self.get_role_info(role_key='保证人3：', page_num='0')
            self.init_result['保证人3-姓名'] = name
            self.init_result['保证人3-证件号码'] = id_num
            self.init_result['保证人3-法定代表人或授权代表'] = representative
+            if name["words"] == None:
+                name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
+                self.init_result['保证人3-姓名'] = name
+                self.init_result['保证人3-证件号码'] = id_num
+                self.init_result['保证人3-法定代表人或授权代表'] = representative
            # 在所有页面中找正文中（第二部分 融资租赁主要条款及付款计划）的那个编号，因为存在换行的情况所以暂时不带位置输出
            contract_no = self.get_contract_no_one()
            self.init_result['合同编号（正文）'] = contract_no
@@ -955,6 +1066,9 @@ class Finder:
            # 承租人姓名、签章
            name = self.get_key_value(key='承租人姓名：')
            electronic_signature = self.get_electronic_signature(top='承租人姓名：', bottom='保证人1姓名：')
+            if name["words"] == None:
+                name = self.get_key_value(key='承租人一姓名：')
+                electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：')
            self.init_result['签字页-承租人姓名'] = name
            self.init_result['签字页-承租人签章'] = electronic_signature
            # 保证人1姓名、签章
@@ -962,19 +1076,35 @@ class Finder:
            electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
            self.init_result['签字页-保证人1姓名'] = name
            self.init_result['签字页-保证人1签章'] = electronic_signature
+            # 这里用的是 name["words"] == ""
+            if name["words"] == "":
+                name = self.get_key_value(key='共同承租人名称：')
+                electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：')
+                self.init_result['签字页-共同承租人姓名'] = name
+                self.init_result['签字页-共同承租人签章'] = electronic_signature
            # 保证人2姓名、签章
            name = self.get_key_value(key='保证人2姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：')
            self.init_result['签字页-保证人2姓名'] = name
            self.init_result['签字页-保证人2签章'] = electronic_signature
-            # 保证人2姓名、签章
+            # if判断条件对应3_3版本
+            if name["words"] == "":
+                name = self.get_key_value(key='保证人1姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
+                self.init_result['签字页-保证人1姓名'] = name
+                self.init_result['签字页-保证人1签章'] = electronic_signature
+            # 保证人3姓名、签章
            name = self.get_key_value(key='保证人3姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人3姓名：', bottom='日期：')
            self.init_result['签字页-保证人3姓名'] = name
            self.init_result['签字页-保证人3签章'] = electronic_signature
+            # if判断条件对应3_3版本
+            if name["words"] == None:
+                name = self.get_key_value(key='保证人2姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='日期：')
+                self.init_result['签字页-保证人2姓名'] = name
+                self.init_result['签字页-保证人2签章'] = electronic_signature
        return self.init_result
        # results['is_shhz_contract'] = True
        # results['pdf_info'] = self.init_result
--- a/src/common/electronic_hil_contract/hil_contract_ocr.py
View file @d3de42e
+++ b/src/common/electronic_hil_contract/hil_contract_ocr.py
View file @d3de42e
@@ -18,7 +18,6 @@ def predict(pdf_info, file_cls):
    Returns:
        TYPE: Description
    """
    # 0: 售后回租合同
    pdf_info_0 = []
    for pno in pdf_info:
@@ -30,7 +29,6 @@ def predict(pdf_info, file_cls):
                    bbox, text = span['bbox'], span['text']
                    if '售后回租合同_' in text:
                        pdf_info_0.append(pdf_info[pno])
    # 1: 车辆处置协议
    pdf_info_1 = []
    for pno in pdf_info:
@@ -42,7 +40,6 @@ def predict(pdf_info, file_cls):
                    bbox, text = span['bbox'], span['text']
                    if '售后回租合同附件一' in text:
                        pdf_info_1.append(pdf_info[pno])
    # 2: 车辆租赁抵押合同
    pdf_info_2 = []
    for pno in pdf_info:
@@ -54,7 +51,6 @@ def predict(pdf_info, file_cls):
                    bbox, text = span['bbox'], span['text']
                    if '车辆租赁抵押合同_' in text:
                        pdf_info_2.append(pdf_info[pno])
    is_clczxy = False
    # 如果 pdf_info_1 == 4 页，则说明此时输入包含了车辆处置协议
    if len(pdf_info_1) == 4 and file_cls == 1 and len(pdf_info_0) != 0:
@@ -62,7 +58,6 @@ def predict(pdf_info, file_cls):
        pdf_info = dict()
        for pno, page_info in enumerate(pdf_info_1):
            pdf_info[str(pno)] = page_info
    f = Finder(pdf_info)
    if file_cls == 0:
        results = f.get_info()
@@ -72,13 +67,11 @@ def predict(pdf_info, file_cls):
    if file_cls == 2:
        # 提取信息 ———— 车辆租赁抵押合同
        results = f.get_info_2()
+    if is_clczxy is True:
-    if is_clczxy == True:
        for key in results:
            if results[key]['page'] is not None:
-                results[key]['page'] = str(int(results[key]['page'])+6)
+                results[key]['page'] = str(int(results[key]['page']) + 6)
    for key in results:
        if results[key]['page'] is not None:
-            results[key]['page'] = 'page_' + str(int(results[key]['page'])+1)
+            results[key]['page'] = 'page_' + str(int(results[key]['page']) + 1)
    return results