# get_char_fsm.py (72.3 KB)
#
# (Web viewer header residue: Raw / Blame / History / Permalink)

import re
import numpy as np
from fuzzywuzzy import fuzz
from shapely.geometry import Polygon

def caculate_iou(g, p):
    """Return the intersection-over-union of two polygons.

    Args:
        g: Flat coordinate sequence (anything ``np.array(g).reshape((-1, 2))``
            accepts) listing the vertices of the first polygon.
        p: Same layout, vertices of the second polygon.

    Returns:
        ``intersection_area / union_area``, or ``0.0`` when the union has no
        area (both polygons degenerate).
    """
    g = Polygon(np.array(g).reshape((-1, 2)))
    p = Polygon(np.array(p).reshape((-1, 2)))
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    # Two zero-area polygons would otherwise raise ZeroDivisionError.
    if union <= 0:
        return 0.0
    return inter / union

def get_table_info(bbox_1, bbox_2, ocr_result):
    """Return the text of the last OCR entry overlapping a derived anchor box.

    The anchor takes its even-indexed values from ``bbox_2`` and its
    odd-indexed values from ``bbox_1`` (presumably x from one cell and y from
    the other -- confirm against the caller).

    Args:
        bbox_1: Sequence of at least 8 numbers.
        bbox_2: Sequence of at least 8 numbers.
        ocr_result: Iterable of entries where ``entry[0]`` is a box accepted by
            ``caculate_iou`` and ``entry[1]`` is the associated value.

    Returns:
        ``entry[1]`` of the last entry whose IoU with the anchor is positive,
        or ``''`` when there is none.
    """
    anchor = [bbox_2[k] if k % 2 == 0 else bbox_1[k] for k in range(8)]
    overlapping = [
        entry[1]
        for entry in ocr_result
        if caculate_iou(anchor, entry[0]) > 0
    ]
    return overlapping[-1] if overlapping else ''

class Finder:

    def __init__(self, pdf_info):
        self.pdf_info = pdf_info
        self.item = {"words": None,
                     "page": None,
                     "position": None,
                    }
        # 格式化算法输出
        self.init_result = {"合同编号": self.item,
                            "承租人-姓名": self.item,
                            "承租人-证件号码": self.item,
                            "承租人-法定代表人或授权代表": self.item,

                            # "共同承租人-姓名": self.item,
                            # "共同承租人-证件号码": self.item,
                            # "共同承租人-法定代表人或授权代表": self.item,

                            "保证人1-姓名": self.item,
                            "保证人1-证件号码": self.item,
                            "保证人1-法定代表人或授权代表": self.item,

                            "保证人2-姓名": self.item,
                            "保证人2-证件号码": self.item,
                            "保证人2-法定代表人或授权代表": self.item,
                            "保证人3-姓名": self.item,
                            "保证人3-证件号码": self.item,
                            "保证人3-法定代表人或授权代表": self.item,
                            "合同编号（正文）": self.item,
                            "车辆识别代码": self.item,
                            "车辆卖方（经销商）": self.item,
                            "车辆代理商": self.item,
                            "车辆原始销售价格（《机动车销售统一发票》所列金额）": self.item,
                            "车辆附加产品明细表": self.item,
                            "融资成本总额": self.item,
                            "租期": self.item,
                            "付款计划表": self.item,
                            "收款银行账户-户名": self.item,
                            "收款银行账户-银行账号": self.item,
                            "收款银行账户-开户行": self.item,
                            "银行账户-户名": self.item,
                            "银行账户-银行账号": self.item,
                            "银行账户-开户行": self.item,
                            "签字页-承租人姓名": self.item,
                            "签字页-承租人签章": self.item,

                            # "签字页-共同承租人姓名": self.item,
                            # "签字页-共同承租人签章": self.item,

                            "签字页-保证人1姓名": self.item,
                            "签字页-保证人1签章": self.item,

                            "签字页-保证人2姓名": self.item,
                            "签字页-保证人2签章": self.item,
                            "签字页-保证人3姓名": self.item,
                            "签字页-保证人3签章": self.item,
        }

        # 格式化输出 车辆处置协议 要是别的字段
        self.init_result_1 = {"合同编号": self.item,
                              "承租人-姓名": self.item,
                              "承租人-证件号码": self.item,
                              "销售经销商": self.item,
                              "合同编号（正文）": self.item,
                              "签字页-承租人姓名": self.item,
                              "签字页-承租人证件号码": self.item,
                              "签字页-承租人签章": self.item,
                              "签字页-销售经销商": self.item,
                              "签字页-销售经销商签章": self.item,
        }

        # 格式化输出 车辆租赁抵押合同
        self.init_result_2 = {"合同编号": self.item,
                              "合同编号（正文）": self.item,
                              "抵押人姓名/名称": self.item,
                              "抵押人证件号码": self.item,
                              "抵押人配偶姓名/名称": self.item,
                              "抵押人配偶证件号码": self.item,
                              "车辆识别代码": self.item,
                              "租金总额": self.item,
                              "融资租赁期限": self.item,
                              "签字页-抵押人姓名": self.item,
                              "签字页-抵押人签章": self.item,
                              "签字页-抵押人配偶姓名": self.item,
                              "签字页-抵押人配偶签章": self.item,
        }

    def get_contract_no(self, page_num):
        """传入页码,查看该页码右上角的编号

        Args:
            page_num (string):

        Returns:
            sting:
        """
        contract_no = self.item.copy()
        # 只看第一页
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '合同编号：' in text:
                        words = text.split('：')[-1]
                        contract_no['position'] = bbox
                        contract_no['page'] = page_num
                        contract_no['words'] = words
        if contract_no['words'] == '':
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if bbox[1] < contract_no['position'][3] and 'CH' in text:
                            contract_no['position'] = bbox
                            contract_no['page'] = page_num
                            contract_no['words'] = text
        return contract_no

    def get_vehicle_price(self, page_num='0'):
        vehicle_price = self.item.copy()
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '所购车辆价格为人民币' in text:
                        words = text.split('币')[-1]
                        vehicle_price['position'] = bbox
                        vehicle_price['words'] = words
        return vehicle_price

    def get_contract_no_one(self):
        # 查找正文中的合同编号,有可能存在换行的情况
        contract_no = self.item.copy()
        for pno in self.pdf_info:
            all_text = ''
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        all_text += text
            all_text = all_text.replace(' ', '')
            matchObj = re.search(r'（合同编号：\[(.*?)\]）', all_text)
            if matchObj:
                words = matchObj.group(1)
                contract_no['position'] = None
                contract_no['page'] = pno
                # contract_no['words'] = words
                contract_no['words'] = re.sub("\s", "", words).replace("）", "")
                return contract_no

            matchObj = re.search(r'编号为(.*?)的', all_text)
            if matchObj:
                words = matchObj.group(1).strip()
                contract_no['position'] = None
                contract_no['page'] = pno
                # contract_no['words'] = words
                contract_no['words'] = re.sub("\s", "", words).replace("）", "")
                return contract_no

            matchObj = re.search(r'编号为(.*?)）的', all_text)
            if matchObj:
                words = matchObj.group(1).strip()
                contract_no['position'] = None
                contract_no['page'] = pno
                # contract_no['words'] = words
                contract_no['words'] = re.sub("\s", "", words)
        return contract_no

    def get_key_value(self, key, page_num=None):
        value = self.item.copy()
        if page_num is not None:
            pno = page_num
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if key in text:
                            words = text.split('：')[-1].replace("。", "")
                            value['position'] = bbox
                            value['page'] = pno
                            # value['words'] = words
                            value['words'] = re.sub("\s", "", words)
        else:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key in text:
                                # print(self.pdf_info[pno])
                                words = text.split('：')[-1].replace("。", "")
                                value['position'] = bbox
                                value['page'] = pno
                                # value['words'] = words
                                value['words'] = re.sub("\s", "", words)
        return value

    def get_loan_principal(self, page_num='0'):
        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
        upper = self.item.copy()
        lower = self.item.copy()
        asp_1 = self.item.copy()
        asp_2 = self.item.copy()
        anchor_bbox = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if fuzz.ratio(''.join(chinese_keywords), text) > 15:
                        text = text.split('：')[-1].strip()
                        upper['position'] = bbox
                        upper['words'] = text
                    if '小写：¥' in text:
                        words = text.split('¥')[-1].strip()
                        lower['position'] = bbox
                        lower['words'] = words
                    if '附加产品融资贷款本金总金额' == text:
                        anchor_bbox = bbox
        if anchor_bbox:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if np.mean(bbox[1::2]) < np.mean(anchor_bbox[1::2]) and '人民币：小写：' in text:
                            words = re.findall(r'人民币：小写：\[(.*)\]', text)[0]
                            asp_1['position'] = bbox
                            asp_1['words'] = words
                        if np.mean(bbox[1::2]) > np.mean(anchor_bbox[1::2]) and '人民币：小写：' in text:
                            words = re.findall(r'人民币：小写：\[(.*)\]', text)[0]
                            asp_2['position'] = bbox
                            asp_2['words'] = words
        return upper, lower, asp_1, asp_2

    def get_loan_term(self, page_num='0'):
        loan_term = self.item.copy()
        all_text = ''
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        matchs = re.search(r'贷款期限(\d+)个月', all_text)
        if matchs:
            words = matchs.group(1)
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if f'{words}个月' in text:
                            loan_term['position'] = bbox
                            loan_term['words'] = words
        return loan_term

    def get_asp_details(self, page_num):
        asp_details_table_term = self.item.copy()

        asp_details_table = []
        asp_details_text_list = []
        table = False
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '附加产品融资贷款本金总金额明细' == text:
                        table = True
                    if '第二条' in text or '征信管理' in text:
                        table = False
                    if table == True:
                        asp_details_text_list.append(text)

        for i in range((len(asp_details_text_list)+2)//3):

            line = []
            if i == 0:
                line = [asp_details_text_list[0]]
            else:
                for j in range(3):
                    line.append(asp_details_text_list[i*3-2+j])

            asp_details_table.append(line)

        if len(asp_details_table) > 0:
            asp_details_table_term['words'] = asp_details_table
        return asp_details_table_term

    def get_signature(self):
        signature = self.item.copy()

        for block in self.pdf_info['0']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '签署日期' in text:
                        words = text
                        signature['words'] = words
                        signature['position'] = bbox
        return signature

    def get_somebody(self, top, bottom):
        # 指定上下边界后,返回上下边界内的客户信息
        _name = self.item.copy()
        _id = self.item.copy()
        # 只看第一页，先划定上下边界
        y_top = 0
        y_bottom = 0
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if top in text:
                        y_top = bbox[3]
                    if bottom in text:
                        y_bottom = bbox[3]
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if y_top < bbox[3] < y_bottom:
                        if '姓名/名称' in text:
                            words = text.split('：')[-1]
                            _name['position'] = bbox
                            _name['words'] = words
                        if '自然人身份证件号码/法人执照号码' in text:
                            words = text.split('：')[-1]
                            _id['position'] = bbox
                            _id['words'] = words
        return _name, _id

    def get_seller(self):
        seller = self.item.copy()
        # 先找到 key
        anchor_bbox = None
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '经销商' == text:
                        anchor_bbox = bbox
        # 当找到了 key, 则根据 key 去匹配 value
        if anchor_bbox:
            half_width = self.pdf_info['1']['width'] * 0.5
            for block in self.pdf_info['1']['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \
                            anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]:
                            seller['position'] = bbox
                            seller['words'] = text
        return seller

    def get_payback_account(self):
        account = self.item.copy()
        account_name = self.item.copy()
        account_bank = self.item.copy()
        all_text = ''
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        # 首先确定账户信息是哪种,我们只输出非另行通知的格式
        if '☑账号' in all_text:
            all_text = all_text.replace('　', '')
            matchs_1 = re.findall(r'账号：(.*)户名', all_text)
            if matchs_1:
                words = matchs_1[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'{words}' in text:
                                account['position'] = bbox
                                account['words'] = words
            matchs_2 = re.findall(r'户名：(.*)开户行', all_text)
            if matchs_2:
                words = matchs_2[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'{words}' in text:
                                account_name['position'] = bbox
                                account_name['words'] = words
            matchs_3 = re.findall(r'开户行：(.*)；', all_text)
            if matchs_3:
                words = matchs_3[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'开户行：{words}；' in text.replace('　', ''):
                                account_bank['position'] = bbox
                                account_bank['words'] = words
        return account, account_name, account_bank

    def get_repayment_schedule(self):
        repayment_schedule = self.item.copy()

        repayment_schedule_text_list = []
        table = False
        page = None
        left = 0
        right = 0
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '剩余融资' in text:
                            right = bbox[2]
                        if '以上表格中所列序号' in text:
                            table = False
                        if table == True:
                            # 过滤汉字
                            if re.compile(r'[\u4e00-\u9fff]').search(text):
                                continue
                            # 过滤 1. - 61. 这些标题
                            if re.findall("\d+", text):
                                if len(re.findall("\d+", text)) == 1:
                                    continue
                            if not left < bbox[0] < right:
                                continue
                            repayment_schedule_text_list.append(text)

                        if text.strip() == "61.":
                            page = pno
                            table = True
                            left = bbox[0]
        # print("repayment_schedule_text_list = ", repayment_schedule_text_list)
        # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
        repayment_schedule_table = [['序号', '租金']]
        for i in range(len(repayment_schedule_text_list)//4):
            line = [f'{i+1}.']
            # 4表示4列的意思
            for j in range(4):
                line.append(repayment_schedule_text_list[i*4+j])

            # 只保留序号和租金列
            line = [line[0].replace('.', ''), line[3]]

            repayment_schedule_table.append(line)

        repayment_schedule['words'] = repayment_schedule_table
        repayment_schedule['page'] = page
        return repayment_schedule

    def get_signature_role_1(self):
        signature_role_1 = self.item.copy()
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '签署日期' in text:
                            signature_role_1['position'] = bbox
                            signature_role_1['page'] = pno
                            signature_role_1['words'] = text
        return signature_role_1

    def get_signature_role_2(self):
        signature_role_2 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '共同借款人(共同抵押人)' in text:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
        signature_role_2['page_num'] = page_num
        signature_role_2['position'] = position
        signature_role_2['words'] = words
        return signature_role_2

    def get_signature_role_3(self):
        signature_role_3 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '保证人1' in text and int(i) != 0:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
        signature_role_3['page_num'] = page_num
        signature_role_3['position'] = position
        signature_role_3['words'] = words
        return signature_role_3

    def get_signature_role_4(self):
        signature_role_4 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '保证人2' in text and int(i) != 0:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
        signature_role_4['page_num'] = page_num
        signature_role_4['position'] = position
        signature_role_4['words'] = words
        return signature_role_4

    def get_signature_role_5(self):
        signature_role_5 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '见证人签字' in text and int(i) != 0:
                            region = True
                        if '年' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        # print(texts)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
        signature_role_5['page_num'] = page_num
        signature_role_5['position'] = position
        signature_role_5['words'] = words
        return signature_role_5

    def get_last_page_signature(self, page_num, top, bottom):
        signature_name = self.item.copy()
        signature_date = self.item.copy()
        anchor_top = None
        anchor_bottom = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if top in text:
                        anchor_top = bbox[1]
                    if bottom in text:
                        anchor_bottom = bbox[1]
        if anchor_top is not None and anchor_bottom is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
                            name = text.split(' ')[0]
                            date = text.split(':')[-1]
                            signature_name['words'] = name
                            signature_name['position'] = bbox
                            signature_date['words'] = date
                            signature_name['position'] = bbox
        return signature_name, signature_date

    def get_electronic_signature(self, top, bottom, t_pno=None):
        signature = self.item.copy()
        anchor_top = None
        anchor_bottom = None
        for pno in self.pdf_info:
            if t_pno is not None and pno != t_pno:
                continue
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if top in text:
                            anchor_top = bbox[1]
                        elif bottom in text and anchor_top is not None and bbox[3] > anchor_top:
                            anchor_bottom = bbox[3]
        if anchor_top is not None and anchor_bottom is not None:
            # print('in')
            # print(anchor_top)
            # print(anchor_bottom)
            for pno in self.pdf_info:
                if t_pno is not None and pno != t_pno:
                    continue
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            # ------------ #
                            # print("--text = ", text)
                            if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
                                words = text
                                signature['words'] = words
                                signature['page'] = pno
                                signature['position'] = bbox
        return signature

    def get_role_info(self, role_key, page_num='0'):
        name = self.item.copy()
        id_num = self.item.copy()
        representative = self.item.copy()

        # 以保证人3 的左上角为定位点
        anchor = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    # 找到角色姓名
                    if re.match('保证人3', text) is not None:
                        anchor = [bbox[0], bbox[1]]

        need_bbox_find_keys_bbox = [None, None, None]
        if anchor is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if all(need_bbox_find_keys_bbox):
                    break
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    if all(need_bbox_find_keys_bbox):
                        break
                    for span in line['spans']:
                        if all(need_bbox_find_keys_bbox):
                            break

                        bbox, text = span['bbox'], span['text']
                        # 找到角色姓名
                        if re.match(role_key, text) is not None:
                            words = text.split('：')[-1]
                            if len(words) == 0:
                                need_bbox_find_keys_bbox[0] = bbox
                            # else:
                            name['words'] = words
                            name['page'] = page_num
                            name['position'] = bbox
                            continue
                        if role_key == '承租人：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[1] = bbox
                                # else:
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[2] = bbox
                                # else:
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        elif role_key == '保证人1：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[1] = bbox
                                # else:
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[2] = bbox
                                # else:
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        elif role_key == '保证人2：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[1] = bbox
                                # else:
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[2] = bbox
                                # else:
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        elif role_key == '保证人3：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[1] = bbox
                                # else:
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                if len(words) == 0:
                                    need_bbox_find_keys_bbox[2] = bbox
                                # else:
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
        for idx, bbox in enumerate(need_bbox_find_keys_bbox):
            if bbox is None:
                continue
            is_find = False
            if idx == 1:
                width_rate = 3
            else:
                width_rate = 1
            minx = bbox[2]
            maxx = bbox[2] + (width_rate * (bbox[2]-bbox[0]))
            miny = bbox[1]
            maxy = bbox[3]
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                if is_find:
                    break
                for line in block['lines']:
                    if is_find:
                        break
                    for span in line['spans']:
                        if is_find:
                            break
                        value_bbox, text = span['bbox'], span['text']
                        if minx < np.mean(value_bbox[::2]) < maxx and miny < np.mean(value_bbox[1::2]) < maxy:
                            if idx == 0:
                                name['words'] = text
                                name['page'] = page_num
                                name['position'] = value_bbox
                            elif idx == 1:
                                id_num['words'] = text
                                id_num['page'] = page_num
                                id_num['position'] = value_bbox
                            elif idx == 2:
                                representative['words'] = text
                                representative['page'] = page_num
                                representative['position'] = value_bbox
                            is_find = True
                            break
        return name, id_num, representative

    def get_table_add_product(self):
        table_add_product = self.item.copy()

        add_product_page_num = None
        for pno in self.pdf_info:
            for block in self.pdf_info[f'{pno}']['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '车辆附加产品（明细见下表）' in text:
                            add_product_page_num = pno
        ocr_results = []
        for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    xmin, ymin, xmax, ymax = bbox
                    bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                    ocr_results.append([bbox, text])

        lines = [['项目', '购买价格', '实际融资金额']]

        key_xm = None
        key_gmjg = None
        key_sjrzje = None
        key_total = None

        for index, span in enumerate(ocr_results):
            if span[1] == '项目':
                key_xm = index
            if span[1] == '购买价格':
                key_gmjg = index
            if span[1] == '实际融资金额':
                key_sjrzje = index
            if span[1] == '总计':
                key_total = index

        bbox, text = ocr_results[key_xm]
        rh = abs(bbox[1]-bbox[-1])
        anchor = np.array(bbox).reshape((-1, 2))
        anchor[:, 0] += 2*rh
        anchor[:, 1] += rh

        for i in range(5):
            for span in ocr_results:
                iou = caculate_iou(anchor, span[0])
                if iou > 0.01 and span[1].strip() != '所购':
                    x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
                    y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
                    line = [span[1].replace('\u3000', ' '), x, y]
                    # print(line)
                    lines.append(line)
                    anchor = np.array(span[0]).reshape((-1, 2))
                    anchor[:, 1] += rh

        total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
        lines.append(['总计', '', total])

        # 所购　BMW悦然焕
        # 新服务

        # 所购　BMW5年10
        # 万公里长悦保养套餐

        # 所购　事故维修补偿
        # 方案

        # 所购 BMW5年10万公里
        # 长悦保养套餐

        # 所购 MINI4年6万公里长悦
        # 保养套餐

        filtered_lines = []
        for line in lines:
            if line[0][:2] not in ['所购', '项目', '总计']:
                continue
            if 'BMW悦然' in line[0]:
                line[0] = '所购 BMW悦然焕新服务'
            if 'BMW5年10' in line[0]:
                line[0] = '所购 BMW5年10万公里长悦保养套餐'
            if '事故维修补' in line[0]:
                line[0] = '所购 事故维修补偿方案'
            if 'MINI4年6万公里长悦' in line[0]:
                line[0] = '所购 MINI4年6万公里长悦保养套餐'
            filtered_lines.append(line)
        table_add_product['words'] = filtered_lines
        table_add_product['page'] = add_product_page_num
        table_add_product['position'] = None
        return table_add_product

    def get_contract_no_dy(self):
        # 查找抵押合同编号
        contract_no = self.item.copy()

        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '抵押合同编号' in text:
                            key_box = bbox

        if key_box is not None:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and 'CH-' in text:
                                contract_no['position'] = bbox
                                contract_no['page'] = pno
                                contract_no['words'] = text
        return contract_no

    def get_dyr_name_id(self):
        name = self.item.copy()
        _id = self.item.copy()

        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == '抵押人':
                           key_box = bbox

        if key_box is not None:
            rh = abs(key_box[1]-key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
                                words = text.split('：')[-1]
                                name['position'] = bbox
                                name['page'] = pno
                                name['words'] = words
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
                                words = text.split('：')[-1]
                                _id['position'] = bbox
                                _id['page'] = pno
                                _id['words'] = words
        return name, _id

    def get_dyrpo_name_id(self):
        name = self.item.copy()
        _id = self.item.copy()

        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == '抵押人配偶(如适':
                           key_box = bbox

        if key_box is not None:
            rh = abs(key_box[1]-key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
                                words = text.split('：')[-1]
                                name['position'] = bbox
                                name['page'] = pno
                                name['words'] = words
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
                                words = text.split('：')[-1]
                                _id['position'] = bbox
                                _id['page'] = pno
                                _id['words'] = words.strip()
        return name, _id

    def get_key_value_position(self, key):
        value = self.item.copy()

        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == key:
                           key_box = bbox

        if key_box is not None:
            rh = abs(key_box[1]-key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10:
                                words = text
                                value['position'] = bbox
                                value['page'] = pno
                                value['words'] = words
        return value

    def get_role_info_3_3(self, role_key, page_num='0'):
        name = self.item.copy()
        id_num = self.item.copy()
        representative = self.item.copy()

        # 以保证人2 的左上角为定位点
        anchor = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    # 找到角色姓名
                    if re.match('保证人2', text) is not None:
                        anchor = [bbox[0], bbox[1]]

        if anchor is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        # 找到角色姓名
                        if re.match(role_key, text) is not None:
                            words = text.split('：')[-1]
                            name['words'] = words
                            name['page'] = page_num
                            name['position'] = bbox
                        if role_key == '承租人一：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '共同承租人：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人1：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人2：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
        return name, id_num, representative

    def get_value_by_findall(self, prefix, suffix, page_num):
        value = self.item.copy()
        all_text = ''
        pno = page_num
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        words_list = re.findall(f"{prefix}(.*?){suffix}", all_text)
        if len(words_list) > 0:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if words_list[0] in text:
                            value['position'] = bbox
                            value['page'] = pno
                            value['words'] = words_list[0]
        return value

    def get_info(self):
        """
            block['type'] == 0 : 表示该元素为图片

        Returns:
            dict: Description
        """
        if len(self.pdf_info) > 0:
            # 取 Page 1 上的合同编号
            contract_no = self.get_contract_no(page_num='0')
            self.init_result['合同编号'] = contract_no

            # 粗略判断是否是 ‘车贷分离版本’ 的合同
            # is_cdfl = False
            # for block in self.pdf_info['0']['blocks']:
            #     if block['type'] != 0:
            #         continue
            #     for line in block['lines']:
            #         for span in line['spans']:
            #             bbox, text = span['bbox'], span['text']
            #             if '共同承租人：' in text:
            #                 is_cdfl = True

            # if is_cdfl == False:
                # 从第一页上取四个角色的姓名和证件号码
            name, id_num, representative = self.get_role_info(role_key='承租人：', page_num='0')

            if name["words"] == None:
                name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
            self.init_result['承租人-姓名'] = name
            self.init_result['承租人-证件号码'] = id_num
            self.init_result['承租人-法定代表人或授权代表'] = representative

            name, id_num, representative = self.get_role_info(role_key='保证人1：', page_num='0')
            self.init_result['保证人1-姓名'] = name
            self.init_result['保证人1-证件号码'] = id_num
            self.init_result['保证人1-法定代表人或授权代表'] = representative
            # if条件判别 对应3_3版本
            # if name["words"] == None:
            #     name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
            #     self.init_result['共同承租人-姓名'] = name
            #     self.init_result['共同承租人-证件号码'] = id_num
            #     self.init_result['共同承租人-法定代表人或授权代表'] = representative

            name, id_num, representative = self.get_role_info(role_key='保证人2：', page_num='0')
            self.init_result['保证人2-姓名'] = name
            self.init_result['保证人2-证件号码'] = id_num
            self.init_result['保证人2-法定代表人或授权代表'] = representative
            # if条件判别 对应3_3版本
            # if name["words"] == None:
            #     name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
            #     self.init_result['保证人2-姓名'] = name
            #     self.init_result['保证人2-证件号码'] = id_num
            #     self.init_result['保证人2-法定代表人或授权代表'] = representative

            name, id_num, representative = self.get_role_info(role_key='保证人3：', page_num='0')
            self.init_result['保证人3-姓名'] = name
            self.init_result['保证人3-证件号码'] = id_num
            self.init_result['保证人3-法定代表人或授权代表'] = representative
            # if name["words"] == None:
            #     name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
            #     self.init_result['保证人3-姓名'] = name
            #     self.init_result['保证人3-证件号码'] = id_num
            #     self.init_result['保证人3-法定代表人或授权代表'] = representative
            # else:
            #     name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
            #     self.init_result['承租人-姓名'] = name
            #     self.init_result['承租人-证件号码'] = id_num
            #     self.init_result['承租人-法定代表人或授权代表'] = representative

            #     name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
            #     self.init_result['共同承租人-姓名'] = name
            #     self.init_result['共同承租人-证件号码'] = id_num
            #     self.init_result['共同承租人-法定代表人或授权代表'] = representative

            #     name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
            #     self.init_result['保证人1-姓名'] = name
            #     self.init_result['保证人1-证件号码'] = id_num
            #     self.init_result['保证人1-法定代表人或授权代表'] = representative

            #     name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
            #     self.init_result['保证人2-姓名'] = name
            #     self.init_result['保证人2-证件号码'] = id_num
            #     self.init_result['保证人2-法定代表人或授权代表'] = representative

            # 在所有页面中找正文中（第二部分 融资租赁主要条款及付款计划）的那个编号，因为存在换行的情况所以暂时不带位置输出
            contract_no = self.get_contract_no_one()
            self.init_result['合同编号（正文）'] = contract_no
            # 找到车辆识别代码
            vin = self.get_key_value(key='车辆识别代码：')
            self.init_result['车辆识别代码'] = vin
            # 找到经销商(车辆卖方(经销商))
            seller = self.get_key_value(key='车辆卖方（经销商）：')
            if seller['words'] == None:
                seller = self.get_key_value(key='车辆卖方：')
            self.init_result['车辆卖方（经销商）'] = seller
            # 找到车辆代理商
            cldls = self.get_key_value(key='车辆代理商', page_num='4')
            self.init_result['车辆代理商'] = cldls
            # 找到 —— 车辆原始销售价格
            vehicle_price = self.get_key_value(key='车辆原始销售价格（《机动车销售统一发票》所列金额）：')
            self.init_result['车辆原始销售价格（《机动车销售统一发票》所列金额）'] = vehicle_price
            # 找车辆附加产品明细（表）
            table_add_product = self.get_table_add_product()
            self.init_result['车辆附加产品明细表'] = table_add_product
            # 找融资成本总额
            financing_cost = self.get_key_value(key='融资成本总额：')
            self.init_result['融资成本总额'] = financing_cost
            # 找租期
            lease_term = self.get_key_value(key='租期：')
            self.init_result['租期'] = lease_term
            # 找还款计划(表)
            repayment_schedule = self.get_repayment_schedule()
            self.init_result['付款计划表'] = repayment_schedule
            # 找承租人收款账户户名、银行账号、银行
            name = self.get_key_value(key='户名：', page_num='4')
            self.init_result['收款银行账户-户名'] = name
            account = self.get_key_value(key='银行账号：', page_num='4')
            self.init_result['收款银行账户-银行账号'] = account
            bank = self.get_key_value(key='开户银行：', page_num='4')
            self.init_result['收款银行账户-开户行'] = bank
            # 找承租人扣款账户户名、银行账号、银行
            name = self.get_key_value(key='户名：', page_num='5')
            self.init_result['银行账户-户名'] = name
            account = self.get_key_value(key='银行账号：', page_num='5')
            self.init_result['银行账户-银行账号'] = account
            bank = self.get_key_value(key='开户银行：', page_num='5')
            self.init_result['银行账户-开户行'] = bank

            # 找签字页上的系列信息
            # 承租人姓名、签章
            # if is_cdfl == False:
            name = self.get_key_value(key='承租人姓名：')
            electronic_signature = self.get_electronic_signature(top='承租人姓名：', bottom='保证人1姓名：', t_pno='5')

            if name["words"] == None:
                name = self.get_key_value(key='承租人一姓名：')
                electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：', t_pno='5')

            self.init_result['签字页-承租人姓名'] = name
            self.init_result['签字页-承租人签章'] = electronic_signature
            # 保证人1姓名、签章
            name = self.get_key_value(key='保证人1姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：', t_pno='5')
            self.init_result['签字页-保证人1姓名'] = name
            self.init_result['签字页-保证人1签章'] = electronic_signature
            # 这里用的是 name["words"] == ""
            # if name["words"] == "":
            #     name = self.get_key_value(key='共同承租人名称：')
            #     electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：', t_pno='5')
            #     self.init_result['签字页-共同承租人姓名'] = name
            #     self.init_result['签字页-共同承租人签章'] = electronic_signature
            # 保证人2姓名、签章
            name = self.get_key_value(key='保证人2姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：', t_pno='5')
            self.init_result['签字页-保证人2姓名'] = name
            self.init_result['签字页-保证人2签章'] = electronic_signature
            # if判断条件对应3_3版本
            # if name["words"] == "":
            #     name = self.get_key_value(key='保证人1姓名：')
            #     electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：', t_pno='5')
            #     self.init_result['签字页-保证人1姓名'] = name
            #     self.init_result['签字页-保证人1签章'] = electronic_signature
            # 保证人3姓名、签章
            name = self.get_key_value(key='保证人3姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人3姓名：', bottom='日期：', t_pno='5')
            self.init_result['签字页-保证人3姓名'] = name
            self.init_result['签字页-保证人3签章'] = electronic_signature
            # if判断条件对应3_3版本
            # if name["words"] == None:
            #     name = self.get_key_value(key='保证人2姓名：')
            #     electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='日期：', t_pno='5')
            #     self.init_result['签字页-保证人2姓名'] = name
            #     self.init_result['签字页-保证人2签章'] = electronic_signature
            # else:
            #     name = self.get_key_value(key='承租人一姓名：')
            #     electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：', t_pno='5')
            #     self.init_result['签字页-承租人姓名'] = name
            #     self.init_result['签字页-承租人签章'] = electronic_signature

            #     name = self.get_key_value(key='共同承租人名称：')
            #     electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：', t_pno='5')
            #     self.init_result['签字页-共同承租人姓名'] = name
            #     self.init_result['签字页-共同承租人签章'] = electronic_signature

            #     name = self.get_key_value(key='保证人1姓名：')
            #     electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：', t_pno='5')
            #     self.init_result['签字页-保证人1姓名'] = name
            #     self.init_result['签字页-保证人1签章'] = electronic_signature

            #     name = self.get_key_value(key='保证人2姓名：')
            #     electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：', t_pno='5')
            #     self.init_result['签字页-保证人2姓名'] = name
            #     self.init_result['签字页-保证人2签章'] = electronic_signature

        return self.init_result

    def get_info_1(self):
        """Populate ``self.init_result_1`` from the parsed document and return it.

        When ``self.pdf_info`` is empty no lookup is performed and the result
        dict is returned exactly as it currently stands.

        Returns:
            The ``self.init_result_1`` dict (mutated in place).
        """
        if len(self.pdf_info) <= 0:
            return self.init_result_1

        fields = self.init_result_1

        # Contract number, looked up with page_num='0'.
        fields['合同编号'] = self.get_contract_no(page_num='0')
        # Lessee name.
        fields['承租人-姓名'] = self.get_key_value(key='承租人：', page_num='0')
        # Lessee ID number.
        fields['承租人-证件号码'] = self.get_key_value(key='证件号码：', page_num='0')

        # Selling dealer: if the key/value lookup gives an empty string,
        # retry through get_value_by_findall using the address label.
        dealer = self.get_key_value(key='销售经销商：', page_num='0')
        if dealer['words'] == "":
            dealer = self.get_value_by_findall('销售经销商：', '地址：', page_num='0')
        fields['销售经销商'] = dealer

        # Contract number (body text).
        fields['合同编号（正文）'] = self.get_contract_no_one()

        # Signature page: lessee name.
        fields['签字页-承租人姓名'] = self.get_key_value(key='姓名/名称：')
        # Signature page: lessee ID number.
        fields['签字页-承租人证件号码'] = self.get_key_value(key='自然人身份证件号码/法人执照号码：')
        # Signature page: lessee signature/seal.
        fields['签字页-承租人签章'] = self.get_signature_role_1()

        # Signature page: selling dealer. Same empty-string fallback as above,
        # here delimited by the trailing "(authorised representative please
        # sign and stamp)" note and looked up with page_num='3'.
        # Example line: 销售经销商：深圳市宝创汽车贸易有限公司南山分公司（请授权代表签字并请盖章）
        page_dealer = self.get_key_value(key='销售经销商：')
        if page_dealer['words'] == "":
            page_dealer = self.get_value_by_findall('销售经销商：', '（请授权代表签字并请盖章）', page_num='3')
        fields['签字页-销售经销商'] = page_dealer

        # Dealer signature/seal: nothing is extracted for it here.
        return self.init_result_1

    def get_info_2(self):
        """Populate ``self.init_result_2`` from the parsed document and return it.

        When ``self.pdf_info`` is empty no lookup is performed and the result
        dict is returned exactly as it currently stands.

        Returns:
            The ``self.init_result_2`` dict (mutated in place).
        """
        if len(self.pdf_info) <= 0:
            return self.init_result_2

        fields = self.init_result_2

        # Contract number.
        fields['合同编号'] = self.get_contract_no_dy()
        # Contract number (body text).
        fields['合同编号（正文）'] = self.get_contract_no_one()

        # Mortgagor name and ID number (the helper returns them as a pair).
        fields['抵押人姓名/名称'], fields['抵押人证件号码'] = self.get_dyr_name_id()
        # Mortgagor's spouse: name and ID number.
        fields['抵押人配偶姓名/名称'], fields['抵押人配偶证件号码'] = self.get_dyrpo_name_id()

        # Vehicle identification code.
        fields['车辆识别代码'] = self.get_key_value(key='车辆识别代码：')
        # Total rent.
        fields['租金总额'] = self.get_key_value_position(key='租金总额')
        # Finance-lease term.
        fields['融资租赁期限'] = self.get_key_value_position(key='融资租赁期限')

        # Signature page: mortgagor name and signature/seal. Both lookups run
        # before either value is stored.
        signer = self.get_key_value(key='抵押人姓名：')
        seal = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名：', t_pno='1')
        fields['签字页-抵押人姓名'] = signer
        fields['签字页-抵押人签章'] = seal

        # Signature page: mortgagor's spouse name and signature/seal.
        spouse_signer = self.get_key_value(key='抵押人配偶姓名：')
        spouse_seal = self.get_electronic_signature(top='抵押人配偶姓名：', bottom='日期', t_pno='1')
        fields['签字页-抵押人配偶姓名'] = spouse_signer
        fields['签字页-抵押人配偶签章'] = spouse_seal

        return self.init_result_2