# -*- coding: utf-8 -*-
# @Author        : lk
# @Email         : 9428.al@gmail.com
# @Create Date   : 2021-07-20 16:42:41
# @Last Modified : 2021-10-28 17:41:00
# @Description   : 

import re
import numpy as np
from fuzzywuzzy import fuzz


class Finder:
    def __init__(self, pdf_info):
        self.pdf_info = pdf_info
        self.item = {"words": None,
                     "page": None,
                     "position": None,
                     }
        # 格式化算法输出
        self.init_result = {"合同编号": self.item,
                            "承租人-姓名": self.item,
                            "承租人-证件号码": self.item,
                            "承租人-法定代表人或授权代表": self.item,
                            "共同承租人-姓名": self.item,
                            "共同承租人-证件号码": self.item,
                            "共同承租人-法定代表人或授权代表": self.item,
                            "保证人1-姓名": self.item,
                            "保证人1-证件号码": self.item,
                            "保证人1-法定代表人或授权代表": self.item,
                            "保证人2-姓名": self.item,
                            "保证人2-证件号码": self.item,
                            "保证人2-法定代表人或授权代表": self.item,
                            "保证人3-姓名": self.item,
                            "保证人3-证件号码": self.item,
                            "保证人3-法定代表人或授权代表": self.item,
                            "合同编号（正文）": self.item,
                            "车辆识别代码": self.item,
                            "车辆卖方（经销商）": self.item,
                            "车辆原始销售价格（《机动车销售统一发票》所列金额）": self.item,
                            "车辆附加产品明细表": self.item,
                            "融资成本总额": self.item,
                            "租期": self.item,
                            "付款计划表": self.item,
                            "银行账户-户名": self.item,
                            "银行账户-银行账号": self.item,
                            "银行账户-开户行": self.item,
                            "签字页-承租人姓名": self.item,
                            "签字页-承租人签章": self.item,
                            "签字页-共同承租人姓名": self.item,
                            "签字页-共同承租人签章": self.item,
                            "签字页-保证人1姓名": self.item,
                            "签字页-保证人1签章": self.item,
                            "签字页-保证人2姓名": self.item,
                            "签字页-保证人2签章": self.item,
                            "签字页-保证人3姓名": self.item,
                            "签字页-保证人3签章": self.item,
                            }
        # 格式化输出 车辆处置协议 要是别的字段
        self.init_result_1 = {"合同编号": self.item,
                              "承租人-姓名": self.item,
                              "承租人-证件号码": self.item,
                              "销售经销商": self.item,
                              "合同编号（正文）": self.item,
                              "签字页-承租人姓名": self.item,
                              "签字页-承租人证件号码": self.item,
                              "签字页-承租人签章": self.item,
                              "签字页-销售经销商": self.item,
                              "签字页-销售经销商签章": self.item,
                              }
        # 格式化输出 车辆租赁抵押合同
        self.init_result_2 = {"合同编号": self.item,
                              "合同编号（正文）": self.item,
                              "抵押人姓名/名称": self.item,
                              "抵押人证件号码": self.item,
                              "抵押人配偶姓名/名称": self.item,
                              "抵押人配偶证件号码": self.item,
                              "车辆识别代码": self.item,
                              "租金总额": self.item,
                              "融资租赁期限": self.item,
                              "签字页-抵押人姓名": self.item,
                              "签字页-抵押人签章": self.item,
                              "签字页-抵押人配偶姓名": self.item,
                              "签字页-抵押人配偶签章": self.item,
                              }

    def get_contract_no(self, page_num):
        """传入页码,查看该页码右上角的编号

        Args:
            page_num (string):

        Returns:
            sting:
        """
        contract_no = self.item.copy()
        # 只看第一页
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '合同编号：' in text:
                        words = text.split('：')[-1]
                        contract_no['position'] = bbox
                        contract_no['page'] = page_num
                        contract_no['words'] = words
        if contract_no['words'] == '':
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if bbox[1] < contract_no['position'][3] and 'CH' in text:
                            contract_no['position'] = bbox
                            contract_no['page'] = page_num
                            contract_no['words'] = text
        return contract_no

    def get_vehicle_price(self, page_num='0'):
        vehicle_price = self.item.copy()
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '所购车辆价格为人民币' in text:
                        words = text.split('币')[-1]
                        vehicle_price['position'] = bbox
                        vehicle_price['words'] = words
        return vehicle_price

    def get_contract_no_one(self):
        # 查找正文中的合同编号,有可能存在换行的情况
        contract_no = self.item.copy()
        for pno in self.pdf_info:
            all_text = ''
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        all_text += text
            all_text = all_text.replace(' ', '')
            matchObj = re.search(r'（合同编号：\[(.*?)\]）', all_text)
            if matchObj:
                words = matchObj.group(1)
                contract_no['position'] = None
                contract_no['page'] = pno
                # contract_no['words'] = words
                contract_no['words'] = re.sub("\s", "", words).replace("）", "")
                return contract_no
            matchObj = re.search(r'编号为(.*?)的', all_text)
            if matchObj:
                words = matchObj.group(1).strip()
                contract_no['position'] = None
                contract_no['page'] = pno
                # contract_no['words'] = words
                contract_no['words'] = re.sub("\s", "", words).replace("）", "")
                return contract_no
            matchObj = re.search(r'编号为(.*?)）的', all_text)
            if matchObj:
                words = matchObj.group(1).strip()
                contract_no['position'] = None
                contract_no['page'] = pno
                # contract_no['words'] = words
                contract_no['words'] = re.sub("\s", "", words)
        return contract_no

    def get_key_value(self, key, page_num=None):
        value = self.item.copy()
        if page_num is not None:
            pno = page_num
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if key in text:
                            words = text.split('：')[-1].replace("。", "")
                            value['position'] = bbox
                            value['page'] = pno
                            # value['words'] = words
                            value['words'] = re.sub("\s", "", words)
        else:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key in text:
                                # print(self.pdf_info[pno])
                                words = text.split('：')[-1].replace("。", "")
                                value['position'] = bbox
                                value['page'] = pno
                                # value['words'] = words
                                value['words'] = re.sub("\s", "", words)
        return value

    def get_loan_principal(self, page_num='0'):
        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
        upper = self.item.copy()
        lower = self.item.copy()
        asp_1 = self.item.copy()
        asp_2 = self.item.copy()
        anchor_bbox = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if fuzz.ratio(''.join(chinese_keywords), text) > 15:
                        text = text.split('：')[-1].strip()
                        upper['position'] = bbox
                        upper['words'] = text
                    if '小写：¥' in text:
                        words = text.split('¥')[-1].strip()
                        lower['position'] = bbox
                        lower['words'] = words
                    if '附加产品融资贷款本金总金额' == text:
                        anchor_bbox = bbox
        if anchor_bbox:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if np.mean(bbox[1::2]) < np.mean(anchor_bbox[1::2]) and '人民币：小写：' in text:
                            words = re.findall(r'人民币：小写：\[(.*)\]', text)[0]
                            asp_1['position'] = bbox
                            asp_1['words'] = words
                        if np.mean(bbox[1::2]) > np.mean(anchor_bbox[1::2]) and '人民币：小写：' in text:
                            words = re.findall(r'人民币：小写：\[(.*)\]', text)[0]
                            asp_2['position'] = bbox
                            asp_2['words'] = words
        return upper, lower, asp_1, asp_2

    def get_loan_term(self, page_num='0'):
        loan_term = self.item.copy()
        all_text = ''
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        matchs = re.search(r'贷款期限(\d+)个月', all_text)
        if matchs:
            words = matchs.group(1)
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if f'{words}个月' in text:
                            loan_term['position'] = bbox
                            loan_term['words'] = words
        return loan_term

    def get_asp_details(self, page_num):
        asp_details_table_term = self.item.copy()
        asp_details_table = []
        asp_details_text_list = []
        table = False
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '附加产品融资贷款本金总金额明细' == text:
                        table = True
                    if '第二条' in text or '征信管理' in text:
                        table = False
                    if table == True:
                        asp_details_text_list.append(text)
        for i in range((len(asp_details_text_list) + 2) // 3):
            line = []
            if i == 0:
                line = [asp_details_text_list[0]]
            else:
                for j in range(3):
                    line.append(asp_details_text_list[i * 3 - 2 + j])
            asp_details_table.append(line)
        if len(asp_details_table) > 0:
            asp_details_table_term['words'] = asp_details_table
        return asp_details_table_term

    def get_signature(self):
        signature = self.item.copy()
        for block in self.pdf_info['0']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '签署日期' in text:
                        words = text
                        signature['words'] = words
                        signature['position'] = bbox
        return signature

    def get_somebody(self, top, bottom):
        # 指定上下边界后,返回上下边界内的客户信息
        _name = self.item.copy()
        _id = self.item.copy()
        # 只看第一页，先划定上下边界
        y_top = 0
        y_bottom = 0
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if top in text:
                        y_top = bbox[3]
                    if bottom in text:
                        y_bottom = bbox[3]
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if y_top < bbox[3] < y_bottom:
                        if '姓名/名称' in text:
                            words = text.split('：')[-1]
                            _name['position'] = bbox
                            _name['words'] = words
                        if '自然人身份证件号码/法人执照号码' in text:
                            words = text.split('：')[-1]
                            _id['position'] = bbox
                            _id['words'] = words
        return _name, _id

    def get_seller(self):
        seller = self.item.copy()
        # 先找到 key
        anchor_bbox = None
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '经销商' == text:
                        anchor_bbox = bbox
        # 当找到了 key, 则根据 key 去匹配 value
        if anchor_bbox:
            half_width = self.pdf_info['1']['width'] * 0.5
            for block in self.pdf_info['1']['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
                                anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
                            seller['position'] = bbox
                            seller['words'] = text
        return seller

    def get_payback_account(self):
        account = self.item.copy()
        account_name = self.item.copy()
        account_bank = self.item.copy()
        all_text = ''
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        # 首先确定账户信息是哪种,我们只输出非另行通知的格式
        if '☑账号' in all_text:
            all_text = all_text.replace('　', '')
            matchs_1 = re.findall(r'账号：(.*)户名', all_text)
            if matchs_1:
                words = matchs_1[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'{words}' in text:
                                account['position'] = bbox
                                account['words'] = words
            matchs_2 = re.findall(r'户名：(.*)开户行', all_text)
            if matchs_2:
                words = matchs_2[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'{words}' in text:
                                account_name['position'] = bbox
                                account_name['words'] = words
            matchs_3 = re.findall(r'开户行：(.*)；', all_text)
            if matchs_3:
                words = matchs_3[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'开户行：{words}；' in text.replace('　', ''):
                                account_bank['position'] = bbox
                                account_bank['words'] = words
        return account, account_name, account_bank

    def get_repayment_schedule(self):
        repayment_schedule = self.item.copy()
        repayment_schedule_text_list = []
        table = False
        page = None
        left = 0
        right = 0
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '剩余融资' in text:
                            right = bbox[2]
                        if '以上表格中所列序号' in text:
                            table = False
                        if table == True:
                            # 过滤汉字
                            if re.compile(r'[\u4e00-\u9fff]').search(text):
                                continue
                            # 过滤 1. - 61. 这些标题
                            if re.findall("\d+", text):
                                if len(re.findall("\d+", text)) == 1:
                                    continue
                            if not left < bbox[0] < right:
                                continue
                            repayment_schedule_text_list.append(text)
                        if text.strip() == "61.":
                            page = pno
                            table = True
                            left = bbox[0]
        # print("repayment_schedule_text_list = ", repayment_schedule_text_list)
        # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
        repayment_schedule_table = [['序号', '租金']]
        for i in range(len(repayment_schedule_text_list) // 4):
            line = [f'{i + 1}.']
            # 4表示4列的意思
            for j in range(4):
                line.append(repayment_schedule_text_list[i * 4 + j])
            # 只保留序号和租金列
            line = [line[0].replace('.', ''), line[3]]
            repayment_schedule_table.append(line)
        repayment_schedule['words'] = repayment_schedule_table
        repayment_schedule['page'] = page
        return repayment_schedule

    def get_signature_role_1(self):
        signature_role_1 = self.item.copy()
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '签署日期' in text:
                            signature_role_1['position'] = bbox
                            signature_role_1['page'] = pno
                            signature_role_1['words'] = text
        return signature_role_1

    def get_signature_role_2(self):
        signature_role_2 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '共同借款人(共同抵押人)' in text:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_2['page_num'] = page_num
        signature_role_2['position'] = position
        signature_role_2['words'] = words
        return signature_role_2

    def get_signature_role_3(self):
        signature_role_3 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '保证人1' in text and int(i) != 0:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_3['page_num'] = page_num
        signature_role_3['position'] = position
        signature_role_3['words'] = words
        return signature_role_3

    def get_signature_role_4(self):
        signature_role_4 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '保证人2' in text and int(i) != 0:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_4['page_num'] = page_num
        signature_role_4['position'] = position
        signature_role_4['words'] = words
        return signature_role_4

    def get_signature_role_5(self):
        signature_role_5 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '见证人签字' in text and int(i) != 0:
                            region = True
                        if '年' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        print(texts)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_5['page_num'] = page_num
        signature_role_5['position'] = position
        signature_role_5['words'] = words
        return signature_role_5

    def get_last_page_signature(self, page_num, top, bottom):
        signature_name = self.item.copy()
        signature_date = self.item.copy()
        anchor_top = None
        anchor_bottom = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if top in text:
                        anchor_top = bbox[1]
                    if bottom in text:
                        anchor_bottom = bbox[1]
        if anchor_top is not None and anchor_bottom is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                            name = text.split(' ')[0]
                            date = text.split(':')[-1]
                            signature_name['words'] = name
                            signature_name['position'] = bbox
                            signature_date['words'] = date
                            signature_name['position'] = bbox
        return signature_name, signature_date

    def get_electronic_signature(self, top, bottom):
        signature = self.item.copy()
        anchor_top = None
        anchor_bottom = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if top in text:
                            anchor_top = bbox[1]
                        if bottom in text:
                            anchor_bottom = bbox[3]
        if anchor_top is not None and anchor_bottom is not None:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            # ------------ #
                            # print("--text = ", text)
                            if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                                words = text
                                signature['words'] = words
                                signature['page'] = pno
                                signature['position'] = bbox
        return signature

    def get_role_info(self, role_key, page_num='0'):
        name = self.item.copy()
        id_num = self.item.copy()
        representative = self.item.copy()
        # 以保证人3 的左上角为定位点
        anchor = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    # 找到角色姓名
                    if re.match('保证人3', text) is not None:
                        anchor = [bbox[0], bbox[1]]
        if anchor is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        # 找到角色姓名
                        if re.match(role_key, text) is not None:
                            words = text.split('：')[-1]
                            name['words'] = words
                            name['page'] = page_num
                            name['position'] = bbox
                        if role_key == '承租人：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人1：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人2：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人3：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
        return name, id_num, representative

    def get_table_add_product(self):
        table_add_product = self.item.copy()
        items = []
        start = False
        page = None
        greater_equal_v35 = False
        for pno in self.pdf_info:
            condition = False
            for block in self.pdf_info[f'{pno}']['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == '租赁利率':
                            greater_equal_v35 = True
                        if '总计' in text:
                            start = True
                        if '注：出租人向承租人购买租赁车辆的对价' in text:
                            page = pno
                            start = False
                        if start == True:
                            items.append(text)
        lines = [['项目', '购买价格', '实际融资金额']]
        if greater_equal_v35:
            for i in range(len(items) // 4):
                line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]]
                lines.append(line)
        else:
            for i in range(len(items) // 3):
                line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
                lines.append(line)
        if len(items) > 0:
            lines.append([items[0], '', items[1]])

        table_add_product['words'] = lines
        table_add_product['page'] = page
        table_add_product['position'] = None
        return table_add_product

    def get_contract_no_dy(self):
        # 查找抵押合同编号
        contract_no = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '抵押合同编号' in text:
                            key_box = bbox
        if key_box is not None:
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and 'CH-' in text:
                                contract_no['position'] = bbox
                                contract_no['page'] = pno
                                contract_no['words'] = text
        return contract_no

    def get_dyr_name_id(self):
        name = self.item.copy()
        _id = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == '抵押人':
                            key_box = bbox

        if key_box is not None:
            rh = abs(key_box[1] - key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text:
                                words = text.split('：')[-1]
                                name['position'] = bbox
                                name['page'] = pno
                                name['words'] = words
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text:
                                words = text.split('：')[-1]
                                _id['position'] = bbox
                                _id['page'] = pno
                                _id['words'] = words
        return name, _id

    def get_dyrpo_name_id(self):
        name = self.item.copy()
        _id = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == '抵押人配偶(如适':
                            key_box = bbox
        if key_box is not None:
            rh = abs(key_box[1] - key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text:
                                words = text.split('：')[-1]
                                name['position'] = bbox
                                name['page'] = pno
                                name['words'] = words
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text:
                                words = text.split('：')[-1]
                                _id['position'] = bbox
                                _id['page'] = pno
                                _id['words'] = words
        return name, _id

    def get_key_value_position(self, key):
        value = self.item.copy()
        key_box = None
        for pno in self.pdf_info:
            for block in self.pdf_info[pno]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if text == key:
                            key_box = bbox
        if key_box is not None:
            rh = abs(key_box[1] - key_box[3])
            for pno in self.pdf_info:
                for block in self.pdf_info[pno]['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(
                                    key_box[2] - bbox[0]) < rh * 10:
                                words = text
                                value['position'] = bbox
                                value['page'] = pno
                                value['words'] = words
        return value

    def get_role_info_3_3(self, role_key, page_num='0'):
        name = self.item.copy()
        id_num = self.item.copy()
        representative = self.item.copy()
        # 以保证人2 的左上角为定位点
        anchor = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    # 找到角色姓名
                    if re.match('保证人2', text) is not None:
                        anchor = [bbox[0], bbox[1]]
        if anchor is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        # 找到角色姓名
                        if re.match(role_key, text) is not None:
                            words = text.split('：')[-1]
                            name['words'] = words
                            name['page'] = page_num
                            name['position'] = bbox
                        if role_key == '承租人一：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '共同承租人：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人1：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) < anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
                        if role_key == '保证人2：':
                            # 找到证件号码且确定位置
                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                id_num['words'] = words
                                id_num['page'] = page_num
                                id_num['position'] = bbox
                            # 找到法人代表且确定位置
                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
                                    bbox[1::2]) > anchor[1]:
                                words = text.split('：')[-1]
                                representative['words'] = words
                                representative['page'] = page_num
                                representative['position'] = bbox
        return name, id_num, representative

    def get_info(self):
        """
            block['type'] == 0 : 表示该元素为图片

        Returns:
            dict: Description
        """
        if len(self.pdf_info) > 0:
            # 取 Page 1 上的合同编号
            contract_no = self.get_contract_no(page_num='0')
            self.init_result['合同编号'] = contract_no
            # 从第一页上取四个角色的姓名和证件号码
            name, id_num, representative = self.get_role_info(role_key='承租人：', page_num='0')
            if name["words"] == None:
                name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
            self.init_result['承租人-姓名'] = name
            self.init_result['承租人-证件号码'] = id_num
            self.init_result['承租人-法定代表人或授权代表'] = representative
            name, id_num, representative = self.get_role_info(role_key='保证人1：', page_num='0')
            self.init_result['保证人1-姓名'] = name
            self.init_result['保证人1-证件号码'] = id_num
            self.init_result['保证人1-法定代表人或授权代表'] = representative
            # if条件判别 对应3_3版本
            if name["words"] == None:
                name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
                self.init_result['共同承租人-姓名'] = name
                self.init_result['共同承租人-证件号码'] = id_num
                self.init_result['共同承租人-法定代表人或授权代表'] = representative
            name, id_num, representative = self.get_role_info(role_key='保证人2：', page_num='0')
            self.init_result['保证人2-姓名'] = name
            self.init_result['保证人2-证件号码'] = id_num
            self.init_result['保证人2-法定代表人或授权代表'] = representative
            # if条件判别 对应3_3版本
            if name["words"] == None:
                name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
                self.init_result['保证人2-姓名'] = name
                self.init_result['保证人2-证件号码'] = id_num
                self.init_result['保证人2-法定代表人或授权代表'] = representative
            name, id_num, representative = self.get_role_info(role_key='保证人3：', page_num='0')
            self.init_result['保证人3-姓名'] = name
            self.init_result['保证人3-证件号码'] = id_num
            self.init_result['保证人3-法定代表人或授权代表'] = representative
            if name["words"] == None:
                name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
                self.init_result['保证人3-姓名'] = name
                self.init_result['保证人3-证件号码'] = id_num
                self.init_result['保证人3-法定代表人或授权代表'] = representative
            # 在所有页面中找正文中（第二部分 融资租赁主要条款及付款计划）的那个编号，因为存在换行的情况所以暂时不带位置输出
            contract_no = self.get_contract_no_one()
            self.init_result['合同编号（正文）'] = contract_no
            # 找到车辆识别代码
            vin = self.get_key_value(key='车辆识别代码：')
            self.init_result['车辆识别代码'] = vin
            # 找到经销商(车辆卖方(经销商))
            seller = self.get_key_value(key='车辆卖方（经销商）：')
            self.init_result['车辆卖方（经销商）'] = seller
            # 找到 —— 车辆原始销售价格
            vehicle_price = self.get_key_value(key='车辆原始销售价格（《机动车销售统一发票》所列金额）：')
            self.init_result['车辆原始销售价格（《机动车销售统一发票》所列金额）'] = vehicle_price
            # 找车辆附加产品明细（表）
            table_add_product = self.get_table_add_product()
            self.init_result['车辆附加产品明细表'] = table_add_product
            # 找融资成本总额
            financing_cost = self.get_key_value(key='融资成本总额：')
            self.init_result['融资成本总额'] = financing_cost
            # 找租期
            lease_term = self.get_key_value(key='租期：')
            self.init_result['租期'] = lease_term
            # 找还款计划(表)
            repayment_schedule = self.get_repayment_schedule()
            self.init_result['付款计划表'] = repayment_schedule
            # 找开户行户名、银行账号、银行
            name = self.get_key_value(key='户名：')
            self.init_result['银行账户-户名'] = name
            account = self.get_key_value(key='银行账号：')
            self.init_result['银行账户-银行账号'] = account
            bank = self.get_key_value(key='开户银行：')
            self.init_result['银行账户-开户行'] = bank
            # 找签字页上的系列信息
            # 承租人姓名、签章
            name = self.get_key_value(key='承租人姓名：')
            electronic_signature = self.get_electronic_signature(top='承租人姓名：', bottom='保证人1姓名：')
            if name["words"] == None:
                name = self.get_key_value(key='承租人一姓名：')
                electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：')
            self.init_result['签字页-承租人姓名'] = name
            self.init_result['签字页-承租人签章'] = electronic_signature
            # 保证人1姓名、签章
            name = self.get_key_value(key='保证人1姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
            self.init_result['签字页-保证人1姓名'] = name
            self.init_result['签字页-保证人1签章'] = electronic_signature
            # 这里用的是 name["words"] == ""
            if name["words"] == "":
                name = self.get_key_value(key='共同承租人名称：')
                electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：')
                self.init_result['签字页-共同承租人姓名'] = name
                self.init_result['签字页-共同承租人签章'] = electronic_signature
            # 保证人2姓名、签章
            name = self.get_key_value(key='保证人2姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：')
            self.init_result['签字页-保证人2姓名'] = name
            self.init_result['签字页-保证人2签章'] = electronic_signature
            # if判断条件对应3_3版本
            if name["words"] == "":
                name = self.get_key_value(key='保证人1姓名：')
                electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
                self.init_result['签字页-保证人1姓名'] = name
                self.init_result['签字页-保证人1签章'] = electronic_signature
            # 保证人3姓名、签章
            name = self.get_key_value(key='保证人3姓名：')
            electronic_signature = self.get_electronic_signature(top='保证人3姓名：', bottom='日期：')
            self.init_result['签字页-保证人3姓名'] = name
            self.init_result['签字页-保证人3签章'] = electronic_signature
            # if判断条件对应3_3版本
            if name["words"] == None:
                name = self.get_key_value(key='保证人2姓名：')
                electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='日期：')
                self.init_result['签字页-保证人2姓名'] = name
                self.init_result['签字页-保证人2签章'] = electronic_signature
        return self.init_result
        # results['is_shhz_contract'] = True
        # results['pdf_info'] = self.init_result

        # return results

    def get_info_1(self):
        if len(self.pdf_info) > 0:
            contract_no = self.get_contract_no(page_num='0')
            self.init_result_1['合同编号'] = contract_no
            # 承租人姓名
            name = self.get_key_value(key='承租人：', page_num='0')
            self.init_result_1['承租人-姓名'] = name
            # 承租人证件号码
            _id = self.get_key_value(key='证件号码：', page_num='0')
            self.init_result_1['承租人-证件号码'] = _id
            # 销售经销商
            seller = self.get_key_value(key='销售经销商：', page_num='0')
            self.init_result_1['销售经销商'] = seller
            # 合同编号（正文）
            contract_no = self.get_contract_no_one()
            self.init_result_1['合同编号（正文）'] = contract_no
            # 签字页-承租人姓名
            name = self.get_key_value(key='姓名/名称：')
            self.init_result_1['签字页-承租人姓名'] = name
            # 签字页-承租人证件号码
            _id = self.get_key_value(key='自然人身份证件号码/法人执照号码：')
            self.init_result_1['签字页-承租人证件号码'] = _id
            # 签字页-承租人签章
            signature_role_1 = self.get_signature_role_1()
            self.init_result_1['签字页-承租人签章'] = signature_role_1
            # 签字页-销售经销商
            seller = self.get_key_value(key='销售经销商：')
            self.init_result_1['签字页-销售经销商'] = seller
            # 经销商签章
            pass
        return self.init_result_1

    def get_info_2(self):
        if len(self.pdf_info) > 0:
            contract_no = self.get_contract_no_dy()
            self.init_result_2['合同编号'] = contract_no
            # 合同编号（正文）
            contract_no = self.get_contract_no_one()
            self.init_result_2['合同编号（正文）'] = contract_no
            # 抵押人姓名/名称
            name, _id = self.get_dyr_name_id()
            self.init_result_2['抵押人姓名/名称'] = name
            self.init_result_2['抵押人证件号码'] = _id
            # 抵押人配偶信息
            name, _id = self.get_dyrpo_name_id()
            self.init_result_2['抵押人配偶姓名/名称'] = name
            self.init_result_2['抵押人配偶证件号码'] = _id
            # 车辆识别代码
            vin = self.get_key_value(key='车辆识别代码：')
            self.init_result_2['车辆识别代码'] = vin
            # 租金总额
            rent = self.get_key_value_position(key='租金总额')
            self.init_result_2['租金总额'] = rent
            # 融资租赁期限
            lease_term = self.get_key_value_position(key='融资租赁期限')
            self.init_result_2['融资租赁期限'] = lease_term
            # 签字页抵押人姓名和签章
            name = self.get_key_value(key='抵押人姓名：')
            electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名：')
            self.init_result_2['签字页-抵押人姓名'] = name
            self.init_result_2['签字页-抵押人签章'] = electronic_signature
            # 签字页抵押人配偶姓名和签章
            name = self.get_key_value(key='抵押人配偶姓名：')
            electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名：', bottom='日期')
            self.init_result_2['签字页-抵押人配偶姓名'] = name
            self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature
        return self.init_result_2