# get_char.py 45.6 KB
#
# Raw Blame History Permalink

# -*- coding: utf-8 -*-
# @Author        : lk
# @Email         : 9428.al@gmail.com
# @Create Date   : 2021-07-20 16:42:41
# @Last Modified : 2021-09-07 19:52:39
# @Description   :

import re
import numpy as np
from fuzzywuzzy import fuzz
from shapely.geometry import Polygon


class Finder:
    def __init__(self, pdf_info, ocr_results):
        self.pdf_info = pdf_info
        self.ocr_results = ocr_results
        self.is_asp = False
        self.item = {"words": None,
                     "position": None,
                     }

    def gen_init_result(self, is_asp):
        # 格式化算法输出
        self.init_result = {"page_1": {"合同编号": self.item,
                                       "所购车辆价格": self.item,
                                       "车架号": self.item,
                                       "贷款本金金额": {"大写": self.item,
                                                  "小写": self.item,
                                                  "车辆贷款本金金额": self.item,
                                                  "附加产品融资贷款本金总金额": self.item,
                                                  },
                                       "贷款期限": self.item,
                                       "附加产品融资贷款本金总金额明细": self.item,
                                       "借款人签字及时间": self.item,
                                       },
                            "page_2": {"合同编号": self.item,
                                       "借款人及抵押人": {"name": self.item,
                                                   "id": self.item,
                                                   },
                                       "共同借款人及共同抵押人": {"name": self.item,
                                                       "id": self.item,
                                                       },
                                       "保证人1": {"name": self.item,
                                                "id": self.item,
                                                },
                                       "保证人2": {"name": self.item,
                                                "id": self.item,
                                                },
                                       "所购车辆价格": self.item,
                                       "车架号": self.item,
                                       "经销商": self.item,
                                       "贷款本金金额": {"大写": self.item,
                                                  "小写": self.item,
                                                  "车辆贷款本金金额": self.item,
                                                  "附加产品融资贷款本金总金额": self.item,
                                                  },
                                       "贷款期限": self.item,
                                       "标准利率": self.item,
                                       "还款账户": {"账号": self.item,
                                                "户名": self.item,
                                                "开户行": self.item,
                                                },
                                       },
                            "page_3": {"合同编号": self.item,
                                       "还款计划表": self.item,
                                       },
                            "page_4": {"合同编号": self.item,
                                       "附加产品融资贷款本金总金额明细": self.item,
                                       },
                            "page_5": {"合同编号": self.item,
                                       },
                            "page_6": {"合同编号": self.item,
                                       },
                            }
        if self.is_asp == False:
            self.init_result["page_7"] = {"合同编号": self.item,
                                          "主借人签字": {"签字": self.item,
                                                    "日期": self.item,
                                                    },
                                          "共借人签字": {"签字": self.item,
                                                    "日期": self.item,
                                                    },
                                          "保证人1签字": {"签字": self.item,
                                                     "日期": self.item,
                                                     },
                                          "保证人2签字": {"签字": self.item,
                                                     "日期": self.item,
                                                     },
                                          "见证人签字": {"签字": self.item,
                                                    "日期": self.item,
                                                    },
                                          }
        else:
            self.init_result["page_7"] = {"合同编号": self.item,
                                          }
            self.init_result["page_8"] = {"合同编号": self.item,
                                          "主借人签字": {"签字": self.item,
                                                    "日期": self.item,
                                                    },
                                          "共借人签字": {"签字": self.item,
                                                    "日期": self.item,
                                                    },
                                          "保证人1签字": {"签字": self.item,
                                                     "日期": self.item,
                                                     },
                                          "保证人2签字": {"签字": self.item,
                                                     "日期": self.item,
                                                     },
                                          "见证人签字": {"签字": self.item,
                                                    "日期": self.item,
                                                    },
                                          }

    def get_top_iou(self, poly, ocr_result):
        """传入一个多边形, 找到与之最匹配的多边形

        Args:
            poly (TYPE): Description
        """
        iou_list = []
        for key in ocr_result:
            bbox, text = ocr_result[key]
            g = Polygon(np.array(bbox).reshape((-1, 2)))
            p = Polygon(np.array(poly).reshape((-1, 2)))
            if not g.is_valid or not p.is_valid:
                continue
            inter = Polygon(g).intersection(Polygon(p)).area
            union = g.area + p.area - inter
            iou = inter / union
            iou_list.append([iou, key])
        if len(iou_list) == 0:
            return -1, -1
        top_iou = sorted(iou_list, key=lambda x: x[0])[-1]
        return top_iou

    def poly_to_rectangle(self, poly):
        xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax = poly
        bbox = [xmin, ymin, xmax, ymax]
        return bbox

    def get_contract_no(self, page_num):
        """传入页码,查看该页码右上角的编号

        Args:
            page_num (string):

        Returns:
            sting:
        """
        contract_no = self.item.copy()
        # contract_no['words'] = ''
        # contract_no['position'] = [-1, -1, -1, -1]
        # 只看第一页
        for key in self.ocr_results[page_num]:
            bbox, text = self.ocr_results[page_num][key]
            if '合同编号:' in text:
                words = text.split(':')[-1]
                location = self.poly_to_rectangle(bbox)
                contract_no['words'] = words
                contract_no['position'] = location
        return contract_no

    def get_vehicle_price(self, page_num='0'):
        vehicle_price = self.item.copy()
        # vehicle_price['words'] = ''
        # vehicle_price['position'] = [-1, -1, -1, -1]
        for key in self.ocr_results[page_num]:
            bbox, text = self.ocr_results[page_num][key]
            if '所购车辆价格为人民币' in text:
                words = text.split('币')[-1]
                location = self.poly_to_rectangle(bbox)
                vehicle_price['words'] = words
                vehicle_price['position'] = location
        return vehicle_price

    def get_vin(self, page_num='0'):
        vin = self.item.copy()
        # vin['words'] = ''
        # vin['position'] = [-1, -1, -1, -1]
        for key in self.ocr_results[page_num]:
            bbox, text = self.ocr_results[page_num][key]
            if '车架号:' in text:
                words = text.split(':')[-1]
                location = self.poly_to_rectangle(bbox)
                vin['words'] = words
                vin['position'] = location
        return vin

    def get_loan_principal(self, page_num='0'):
        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
        upper = self.item.copy()
        lower = self.item.copy()
        asp_1 = self.item.copy()
        asp_2 = self.item.copy()
        anchor_bbox = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if fuzz.ratio(''.join(chinese_keywords), text) > 15:
                        text = text.split('：')[-1].strip()
                        upper['position'] = bbox
                        upper['words'] = text
                    if '小写：¥' in text:
                        words = text.split('¥')[-1].strip()
                        lower['position'] = bbox
                        lower['words'] = words
                    if '附加产品融资贷款本金总金额' == text:
                        anchor_bbox = bbox
        if anchor_bbox:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if np.mean(bbox[1::2]) < np.mean(anchor_bbox[1::2]) and '人民币：小写：' in text:
                            words = re.findall(r'人民币：小写：\[(.*)\]', text)[0]
                            asp_1['position'] = bbox
                            asp_1['words'] = words
                        if np.mean(bbox[1::2]) > np.mean(anchor_bbox[1::2]) and '人民币：小写：' in text:
                            words = re.findall(r'人民币：小写：\[(.*)\]', text)[0]
                            asp_2['position'] = bbox
                            asp_2['words'] = words
        return upper, lower, asp_1, asp_2

    def get_loan_term(self, page_num='0'):
        loan_term = self.item.copy()
        all_text = ''
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        matchs = re.search(r'贷款期限(\d+)个月', all_text)
        if matchs:
            words = matchs.group(1)
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if f'{words}个月' in text:
                            loan_term['position'] = bbox
                            loan_term['words'] = words
        return loan_term

    def get_standard_rate(self, page_num='0'):
        standard_rate = self.item.copy()
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    matchs = re.search(r'本合同当期的标准利率为(\S+)%/年', text)
                    if matchs:
                        standard_rate['position'] = bbox
                        standard_rate['words'] = matchs.group(1)
        return standard_rate

    def mergelist(self, text_list):
        pattern = re.compile("[^\u4e00-\u9fa5]")  # 匹配不是中文的其他字符
        mergeindex = -1
        for index, i in enumerate(text_list):
            if '所购' in i and len(pattern.sub('', pattern.sub('', text_list[index + 1]))) != 0:
                # if '所购' in i and '.00' not in text_list[index+1]:
                mergeindex = index
        if mergeindex == -1:
            return text_list
        else:
            new_text_list = text_list[:mergeindex] + [text_list[mergeindex] + text_list[mergeindex + 1]] + text_list[
                                                                                                           mergeindex + 2:]
            return self.mergelist(new_text_list)

    def get_asp_details(self, page_num):
        """Rebuild the ASP add-on details table from the OCR output of a page.

        The table starts with two fixed header rows. The OCR boxes whose text
        equals '项目1', '用途总金额2' and '贷款本金3' locate the three columns,
        and a box whose text is one of the truncated total-row labels locates
        the total row. Starting from the '项目1' box, the method steps
        downwards (at most 10 times) using ``get_top_iou`` to find the next
        box in the first column, then looks up the other two columns on the
        same row.

        Args:
            page_num (str): Key of the page inside ``self.ocr_results``.

        Returns:
            dict: Copy of ``self.item`` whose 'words' is the table as a list
            of rows; 'position' is not set by this method.
        """
        asp_details_table_term = self.item.copy()
        asp_details_table = [['附加产品融资贷款本金总金额及贷款利率明细'], ['项目1', '用途总金额2', '贷款本金3']]
        bbox_xm = None      # box of the '项目1' column header
        bbox_ytzje = None   # box of the '用途总金额2' column header
        bbox_dkbj = None    # box of the '贷款本金3' column header
        bbox_total = None   # box of the (truncated) total-row label
        for key in self.ocr_results[page_num]:
            bbox, text = self.ocr_results[page_num][key]
            if text == '项目1':
                bbox_xm = bbox
            if text == '用途总金额2':
                bbox_ytzje = bbox
            if text == '贷款本金3':
                bbox_dkbj = bbox
            if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
                bbox_total = bbox
        if bbox_xm:
            for i in range(10):
                # Difference between the 2nd and the last coordinate of the
                # current first-column box — presumably its height; verify
                # against the OCR box layout.
                rh = abs(bbox_xm[1] - bbox_xm[-1])
                anchor = np.array(bbox_xm).reshape((-1, 2))
                # Shift the box down by 1.4x that value (truncated to int).
                anchor[:, 1] += int(rh * 1.4)
                _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
                if _iou > 0:
                    bbox, xm_text = self.ocr_results[page_num][_key]
                    # The matched box becomes the starting point of the next step.
                    bbox_xm = bbox
                    # Handle item descriptions that wrap onto a second line:
                    # text without '所购' is appended to the first cell of the
                    # most recent row instead of starting a new row.
                    if not '所购' in xm_text:
                        line = asp_details_table[-1]
                        line[0] += xm_text
                        asp_details_table[-1] = line
                        continue
                    # print(xm_text)
                    # x-coordinates from the column header, y-coordinates from
                    # the current row box.
                    # NOTE(review): bbox_ytzje / bbox_dkbj are None when their
                    # header was not found, and the IoU / -1 "no match" key
                    # returned by get_top_iou is not checked below — confirm
                    # these cannot occur on real input.
                    anchor_1 = [bbox_ytzje[0], bbox[1], bbox_ytzje[2], bbox[3],
                                bbox_ytzje[4], bbox[5], bbox_ytzje[6], bbox[7]]
                    _iou, _key = self.get_top_iou(poly=anchor_1, ocr_result=self.ocr_results[page_num])
                    bbox, ytzje_text = self.ocr_results[page_num][_key]
                    # print(ytzje_text)
                    # `bbox` was just rebound, so the y-coordinates here come
                    # from the box matched for the second column.
                    anchor_2 = [bbox_dkbj[0], bbox[1], bbox_dkbj[2], bbox[3],
                                bbox_dkbj[4], bbox[5], bbox_dkbj[6], bbox[7]]
                    _iou, _key = self.get_top_iou(poly=anchor_2, ocr_result=self.ocr_results[page_num])
                    bbox, dkbj_text = self.ocr_results[page_num][_key]
                    # print(dkbj_text)
                    # Same text for both columns: split it on a single space
                    # into the two cell values.
                    if xm_text == ytzje_text:
                        xm_text, ytzje_text = xm_text.split(' ')
                    line = [xm_text, ytzje_text, dkbj_text]
                    asp_details_table.append(line)
                else:
                    # Nothing overlaps the shifted box: stop stepping down.
                    break

        if bbox_total:
            # Total row: x-coordinates of the '贷款本金3' column, y-coordinates
            # of the total-row label.
            anchor = [bbox_dkbj[0], bbox_total[1], bbox_dkbj[2], bbox_total[3],
                      bbox_dkbj[4], bbox_total[5], bbox_dkbj[6], bbox_total[7]]
            _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
            bbox, total_text = self.ocr_results[page_num][_key]
            asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text])
        asp_details_table_term['words'] = asp_details_table
        return asp_details_table_term

    def get_signature(self):
        signature = self.item.copy()
        for block in self.pdf_info['0']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '签署日期' in text:
                        words = text
                        signature['words'] = words
                        signature['position'] = bbox
        return signature

    def get_somebody(self, top, bottom):
        # 指定上下边界后,返回上下边界内的客户信息
        _name = self.item.copy()
        _id = self.item.copy()
        # 只看第一页，先划定上下边界
        y_top = 0
        y_bottom = 0
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if top in text:
                        y_top = bbox[3]
                    if bottom in text:
                        y_bottom = bbox[3]
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if y_top < bbox[3] < y_bottom:
                        # print(top, bottom, text)
                        if '姓名/名称' in text:
                            words = text.split('：')[-1]
                            _name['position'] = bbox
                            _name['words'] = words
                        if '自然人身份证件号码/法人执照号码' in text:
                            words = text.split('：')[-1]
                            _id['position'] = bbox
                            _id['words'] = words
        return _name, _id

    def get_seller(self):
        seller = self.item.copy()
        # 先找到 key
        anchor_bbox = None
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '经销商' == text:
                        anchor_bbox = bbox
        # 当找到了 key, 则根据 key 去匹配 value
        if anchor_bbox:
            half_width = self.pdf_info['1']['width'] * 0.5
            for block in self.pdf_info['1']['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
                                anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
                            seller['position'] = bbox
                            seller['words'] = text
        return seller

    def get_payback_account(self):
        account = self.item.copy()
        account_name = self.item.copy()
        account_bank = self.item.copy()
        all_text = ''
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    all_text += text
        # 首先确定账户信息是哪种,我们只输出非另行通知的格式
        if '☑账号' in all_text:
            all_text = all_text.replace('　', '')
            matchs_1 = re.findall(r'账号：(.*)户名', all_text)
            if matchs_1:
                words = matchs_1[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'{words}' in text:
                                account['position'] = bbox
                                account['words'] = words
            matchs_2 = re.findall(r'户名：(.*)开户行', all_text)
            if matchs_2:
                words = matchs_2[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'{words}' in text:
                                account_name['position'] = bbox
                                account_name['words'] = words
            matchs_3 = re.findall(r'开户行：(.*)；', all_text)
            if matchs_3:
                words = matchs_3[0]
                for block in self.pdf_info['1']['blocks']:
                    if block['type'] != 0:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            bbox, text = span['bbox'], span['text']
                            if f'开户行：{words}；' in text.replace('　', ''):
                                account_bank['position'] = bbox
                                account_bank['words'] = words
        return account, account_name, account_bank

    def get_repayment_schedule(self):
        repayment_schedule = self.item.copy()
        # 只看第二页
        repayment_schedule_table = []
        repayment_schedule_text_list = []
        table = False
        for block in self.pdf_info['2']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '序号' == text:
                        table = True
                    if '以上表格中所列的序号并非还款期数' in text:
                        table = False
                    if table == True:
                        repayment_schedule_text_list.append(text)
        for i in range(len(repayment_schedule_text_list) // 5):
            line = []
            # 5表示5列的意思
            for j in range(5):
                line.append(repayment_schedule_text_list[i * 5 + j])
            if str(i + 1) == line[1]:
                break
            repayment_schedule_table.append(line)
        if len(repayment_schedule_table) > 0:
            repayment_schedule['words'] = repayment_schedule_table
        return repayment_schedule

    def get_signature_role_1(self):
        signature_role_1 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '借款人(抵押人)' in text:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_1['page_num'] = page_num
        signature_role_1['position'] = position
        signature_role_1['words'] = words
        return signature_role_1

    def get_signature_role_2(self):
        signature_role_2 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '共同借款人(共同抵押人)' in text:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_2['page_num'] = page_num
        signature_role_2['position'] = position
        signature_role_2['words'] = words
        return signature_role_2

    def get_signature_role_3(self):
        signature_role_3 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '保证人1' in text and int(i) != 0:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_3['page_num'] = page_num
        signature_role_3['position'] = position
        signature_role_3['words'] = words
        return signature_role_3

    def get_signature_role_4(self):
        signature_role_4 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '保证人2' in text and int(i) != 0:
                            region = True
                        if '日期' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_4['page_num'] = page_num
        signature_role_4['position'] = position
        signature_role_4['words'] = words
        return signature_role_4

    def get_signature_role_5(self):
        signature_role_5 = self.init_item.copy()
        # 先定位签字区域
        texts = []
        boxes = []
        page_num = None
        position = None
        words = None
        region = False
        for i in list(self.pdf_info.keys()):
            for block in self.pdf_info[i]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '见证人签字' in text and int(i) != 0:
                            region = True
                        if '年' in text:
                            region = False
                        if region == True:
                            page_num = i
                            texts.append(text)
                            boxes.append(bbox)
        print(texts)
        if len(texts) > 4:
            words = '有'
        else:
            words = '无'
        boxes = np.array(boxes).reshape((-1, 2))
        position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
        signature_role_5['page_num'] = page_num
        signature_role_5['position'] = position
        signature_role_5['words'] = words
        return signature_role_5

    def get_last_page_signature(self, page_num, top, bottom):
        signature_name = self.item.copy()
        signature_date = self.item.copy()
        anchor_top = None
        anchor_bottom = None
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if top in text:
                        anchor_top = bbox[1]
                    if bottom in text:
                        anchor_bottom = bbox[1]
        # print(top, anchor_top, anchor_bottom)
        if anchor_top is not None and anchor_bottom is not None:
            for block in self.pdf_info[page_num]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
                        if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                            name = text.split(' ')[0]
                            date = text.split(':')[-1]
                            signature_name['words'] = name
                            signature_name['position'] = bbox
                            signature_date['words'] = date
                            signature_date['position'] = bbox
        return signature_name, signature_date

    def get_info(self):
        """
            block['type'] == 0 : 表示该元素为图片

        Returns:
            dict: Description
        """
        # 先判断是否为 ASP 产品
        # 只看第一页，判断是否有 '附加产品融资贷款本金总金额' 这一句话，若有则为 ASP 产品
        # print(self.pdf_info['0']['blocks'])
        # for block in self.pdf_info['0']['blocks']:
        #     if block['type'] != 0:
        #         continue
        #     for line in block['lines']:
        #         for span in line['spans']:
        #             bbox, text = span['bbox'], span['text']
        #             if '附加产品融资贷款本金总金额' == text:
        #                 self.is_asp = True
        for key in self.ocr_results['0']:
            bbox, text = self.ocr_results['0'][key]
            if '附加产品融资贷款本金总金额' in text:
                self.is_asp = True
        self.gen_init_result(self.is_asp)
        if len(list(self.ocr_results.keys())) <= 8:  # 8.5 版本客户提供的样本出现串页的情况，暂时无法识别
            # Page 1
            # 找合同编号
            contract_no = self.get_contract_no(page_num='0')
            # print(contract_no)
            self.init_result['page_1']['合同编号'] = contract_no
            # 所购车辆价格
            vehicle_price = self.get_vehicle_price()
            # print(vehicle_price)
            self.init_result['page_1']['所购车辆价格'] = vehicle_price
            # 车架号
            vin = self.get_vin()
            # print(vin)
            self.init_result['page_1']['车架号'] = vin
            # 贷款本金金额(如果是 ASP产品)则'贷款本金金额'项目中包含'车辆贷款本金金额'和'附加产品融资贷款本金总金额'两个项目
            upper, lower, asp_1, asp_2 = self.get_loan_principal()
            # print(upper, lower, asp_1, asp_2)
            self.init_result['page_1']['贷款本金金额']['大写'] = upper
            self.init_result['page_1']['贷款本金金额']['小写'] = lower
            self.init_result['page_1']['贷款本金金额']['车辆贷款本金金额'] = asp_1
            self.init_result['page_1']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
            # 贷款期限
            loan_term = self.get_loan_term()
            # print(loan_term)
            self.init_result['page_1']['贷款期限'] = loan_term
            # 附加产品融资贷款本金总金额明细（ASP-表格）
            asp_details_table = self.get_asp_details(page_num='0')
            # print(asp_details_table)
            self.init_result['page_1']['附加产品融资贷款本金总金额明细'] = asp_details_table
            # 借款人签字及时间
            signature = self.get_signature()
            # print(signature)
            self.init_result['page_1']['借款人签字及时间'] = signature
            #######################################
            # Page 2
            # 找合同编号
            contract_no = self.get_contract_no(page_num='0')
            # print(contract_no)
            self.init_result['page_2']['合同编号'] = contract_no
            # 找借款人及抵押人(地址字段原本有空格)
            borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人：', bottom='共同借款人：')
            # print(borrower_name, borrower_id)
            # 这是为了同时兼容 8.1 版本
            if borrower_name['words'] == None:
                borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人：', bottom='共同借款人及共同抵押人：')
            # 这是为了兼容车贷分离版本
            if borrower_name['words'] == None:
                borrower_name, borrower_id = self.get_somebody(top='借款人：', bottom='共同借款人及抵押人：')
            self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name
            self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id
            # 找共同借款人及共同抵押人
            co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人：', bottom='保证人1：')
            # print(co_borrower_name, co_borrower_id)
            self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name
            self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id
            # 保证人1
            first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1：', bottom='保证人2：')
            self.init_result['page_2']['保证人1']['name'] = first_guarantor_name
            self.init_result['page_2']['保证人1']['id'] = first_guarantor_id
            # 保证人2
            second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2：', bottom='第一章')
            self.init_result['page_2']['保证人2']['name'] = second_guarantor_name
            self.init_result['page_2']['保证人2']['id'] = second_guarantor_id
            # 所购车辆价格
            vehicle_price = self.get_vehicle_price(page_num='1')
            self.init_result['page_2']['所购车辆价格'] = vehicle_price
            # 车架号
            vin = self.get_vin(page_num='1')
            self.init_result['page_2']['车架号'] = vin
            # 经销商
            seller = self.get_seller()
            self.init_result['page_2']['经销商'] = seller
            # 贷款本金金额(如果是 ASP产品)则'贷款本金金额'项目中包含'车辆贷款本金金额'和'附加产品融资贷款本金总金额'两个项目
            upper, lower, asp_1, asp_2 = self.get_loan_principal(page_num='1')
            # print(upper, lower, asp_1, asp_2)
            self.init_result['page_2']['贷款本金金额']['大写'] = upper
            self.init_result['page_2']['贷款本金金额']['小写'] = lower
            self.init_result['page_2']['贷款本金金额']['车辆贷款本金金额'] = asp_1
            self.init_result['page_2']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
            # 贷款期限
            loan_term = self.get_loan_term(page_num='1')
            self.init_result['page_2']['贷款期限'] = loan_term
            # 本合同当期的标准利率
            standard_rate = self.get_standard_rate(page_num='1')
            self.init_result['page_2']['标准利率'] = standard_rate
            # 还款账户
            account, account_name, account_bank = self.get_payback_account()
            # print(account, account_name, account_bank)
            self.init_result['page_2']['还款账户']['账号'] = account
            self.init_result['page_2']['还款账户']['户名'] = account_name
            self.init_result['page_2']['还款账户']['开户行'] = account_bank
            #######################################
            # Page 3
            # 找合同编号
            contract_no = self.get_contract_no(page_num='2')
            self.init_result['page_3']['合同编号'] = contract_no
            # 还款计划表（表格）
            repayment_schedule_table = self.get_repayment_schedule()
            # print(repayment_schedule_table)
            self.init_result['page_3']['还款计划表'] = repayment_schedule_table
            #######################################
            # Page 4
            # 找合同编号
            contract_no = self.get_contract_no(page_num='3')
            self.init_result['page_4']['合同编号'] = contract_no
            # 附加产品融资贷款本金总金额明细（ASP-表格）
            asp_details_table = self.get_asp_details(page_num='3')
            # print(asp_details_table)
            self.init_result['page_4']['附加产品融资贷款本金总金额明细'] = asp_details_table
            #######################################
            # Page 5
            # 找合同编号
            contract_no = self.get_contract_no(page_num='4')
            self.init_result['page_5']['合同编号'] = contract_no
            #######################################
            # Page 6
            # 找合同编号
            contract_no = self.get_contract_no(page_num='5')
            self.init_result['page_6']['合同编号'] = contract_no
            if self.is_asp == False:
                # Page 7
                # 找合同编号
                contract_no = self.get_contract_no(page_num='6')
                self.init_result['page_7']['合同编号'] = contract_no
                signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                              top='合同编号', bottom='共同借款人')
                if signature_name['words'] == None:
                    signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                                  top='合同编号', bottom='共同借款人（抵押人）')
                self.init_result['page_7']['主借人签字']['签字'] = signature_name
                self.init_result['page_7']['主借人签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                              top='共同借款人', bottom='保证人1')
                if signature_name['words'] == None:
                    signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                                  top='共同借款人（抵押人）', bottom='保证人1')
                self.init_result['page_7']['共借人签字']['签字'] = signature_name
                self.init_result['page_7']['共借人签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                              top='保证人1', bottom='保证人2')
                self.init_result['page_7']['保证人1签字']['签字'] = signature_name
                self.init_result['page_7']['保证人1签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                              top='保证人2', bottom='在本人面前亲笔签署本合同')
                self.init_result['page_7']['保证人2签字']['签字'] = signature_name
                self.init_result['page_7']['保证人2签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='6',
                                                                              top='在本人面前亲笔签署本合同', bottom='以下无正文')
                self.init_result['page_7']['见证人签字']['签字'] = signature_name
                self.init_result['page_7']['见证人签字']['日期'] = signature_date
            else:
                # Page 7
                # 找合同编号
                contract_no = self.get_contract_no(page_num='6')
                self.init_result['page_7']['合同编号'] = contract_no
                # Page 8
                # 找合同编号
                contract_no = self.get_contract_no(page_num='7')
                self.init_result['page_8']['合同编号'] = contract_no
                signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                              top='合同编号', bottom='共同借款人')
                if signature_name['words'] == None:
                    signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                                  top='合同编号', bottom='共同借款人（抵押人）')
                self.init_result['page_8']['主借人签字']['签字'] = signature_name
                self.init_result['page_8']['主借人签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                              top='共同借款人', bottom='保证人1')
                if signature_name['words'] == None:
                    signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                                  top='共同借款人（抵押人）', bottom='保证人1')
                self.init_result['page_8']['共借人签字']['签字'] = signature_name
                self.init_result['page_8']['共借人签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                              top='保证人1', bottom='保证人2')
                self.init_result['page_8']['保证人1签字']['签字'] = signature_name
                self.init_result['page_8']['保证人1签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                              top='保证人2', bottom='在本人面前亲笔签署本合同')
                self.init_result['page_8']['保证人2签字']['签字'] = signature_name
                self.init_result['page_8']['保证人2签字']['日期'] = signature_date
                signature_name, signature_date = self.get_last_page_signature(page_num='7',
                                                                              top='在本人面前亲笔签署本合同', bottom='以下无正文')
                self.init_result['page_8']['见证人签字']['签字'] = signature_name
                self.init_result['page_8']['见证人签字']['日期'] = signature_date
        # 重新定制输出
        new_results = {"is_asp": self.is_asp,
                       "page_info": self.init_result
                       }
        return new_results