add FSM AFC/HIL Contract

周伟奇
Showing 8 changed files with 2395 additions and 15 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/named_enum.py
src/apps/doc/views.py
src/common/electronic_afc_contract/afc_contract_ocr.py
src/common/electronic_afc_contract/get_char_fsm.py
src/common/electronic_hil_contract/get_char_fsm.py
src/common/electronic_hil_contract/hil_contract_ocr.py
--- a/src/apps/doc/consts.py
View file @8d595a3
+++ b/src/apps/doc/consts.py
View file @8d595a3
@@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10
 FIXED_APPLICATION_ID_PREFIX = 'CH-S'

 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT']
-DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']
+DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP']
 COMPARE_DOC_SCHEME_LIST = ['CA', 'SE']

 HIL_PREFIX = 'HIL'
--- a/src/apps/doc/management/commands/ocr_process.py
View file @8d595a3
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @8d595a3
@@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin):

                        # AFC合同
                        if classify_1_str == str(consts.CONTRACT_CLASSIFY):
-                            ocr_result = afc_predict(pdf_handler.pdf_info)
+                            is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3]
+                            ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm)
                            page_res = {}
                            for page_num, page_info in ocr_result.get('page_info', {}).items():
                                if isinstance(page_num, str) and page_num.startswith('page_'):
@@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin):
                            }
                        # HIL合同
                        elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP:
+                            is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3]
                            file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str)
-                            ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1)
+                            ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm)
                            rebuild_res_1 = {}
                            page_res = {}
                            for field_name, field_info in ocr_result_1.items():
@@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin):
                                        'page_info': page_info
                                    }
                        # hmh
-                        else:
-                            pass
+                        # else:
+                        #     pass


                        contract_res = {}
--- a/src/apps/doc/named_enum.py
View file @8d595a3
+++ b/src/apps/doc/named_enum.py
View file @8d595a3
@@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum):
    DOCUPLOAD = (3, 'Document Upload')
    SUBMITING = (4, 'Submiting')
    UPLOADING = (5, 'Uploading')
+    OVP = (6, 'OVP')


 class FailureReason(NamedEnum):
--- a/src/apps/doc/views.py
View file @8d595a3
+++ b/src/apps/doc/views.py
View file @8d595a3
@@ -590,12 +590,13 @@ class UploadDocView(GenericView, DocHandler):
        is_zip = False

        classify_1 = 0
-        # 电子合同
-        if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
-            for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
-                if keyword in document_name:
-                    classify_1 = classify_1_tmp
-                    break
+        # 电子合同 Econtract or OVP(FSM)
+        if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]:  
+            if document_scheme == consts.DOC_SCHEME_LIST[1]:
+                for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
+                    if keyword in document_name:
+                        classify_1 = classify_1_tmp
+                        break
        # FSM合同：WEP/MSI/SC
        elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]:
            for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix):
--- a/src/common/electronic_afc_contract/afc_contract_ocr.py
View file @8d595a3
+++ b/src/common/electronic_afc_contract/afc_contract_ocr.py
View file @8d595a3
@@ -6,6 +6,7 @@
 # @Description   :

 from .get_char import Finder
+from .get_char_fsm import Finder as FSMFinder
 import numpy as np


@@ -23,7 +24,7 @@ def extract_info(ocr_results):
    return {'page_1': {'合同编号': contract_no}}


-def predict(pdf_info, is_qrs=False):
+def predict(pdf_info, is_qrs=False, is_fsm=False):
    ocr_results = {}
    for pno in pdf_info:
        ocr_results[pno] = {}
@@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False):
        results = extract_info(ocr_results)
    else:
        # 输入是整个 PDF 中的信息
-        f = Finder(pdf_info, ocr_results=ocr_results)
+        if is_fsm:
+            f = FSMFinder(pdf_info, ocr_results=ocr_results) 
+        else:
+            f = Finder(pdf_info, ocr_results=ocr_results)
        results = f.get_info()
    return results

--- a/src/common/electronic_afc_contract/get_char_fsm.py 0 → 100644
View file @8d595a3
+++ b/src/common/electronic_afc_contract/get_char_fsm.py 0 → 100644
View file @8d595a3
+import re
+import numpy as np
+from fuzzywuzzy import fuzz
+from shapely.geometry import Polygon
+
+
+class Finder:
+
+    def __init__(self, pdf_info, ocr_results):
+        """Keep the parsed PDF structure and the OCR results for later lookups.
+
+        Args:
+            pdf_info: per-page PDF data; the getters index it by page key and
+                read ``['blocks']`` (and ``['width']``) from each page.
+            ocr_results: per-page mapping whose values unpack as ``(bbox, text)``.
+        """
+        # Template for a single extracted field; getters copy it before filling it in.
+        self.item = dict.fromkeys(("words", "position"))
+        self.is_asp = False
+        self.ocr_results = ocr_results
+        self.pdf_info = pdf_info
+
+    def gen_init_result(self, is_asp):
+        """Build ``self.init_result``, the empty per-page output skeleton.
+
+        Each leaf is its own copy of ``self.item`` so that writing into one
+        field in place cannot change any other field.
+
+        Args:
+            is_asp: accepted for interface compatibility; not used by this method.
+        """
+        def blank():
+            # One independent empty field.
+            return self.item.copy()
+
+        def group(*names):
+            # A sub-dict of independent empty fields, in the given key order.
+            return {name: blank() for name in names}
+
+        def principal():
+            return group("大写", "小写", "车辆贷款本金金额", "附加产品融资贷款本金总金额")
+
+        def party():
+            return group("name", "id")
+
+        def bank_account():
+            return group("账号", "户名", "开户行")
+
+        def signed():
+            return group("签字", "日期")
+
+        # Formatted algorithm output, one entry per contract page.
+        self.init_result = {
+            "page_1": {
+                "合同编号": blank(),
+                "所购车辆价格": blank(),
+                "车架号": blank(),
+                "贷款本金金额": principal(),
+                "贷款期限": blank(),
+                "附加产品融资贷款本金总金额明细": blank(),
+                "借款人签字及时间": blank(),
+            },
+            "page_2": {
+                "合同编号": blank(),
+                "借款人及抵押人": party(),
+                "共同借款人及共同抵押人": party(),
+                "保证人1": party(),
+                "保证人2": party(),
+                "所购车辆价格": blank(),
+                "车架号": blank(),
+                "经销商": blank(),
+                "贷款本金金额": principal(),
+                "贷款期限": blank(),
+                "标准利率": blank(),
+                "借款人收款账户": bank_account(),
+                "还款账户": bank_account(),
+            },
+            "page_3": {
+                "合同编号": blank(),
+                "还款计划表": blank(),
+            },
+            "page_4": {
+                "合同编号": blank(),
+                "附加产品融资贷款本金总金额明细": blank(),
+            },
+            "page_5": {"合同编号": blank()},
+            "page_6": {"合同编号": blank()},
+        }
+        self.init_result["page_7"] = {"合同编号": blank()}
+        self.init_result["page_8"] = {
+            "合同编号": blank(),
+            "主借人签字": signed(),
+            "共借人签字": signed(),
+            "保证人1签字": signed(),
+            "保证人2签字": signed(),
+            "见证人签字": signed(),
+        }
+
+    def get_top_iou(self, poly, ocr_result):
+        """Find the OCR box that overlaps a given polygon the most (by IoU).
+
+        Args:
+            poly: coordinates of the query polygon; reshaped to ``(-1, 2)`` points.
+            ocr_result: mapping of key -> ``(bbox, text)`` for one page.
+
+        Returns:
+            ``[iou, key]`` for the best-overlapping box, or ``(-1, -1)`` when no
+            valid pair of polygons could be compared.
+        """
+        if not ocr_result:
+            return -1, -1
+        # The query polygon does not change inside the loop: build and validate it once.
+        query = Polygon(np.array(poly).reshape((-1, 2)))
+        if not query.is_valid:
+            return -1, -1
+        best = None
+        for key in ocr_result:
+            bbox, _text = ocr_result[key]
+            candidate = Polygon(np.array(bbox).reshape((-1, 2)))
+            if not candidate.is_valid:
+                continue
+            inter = candidate.intersection(query).area
+            union = candidate.area + query.area - inter
+            # A zero union would be 0/0; score it as "no overlap" instead of raising.
+            iou = inter / union if union else 0.0
+            # ">=" keeps the last of equally scored boxes, as a stable sort + [-1] would.
+            if best is None or iou >= best[0]:
+                best = [iou, key]
+        if best is None:
+            return -1, -1
+        return best
+
+    def poly_to_rectangle(self, poly):
+        """Collapse an 8-value polygon into ``[xmin, ymin, xmax, ymax]``.
+
+        Values are taken from fixed slots (x from the 7th and 5th entries, y from
+        the 4th and 8th), so the result is a true bounding box only for an
+        axis-aligned quad listed top-left, top-right, bottom-right, bottom-left.
+        """
+        _, _, _, top, right, _, left, bottom = poly
+        return [left, top, right, bottom]
+
+    def get_contract_no(self, page_num):
+        """Read the contract number from the OCR text of one page.
+
+        Args:
+            page_num (string): key of the page in ``self.ocr_results``.
+
+        Returns:
+            dict: copy of ``self.item``. When an OCR text contains '合同编号:',
+            ``words`` is the part after its last ':' and ``position`` is the
+            rectangle given by ``poly_to_rectangle``; the last match wins.
+        """
+        found = self.item.copy()
+        for bbox, text in self.ocr_results[page_num].values():
+            if '合同编号:' not in text:
+                continue
+            number = text.split(':')[-1]
+            rect = self.poly_to_rectangle(bbox)
+            found['words'] = number
+            found['position'] = rect
+        return found
+
+    def get_vehicle_price(self, page_num='0'):
+        """Read the purchased-vehicle price from the OCR text of one page.
+
+        Returns:
+            dict: copy of ``self.item``. When an OCR text contains
+            '所购车辆价格为人民币', ``words`` is whatever follows its last '币'
+            and ``position`` comes from ``poly_to_rectangle``; the last match wins.
+        """
+        price = self.item.copy()
+        for bbox, text in self.ocr_results[page_num].values():
+            if '所购车辆价格为人民币' not in text:
+                continue
+            amount = text.split('币')[-1]
+            rect = self.poly_to_rectangle(bbox)
+            price['words'] = amount
+            price['position'] = rect
+        return price
+
+    def get_vin(self, page_num='0'):
+        """Read the vehicle identification number from the OCR text of one page.
+
+        Returns:
+            dict: copy of ``self.item``. When an OCR text contains '车架号:',
+            ``words`` is the part after its last ':' and ``position`` comes from
+            ``poly_to_rectangle``; the last match wins.
+        """
+        result = self.item.copy()
+        for bbox, text in self.ocr_results[page_num].values():
+            if '车架号:' not in text:
+                continue
+            number = text.split(':')[-1]
+            rect = self.poly_to_rectangle(bbox)
+            result['words'] = number
+            result['position'] = rect
+        return result
+
+    def get_loan_principal(self, page_num='0'):
+        """Extract the loan principal amounts from one page of ``self.pdf_info``.
+
+        Args:
+            page_num: key of the page in ``self.pdf_info``.
+
+        Returns:
+            tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of ``self.item``:
+            ``upper`` - last span that fuzzy-matches the Chinese numeral characters;
+            ``lower`` - last span containing '小写：¥' (text after the '¥');
+            ``asp_1`` / ``asp_2`` - bracketed '人民币：小写：[...]' amounts whose span lies
+            above / below the span whose text is exactly '附加产品融资贷款本金总金额'.
+        """
+        numerals = ''.join(['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
+                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'])
+        upper = self.item.copy()
+        lower = self.item.copy()
+        asp_1 = self.item.copy()
+        asp_2 = self.item.copy()
+        # Only text blocks (type 0) are considered.
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+
+        anchor_bbox = None
+        for bbox, text in spans:
+            if fuzz.ratio(numerals, text) > 15:
+                # The later checks in this iteration see the trimmed text on purpose.
+                text = text.split('：')[-1].strip()
+                upper['position'] = bbox
+                upper['words'] = text
+            if '小写：¥' in text:
+                lower['position'] = bbox
+                lower['words'] = text.split('¥')[-1].strip()
+            if '附加产品融资贷款本金总金额' == text:
+                anchor_bbox = bbox
+
+        if anchor_bbox:
+            anchor_y = np.mean(anchor_bbox[1::2])
+            for bbox, text in spans:
+                matched = re.search(r'人民币：小写：\[(.*)\]', text)
+                if matched is None:
+                    # Label without a bracketed amount: skip it instead of raising.
+                    continue
+                span_y = np.mean(bbox[1::2])
+                if span_y < anchor_y:
+                    asp_1['position'] = bbox
+                    asp_1['words'] = matched.group(1)
+                if span_y > anchor_y:
+                    asp_2['position'] = bbox
+                    asp_2['words'] = matched.group(1)
+        return upper, lower, asp_1, asp_2
+
+    def get_loan_term(self, page_num='0'):
+        """Extract the loan term (number of months) from one page.
+
+        The text of all spans in text blocks is concatenated and searched for
+        '贷款期限<digits>个月'; the position is that of the last span containing
+        '<digits>个月'.
+
+        Returns:
+            dict: copy of ``self.item``; left unfilled when nothing matches.
+        """
+        term = self.item.copy()
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+        hit = re.search(r'贷款期限(\d+)个月', ''.join(text for _, text in spans))
+        if not hit:
+            return term
+        months = hit.group(1)
+        needle = f'{months}个月'
+        for bbox, text in spans:
+            if needle in text:
+                term['position'] = bbox
+                term['words'] = months
+        return term
+
+    def get_standard_rate(self, page_num='0'):
+        """Extract the standard annual interest rate from one page.
+
+        Returns:
+            dict: copy of ``self.item``; for the last span in a text block that
+            matches '本合同当期的标准利率为<rate>%/年', ``words`` is ``<rate>`` and
+            ``position`` is that span's bbox.
+        """
+        rate = self.item.copy()
+        text_blocks = (block for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0)
+        for block in text_blocks:
+            for line in block['lines']:
+                for span in line['spans']:
+                    hit = re.search(r'本合同当期的标准利率为(\S+)%/年', span['text'])
+                    if hit is None:
+                        continue
+                    rate['position'] = span['bbox']
+                    rate['words'] = hit.group(1)
+        return rate
+
+    def mergelist(self, text_list):
+        """Join every '所购...' entry with the entry after it while that entry has Chinese text.
+
+        Each pass looks for the last entry that contains '所购' and is followed by
+        an entry with at least one Chinese character, and glues the two together;
+        passes repeat until no such pair is left.
+
+        Args:
+            text_list: list of strings.
+
+        Returns:
+            ``text_list`` itself when nothing was merged, otherwise a new list.
+        """
+        non_chinese = re.compile("[^\u4e00-\u9fa5]")        # matches any non-Chinese character
+        current = text_list
+        while True:
+            merge_at = -1
+            # Pairing with the successor stops before the final entry, which has
+            # nothing after it to merge with.
+            for index, (entry, following) in enumerate(zip(current, current[1:])):
+                if '所购' in entry and non_chinese.sub('', following):
+                    merge_at = index
+            if merge_at == -1:
+                return current
+            current = (current[:merge_at]
+                       + [current[merge_at] + current[merge_at + 1]]
+                       + current[merge_at + 2:])
+
+    def get_asp_details(self, page_num):
+        """Rebuild the add-on product financing detail table from the OCR boxes of one page.
+
+        The three column headers ('项目1', '用途总金额2', '贷款本金3') are located first.
+        Starting from the '项目1' box, the method steps down one row at a time
+        (at most 10 steps) and, for each row whose first cell contains '所购',
+        looks up the cells under the other two headers via ``get_top_iou``.
+        A first-column text without '所购' is treated as a wrapped line and is
+        appended to the first cell of the previous row. Finally, the total is
+        read from the '贷款本金3' column at the height of the total label.
+
+        Args:
+            page_num: key of the page in ``self.ocr_results``.
+
+        Returns:
+            dict: copy of ``self.item`` whose ``words`` is the table (list of rows);
+            ``position`` is left as in ``self.item``.
+        """
+        asp_details_table_term = self.item.copy()
+
+        asp_details_table = [['附加产品融资贷款本金总金额及贷款利率明细'], ['项目1', '用途总金额2', '贷款本金3']]
+
+        page_ocr = self.ocr_results[page_num]
+        bbox_xm = None
+        bbox_ytzje = None
+        bbox_dkbj = None
+        bbox_total = None
+        for key in page_ocr:
+            bbox, text = page_ocr[key]
+            if text == '项目1':
+                bbox_xm = bbox
+            if text == '用途总金额2':
+                bbox_ytzje = bbox
+            if text == '贷款本金3':
+                bbox_dkbj = bbox
+            if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
+                bbox_total = bbox
+
+        # Rows can only be read when all three column headers were recognised;
+        # the column anchors below index into each of them.
+        if bbox_xm and bbox_ytzje and bbox_dkbj:
+            for _ in range(10):
+                # Shift the current first-column box down by 1.4 row heights.
+                rh = abs(bbox_xm[1]-bbox_xm[-1])
+                anchor = np.array(bbox_xm).reshape((-1, 2))
+                anchor[:, 1] += int(rh*1.4)
+                _iou, _key = self.get_top_iou(poly=anchor, ocr_result=page_ocr)
+                if not _iou > 0:
+                    break
+                bbox, xm_text = page_ocr[_key]
+                bbox_xm = bbox
+                # Handles an item description that wraps onto a second line.
+                if '所购' not in xm_text:
+                    asp_details_table[-1][0] += xm_text
+                    continue
+                # Same row (y from the item box), x range of the amount column.
+                anchor_1 = [bbox_ytzje[0], bbox[1], bbox_ytzje[2], bbox[3],
+                            bbox_ytzje[4], bbox[5], bbox_ytzje[6], bbox[7]]
+                # NOTE(review): get_top_iou yields key -1 when no valid box exists;
+                # that case is not handled here and would fail on the lookup.
+                _iou, _key = self.get_top_iou(poly=anchor_1, ocr_result=page_ocr)
+                bbox, ytzje_text = page_ocr[_key]
+                anchor_2 = [bbox_dkbj[0], bbox[1], bbox_dkbj[2], bbox[3],
+                            bbox_dkbj[4], bbox[5], bbox_dkbj[6], bbox[7]]
+                _iou, _key = self.get_top_iou(poly=anchor_2, ocr_result=page_ocr)
+                bbox, dkbj_text = page_ocr[_key]
+                if xm_text == ytzje_text:
+                    # Item and amount were recognised as one box: split them,
+                    # but only when the text really has exactly two parts.
+                    parts = xm_text.split(' ')
+                    if len(parts) == 2:
+                        xm_text, ytzje_text = parts
+                asp_details_table.append([xm_text, ytzje_text, dkbj_text])
+
+        # The total is read from the principal column, so that header is required too.
+        if bbox_total and bbox_dkbj:
+            anchor = [bbox_dkbj[0], bbox_total[1], bbox_dkbj[2], bbox_total[3],
+                      bbox_dkbj[4], bbox_total[5], bbox_dkbj[6], bbox_total[7]]
+            _iou, _key = self.get_top_iou(poly=anchor, ocr_result=page_ocr)
+            bbox, total_text = page_ocr[_key]
+            asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text])
+        asp_details_table_term['words'] = asp_details_table
+
+        return asp_details_table_term
+
+    def get_signature(self):
+        """Find the signing-date text on page '0'.
+
+        Returns:
+            dict: copy of ``self.item``; for the last span in a text block whose
+            text contains '签署日期', ``words`` is the whole span text and
+            ``position`` is its bbox.
+        """
+        result = self.item.copy()
+        text_blocks = (block for block in self.pdf_info['0']['blocks'] if block['type'] == 0)
+        for block in text_blocks:
+            for line in block['lines']:
+                for span in line['spans']:
+                    if '签署日期' not in span['text']:
+                        continue
+                    result['words'] = span['text']
+                    result['position'] = span['bbox']
+        return result
+
+    def get_somebody(self, top, bottom):
+        """Return the customer name and id found between two marker texts on page '1'.
+
+        The vertical band is bounded by ``bbox[3]`` of the last span containing
+        ``top`` and of the last span containing ``bottom`` (both default to 0 when
+        not found). Within that band, the name is taken from a span containing
+        '姓名/名称' and the id from one containing
+        '自然人身份证件号码/法人执照号码' (text after the last '：').
+
+        Returns:
+            tuple: ``(name, id)``, each a copy of ``self.item``.
+        """
+        name_item = self.item.copy()
+        id_item = self.item.copy()
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info['1']['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+
+        # First pass: locate the upper and lower limits of the band.
+        upper_y = 0
+        lower_y = 0
+        for bbox, text in spans:
+            if top in text:
+                upper_y = bbox[3]
+            if bottom in text:
+                lower_y = bbox[3]
+
+        # Second pass: read the labelled values that lie strictly inside the band.
+        for bbox, text in spans:
+            if not upper_y < bbox[3] < lower_y:
+                continue
+            if '姓名/名称' in text:
+                name_item['position'] = bbox
+                name_item['words'] = text.split('：')[-1]
+            if '自然人身份证件号码/法人执照号码' in text:
+                id_item['position'] = bbox
+                id_item['words'] = text.split('：')[-1]
+        return name_item, id_item
+
+    def get_seller(self):
+        """Find the dealer / vehicle seller name on page '1'.
+
+        The label is the last span whose text is exactly '经销商' or
+        '车辆销售方'. The value is the last span whose horizontal centre lies
+        between the label's right edge and the middle of the page, and whose
+        vertical centre lies within the label's vertical extent.
+
+        Returns:
+            dict: copy of ``self.item``; unfilled when no label or value is found.
+        """
+        seller = self.item.copy()
+        page = self.pdf_info['1']
+        spans = [(span['bbox'], span['text'])
+                 for block in page['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+
+        # Step 1: find the label (key).
+        label_bbox = None
+        for bbox, text in spans:
+            if text in ['经销商', '车辆销售方']:
+                label_bbox = bbox
+        if not label_bbox:
+            return seller
+
+        # Step 2: match the value that sits to the right of the label.
+        half_width = page['width'] * 0.5
+        for bbox, text in spans:
+            centre_x = np.mean(bbox[::2])
+            if not label_bbox[2] < centre_x < half_width:
+                continue
+            if label_bbox[1] < np.mean(bbox[1::2]) < label_bbox[3]:
+                seller['position'] = bbox
+                seller['words'] = text
+        return seller
+
+    def get_borrower_collection_account(self):
+        """Extract the borrower's receiving account (number, holder, bank) from page '1'.
+
+        The text of all spans in text blocks is concatenated. If it contains
+        '借款人收款账户', spaces are stripped and the three values are cut out
+        with regular expressions; each value's position is the bbox of the last
+        span whose text contains that value.
+
+        Returns:
+            tuple: ``(account, account_name, account_bank)``, copies of ``self.item``.
+        """
+        account = self.item.copy()
+        account_name = self.item.copy()
+        account_bank = self.item.copy()
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info['1']['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+        full_text = ''.join(text for _, text in spans)
+
+        # Decide which account layout this is; only the layout that is not
+        # "to be notified separately" is extracted.
+        if '借款人收款账户' not in full_text:
+            return account, account_name, account_bank
+
+        full_text = full_text.replace('　', '').replace(' ', '')
+        lookups = (
+            (account, r'账号：(.*?)户名'),
+            (account_name, r'户名：(.*?)开户行'),
+            (account_bank, r'开户行：(.*?)借款人'),
+        )
+        for field, pattern in lookups:
+            hits = re.findall(pattern, full_text)
+            if not hits:
+                continue
+            words = hits[0]
+            for bbox, text in spans:
+                if f'{words}' in text:
+                    field['position'] = bbox
+                    field['words'] = words
+        return account, account_name, account_bank
+
+    def get_payback_account(self):
+        """Extract the repayment account (number, holder, bank) from page '1'.
+
+        The text of all spans in text blocks is concatenated. If it contains
+        '(13) 还款账户', only the text after that marker is kept, spaces are
+        stripped, and the three values are cut out with regular expressions.
+        Number and holder are positioned at the last span containing the value;
+        the bank is positioned at the last span containing '开户行：<value>；'.
+
+        Returns:
+            tuple: ``(account, account_name, account_bank)``, copies of ``self.item``.
+        """
+        account = self.item.copy()
+        account_name = self.item.copy()
+        account_bank = self.item.copy()
+        spans = [(span['bbox'], span['text'])
+                 for block in self.pdf_info['1']['blocks'] if block['type'] == 0
+                 for line in block['lines']
+                 for span in line['spans']]
+        full_text = ''.join(text for _, text in spans)
+
+        # Decide which account layout this is; only the layout that is not
+        # "to be notified separately" is extracted.
+        if '(13) 还款账户' not in full_text:
+            return account, account_name, account_bank
+
+        full_text = full_text.split('(13) 还款账户')[-1]
+        full_text = full_text.replace('　', '').replace(' ', '')
+
+        def fill(field, pattern, is_hit):
+            # Take the first regex capture and attach the bbox of the last span accepted by is_hit.
+            hits = re.findall(pattern, full_text)
+            if not hits:
+                return
+            words = hits[0]
+            for bbox, text in spans:
+                if is_hit(words, text):
+                    field['position'] = bbox
+                    field['words'] = words
+
+        fill(account, r'账号：(.*?)户名', lambda words, text: f'{words}' in text)
+        fill(account_name, r'户名：(.*?)开户行', lambda words, text: f'{words}' in text)
+        fill(account_bank, r'开户行：(.*?)；',
+             lambda words, text: f'开户行：{words}；' in text.replace('　', ''))
+        return account, account_name, account_bank
+
+    def get_repayment_schedule(self):
+        """Collect the repayment schedule table from page '2'.
+
+        Span texts are gathered from the span whose text is exactly '序号' up to
+        (not including) the span containing
+        '以上表格中所列的序号并非还款期数', then grouped into rows of five
+        cells. Grouping stops at the first row whose second cell equals the
+        row's 1-based index.
+
+        Returns:
+            dict: copy of ``self.item``; ``words`` holds the rows when at least
+            one row was collected.
+        """
+        schedule = self.item.copy()
+        # Only page '2' is inspected.
+        cells = []
+        inside_table = False
+        text_blocks = (block for block in self.pdf_info['2']['blocks'] if block['type'] == 0)
+        for block in text_blocks:
+            for line in block['lines']:
+                for span in line['spans']:
+                    text = span['text']
+                    if '序号' == text:
+                        inside_table = True
+                    if '以上表格中所列的序号并非还款期数' in text:
+                        inside_table = False
+                    if inside_table:
+                        cells.append(text)
+
+        columns = 5  # the table has five columns
+        rows = []
+        for row_index in range(len(cells) // columns):
+            row = cells[row_index * columns:(row_index + 1) * columns]
+            if str(row_index + 1) == row[1]:
+                break
+            rows.append(row)
+
+        if rows:
+            schedule['words'] = rows
+        return schedule
+
+    def get_signature_role_1(self):
+        """Report whether the borrower (mortgagor) signature area holds a signature.
+
+        Across all pages, spans are collected from the one containing
+        '借款人(抵押人)' up to (not including) the next span containing '日期'.
+        More than four collected spans is reported as '有', otherwise '无'.
+
+        Returns:
+            dict: copy of ``self.init_item`` with ``page_num`` (last page that
+            contributed a span), ``position`` (``[xmin, ymin, xmax, ymax]`` over
+            the collected spans, or ``None`` when the area was not found) and
+            ``words``.
+        """
+        # NOTE(review): ``self.init_item`` is not assigned in the part of the class
+        # shown here; presumably it is set elsewhere - confirm.
+        signature_role_1 = self.init_item.copy()
+        # Locate the signature area first.
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for i in self.pdf_info:
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        text = span['text']
+                        if '借款人(抵押人)' in text:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(span['bbox'])
+        words = '有' if len(texts) > 4 else '无'
+        # Without any collected box there is nothing to take min/max of.
+        position = None
+        if boxes:
+            corners = np.array(boxes).reshape((-1, 2))
+            position = [min(corners[:, 0]), min(corners[:, 1]),
+                        max(corners[:, 0]), max(corners[:, 1])]
+        signature_role_1['page_num'] = page_num
+        signature_role_1['position'] = position
+        signature_role_1['words'] = words
+        return signature_role_1
+
+    def get_signature_role_2(self):
+        """Report whether the co-borrower (co-mortgagor) signature area holds a signature.
+
+        Across all pages, spans are collected from the one containing
+        '共同借款人(共同抵押人)' up to (not including) the next span containing
+        '日期'. More than four collected spans is reported as '有', otherwise '无'.
+
+        Returns:
+            dict: copy of ``self.init_item`` with ``page_num`` (last page that
+            contributed a span), ``position`` (``[xmin, ymin, xmax, ymax]`` over
+            the collected spans, or ``None`` when the area was not found) and
+            ``words``.
+        """
+        # NOTE(review): ``self.init_item`` is not assigned in the part of the class
+        # shown here; presumably it is set elsewhere - confirm.
+        signature_role_2 = self.init_item.copy()
+        # Locate the signature area first.
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for i in self.pdf_info:
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        text = span['text']
+                        if '共同借款人(共同抵押人)' in text:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(span['bbox'])
+        words = '有' if len(texts) > 4 else '无'
+        # Without any collected box there is nothing to take min/max of.
+        position = None
+        if boxes:
+            corners = np.array(boxes).reshape((-1, 2))
+            position = [min(corners[:, 0]), min(corners[:, 1]),
+                        max(corners[:, 0]), max(corners[:, 1])]
+        signature_role_2['page_num'] = page_num
+        signature_role_2['position'] = position
+        signature_role_2['words'] = words
+        return signature_role_2
+
+    def get_signature_role_3(self):
+        """Locate the guarantor-1 signature region and report whether it looks signed.
+
+        Walks the text blocks (``block['type'] == 0``) of every page in
+        ``self.pdf_info``. Collection starts at a span containing '保证人1'
+        on any page whose key is not 0 (``int(i) != 0``) and stops at the next
+        span containing '日期' (the stop span itself is not collected). The
+        on/off state is carried over from one page to the next.
+
+        Returns:
+            dict: A copy of ``self.init_item`` with ``page_num`` (key of the
+            last page a span was collected from, or None), ``position``
+            (``[x_min, y_min, x_max, y_max]`` over all collected boxes, or
+            None when nothing was collected) and ``words`` ('有' when more
+            than four spans were collected, otherwise '无').
+        """
+        signature_role_3 = self.init_item.copy()
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for i in self.pdf_info:
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人1' in text and int(i) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        # Without this guard min()/max() raise ValueError on an empty array
+        # whenever the marker is absent from the document.
+        if boxes:
+            points = np.array(boxes).reshape((-1, 2))
+            position = [min(points[:, 0]), min(points[:, 1]),
+                        max(points[:, 0]), max(points[:, 1])]
+        signature_role_3['page_num'] = page_num
+        signature_role_3['position'] = position
+        signature_role_3['words'] = words
+        return signature_role_3
+
+    def get_signature_role_4(self):
+        """Locate the guarantor-2 signature region and report whether it looks signed.
+
+        Walks the text blocks (``block['type'] == 0``) of every page in
+        ``self.pdf_info``. Collection starts at a span containing '保证人2'
+        on any page whose key is not 0 (``int(i) != 0``) and stops at the next
+        span containing '日期' (the stop span itself is not collected). The
+        on/off state is carried over from one page to the next.
+
+        Returns:
+            dict: A copy of ``self.init_item`` with ``page_num`` (key of the
+            last page a span was collected from, or None), ``position``
+            (``[x_min, y_min, x_max, y_max]`` over all collected boxes, or
+            None when nothing was collected) and ``words`` ('有' when more
+            than four spans were collected, otherwise '无').
+        """
+        signature_role_4 = self.init_item.copy()
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for i in self.pdf_info:
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人2' in text and int(i) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        # Without this guard min()/max() raise ValueError on an empty array
+        # whenever the marker is absent from the document.
+        if boxes:
+            points = np.array(boxes).reshape((-1, 2))
+            position = [min(points[:, 0]), min(points[:, 1]),
+                        max(points[:, 0]), max(points[:, 1])]
+        signature_role_4['page_num'] = page_num
+        signature_role_4['position'] = position
+        signature_role_4['words'] = words
+        return signature_role_4
+
+    def get_signature_role_5(self):
+        """Locate the witness signature region and report whether it looks signed.
+
+        Walks the text blocks (``block['type'] == 0``) of every page in
+        ``self.pdf_info``. Collection starts at a span containing '见证人签字'
+        on any page whose key is not 0 (``int(i) != 0``) and stops at the next
+        span containing '年' (the stop span itself is not collected). The
+        on/off state is carried over from one page to the next.
+
+        Returns:
+            dict: A copy of ``self.init_item`` with ``page_num`` (key of the
+            last page a span was collected from, or None), ``position``
+            (``[x_min, y_min, x_max, y_max]`` over all collected boxes, or
+            None when nothing was collected) and ``words`` ('有' when more
+            than four spans were collected, otherwise '无').
+        """
+        signature_role_5 = self.init_item.copy()
+        texts = []
+        boxes = []
+        page_num = None
+        region = False
+        for i in self.pdf_info:
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '见证人签字' in text and int(i) != 0:
+                            region = True
+                        if '年' in text:
+                            region = False
+                        if region:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        # The leftover debug print of the collected texts was removed.
+        words = '有' if len(texts) > 4 else '无'
+        position = None
+        # Without this guard min()/max() raise ValueError on an empty array
+        # whenever the marker is absent from the document.
+        if boxes:
+            points = np.array(boxes).reshape((-1, 2))
+            position = [min(points[:, 0]), min(points[:, 1]),
+                        max(points[:, 0]), max(points[:, 1])]
+        signature_role_5['page_num'] = page_num
+        signature_role_5['position'] = position
+        signature_role_5['words'] = words
+        return signature_role_5
+
+    def get_last_page_signature(self, page_num, top, bottom):
+        """Extract a signer name and signing date found between two anchor texts.
+
+        Args:
+            page_num: Key of the page in ``self.pdf_info`` to search.
+            top: Text marking the upper anchor; the ``bbox[1]`` of the last
+                span containing it is used.
+            bottom: Text marking the lower anchor, resolved the same way.
+
+        Returns:
+            tuple: ``(signature_name, signature_date)``, both copies of
+            ``self.item``. When both anchors exist, the last span that
+            contains '签署日期' and whose mean y lies strictly between the
+            anchors fills ``words`` (text before the first space for the
+            name, text after the last ':' for the date) and ``position``.
+        """
+        signature_name = self.item.copy()
+        signature_date = self.item.copy()
+        # Flatten the text blocks (type 0) of the page into (bbox, text) pairs.
+        entries = [
+            (span['bbox'], span['text'])
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        anchor_top = None
+        anchor_bottom = None
+        for box, content in entries:
+            if top in content:
+                anchor_top = box[1]
+            if bottom in content:
+                anchor_bottom = box[1]
+        if anchor_top is None or anchor_bottom is None:
+            return signature_name, signature_date
+        for box, content in entries:
+            if '签署日期' not in content:
+                continue
+            if not int(anchor_top) < np.mean(box[1::2]) < int(anchor_bottom):
+                continue
+            signature_name['words'] = content.split(' ')[0]
+            signature_name['position'] = box
+            signature_date['words'] = content.split(':')[-1]
+            signature_date['position'] = box
+        return signature_name, signature_date
+
+    def get_info(self):
+        """Run the field extractors and assemble the per-page result.
+
+        Sets ``self.is_asp`` when any text on page '0' of ``self.ocr_results``
+        contains '附加产品融资贷款本金总金额', builds ``self.init_result`` through
+        ``self.gen_init_result(self.is_asp)`` and, only when ``self.ocr_results``
+        holds at most 8 pages, fills ``page_1`` .. ``page_8`` of it.
+
+        NOTE(review): the previous docstring said ``block['type'] == 0`` marks
+        an image block, but the span loops in this file skip every block whose
+        type is not 0 and read 'lines' from the rest, so type 0 looks like the
+        text-bearing kind — confirm against the PDF parser.
+
+        Returns:
+            dict: ``{"is_asp": self.is_asp, "page_info": self.init_result}``.
+        """
+
+        # Decide first whether this is an ASP product: only page '0' is
+        # inspected, looking for the phrase '附加产品融资贷款本金总金额'.
+        # (An earlier variant that scanned self.pdf_info['0'] for an exact
+        # match was replaced by the OCR-result scan below.)
+        for key in self.ocr_results['0']:
+            bbox, text = self.ocr_results['0'][key]
+            if '附加产品融资贷款本金总金额' in text:
+                self.is_asp = True
+
+        self.gen_init_result(self.is_asp)
+
+        # NOTE(review): pages '0'..'7' are addressed unconditionally below
+        # although this guard also admits documents with fewer than 8 pages —
+        # confirm the extractors tolerate missing pages.
+        if len(list(self.ocr_results.keys())) <= 8:             # samples supplied by the customer for version 8.5 have content running across pages and cannot be recognised yet
+            # Page 1
+            # Contract number
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result['page_1']['合同编号'] = contract_no
+            # Purchased vehicle price
+            vehicle_price = self.get_vehicle_price()
+            self.init_result['page_1']['所购车辆价格'] = vehicle_price
+            # VIN
+            vin = self.get_vin()
+            self.init_result['page_1']['车架号'] = vin
+            # Loan principal; for ASP products the entry additionally carries
+            # '车辆贷款本金金额' and '附加产品融资贷款本金总金额'.
+            upper, lower, asp_1, asp_2 = self.get_loan_principal()
+            self.init_result['page_1']['贷款本金金额']['大写'] = upper
+            self.init_result['page_1']['贷款本金金额']['小写'] = lower
+            self.init_result['page_1']['贷款本金金额']['车辆贷款本金金额'] = asp_1
+            self.init_result['page_1']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
+            # Loan term
+            loan_term = self.get_loan_term()
+            self.init_result['page_1']['贷款期限'] = loan_term
+            # ASP financing principal detail (table)
+            asp_details_table = self.get_asp_details(page_num='0')
+            self.init_result['page_1']['附加产品融资贷款本金总金额明细'] = asp_details_table
+            # Borrower signature and date
+            signature = self.get_signature()
+            self.init_result['page_1']['借款人签字及时间'] = signature
+            #######################################
+            # Page 2
+            # Contract number
+            # NOTE(review): this reads page '0' although the other page_2
+            # fields below read page '1' — confirm this is intended.
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result['page_2']['合同编号'] = contract_no
+            # Borrower / mortgagor (the address field originally contained spaces)
+            borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人：', bottom='共同借款人：')
+            # Fallback anchors to stay compatible with version 8.1
+            if borrower_name['words'] == None:
+                borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人：', bottom='共同借款人及共同抵押人：')
+            # Fallback anchors for the variant where vehicle and loan are separated
+            if borrower_name['words'] == None:
+                borrower_name, borrower_id = self.get_somebody(top='借款人：', bottom='共同借款人及抵押人：')
+            self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name
+            self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id
+            # Co-borrower / co-mortgagor
+            co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人：', bottom='保证人1：')
+            self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name
+            self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id
+            # Guarantor 1
+            first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1：', bottom='保证人2：')
+            self.init_result['page_2']['保证人1']['name'] = first_guarantor_name
+            self.init_result['page_2']['保证人1']['id'] = first_guarantor_id
+            # Guarantor 2
+            second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2：', bottom='第一章')
+            self.init_result['page_2']['保证人2']['name'] = second_guarantor_name
+            self.init_result['page_2']['保证人2']['id'] = second_guarantor_id
+            # Purchased vehicle price
+            vehicle_price = self.get_vehicle_price(page_num='1')
+            self.init_result['page_2']['所购车辆价格'] = vehicle_price
+            # VIN
+            vin = self.get_vin(page_num='1')
+            self.init_result['page_2']['车架号'] = vin
+            # Dealer
+            seller = self.get_seller()
+            self.init_result['page_2']['经销商'] = seller
+            # Loan principal; for ASP products the entry additionally carries
+            # '车辆贷款本金金额' and '附加产品融资贷款本金总金额'.
+            upper, lower, asp_1, asp_2 = self.get_loan_principal(page_num='1')
+            self.init_result['page_2']['贷款本金金额']['大写'] = upper
+            self.init_result['page_2']['贷款本金金额']['小写'] = lower
+            self.init_result['page_2']['贷款本金金额']['车辆贷款本金金额'] = asp_1
+            self.init_result['page_2']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
+            # Loan term
+            loan_term = self.get_loan_term(page_num='1')
+            self.init_result['page_2']['贷款期限'] = loan_term
+            # Standard interest rate of the contract for the current period
+            standard_rate = self.get_standard_rate(page_num='1')
+            self.init_result['page_2']['标准利率'] = standard_rate
+            # Borrower collection account (added in the 202212 release)
+            account, account_name, account_bank = self.get_borrower_collection_account()
+            self.init_result['page_2']['借款人收款账户']['账号'] = account
+            self.init_result['page_2']['借款人收款账户']['户名'] = account_name
+            self.init_result['page_2']['借款人收款账户']['开户行'] = account_bank
+            # Repayment account
+            account, account_name, account_bank = self.get_payback_account()
+            self.init_result['page_2']['还款账户']['账号'] = account
+            self.init_result['page_2']['还款账户']['户名'] = account_name
+            self.init_result['page_2']['还款账户']['开户行'] = account_bank
+            #######################################
+            # Page 3
+            # Contract number
+            contract_no = self.get_contract_no(page_num='2')
+            self.init_result['page_3']['合同编号'] = contract_no
+            # Repayment schedule (table)
+            repayment_schedule_table = self.get_repayment_schedule()
+            self.init_result['page_3']['还款计划表'] = repayment_schedule_table
+            #######################################
+            # Page 4
+            # Contract number
+            contract_no = self.get_contract_no(page_num='3')
+            self.init_result['page_4']['合同编号'] = contract_no
+            # ASP financing principal detail (table)
+            asp_details_table = self.get_asp_details(page_num='3')
+            self.init_result['page_4']['附加产品融资贷款本金总金额明细'] = asp_details_table
+            #######################################
+            # Page 5
+            # Contract number
+            contract_no = self.get_contract_no(page_num='4')
+            self.init_result['page_5']['合同编号'] = contract_no
+            #######################################
+            # Page 6
+            # Contract number
+            contract_no = self.get_contract_no(page_num='5')
+            self.init_result['page_6']['合同编号'] = contract_no
+            # Page 7
+            # Contract number
+            contract_no = self.get_contract_no(page_num='6')
+            self.init_result['page_7']['合同编号'] = contract_no
+            # Page 8
+            # Contract number
+            contract_no = self.get_contract_no(page_num='7')
+            self.init_result['page_8']['合同编号'] = contract_no
+            # Main borrower signature: try the short lower anchor first, then
+            # the variant with the full-width parenthesised suffix.
+            signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='合同编号', bottom='共同借款人')
+            if signature_name['words'] == None:
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='合同编号', bottom='共同借款人（抵押人）')
+            self.init_result['page_8']['主借人签字']['签字'] = signature_name
+            self.init_result['page_8']['主借人签字']['日期'] = signature_date
+            # Co-borrower signature, with the same two anchor variants.
+            signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='共同借款人', bottom='保证人1')
+            if signature_name['words'] == None:
+                signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='共同借款人（抵押人）', bottom='保证人1')
+            self.init_result['page_8']['共借人签字']['签字'] = signature_name
+            self.init_result['page_8']['共借人签字']['日期'] = signature_date
+            # Guarantor 1 signature
+            signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='保证人1', bottom='保证人2')
+            self.init_result['page_8']['保证人1签字']['签字'] = signature_name
+            self.init_result['page_8']['保证人1签字']['日期'] = signature_date
+            # Guarantor 2 signature
+            signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='保证人2', bottom='在本人面前亲笔签署本合同')
+            self.init_result['page_8']['保证人2签字']['签字'] = signature_name
+            self.init_result['page_8']['保证人2签字']['日期'] = signature_date
+            # Witness signature
+            signature_name, signature_date = self.get_last_page_signature(page_num='7',
+                                                top='在本人面前亲笔签署本合同', bottom='以下无正文')
+            self.init_result['page_8']['见证人签字']['签字'] = signature_name
+            self.init_result['page_8']['见证人签字']['日期'] = signature_date
+
+        # Repackage the output for the caller
+        new_results = {"is_asp": self.is_asp,
+                       "page_info": self.init_result
+        }
+        return new_results
\ No newline at end of file
--- a/src/common/electronic_hil_contract/get_char_fsm.py 0 → 100644
View file @8d595a3
+++ b/src/common/electronic_hil_contract/get_char_fsm.py 0 → 100644
View file @8d595a3
+import re
+import numpy as np
+from fuzzywuzzy import fuzz
+from shapely.geometry import Polygon
+
+def caculate_iou(g, p):
+    """Return the intersection-over-union of two polygons.
+
+    Args:
+        g: Flat coordinate sequence ``(x1, y1, x2, y2, ...)`` of the first
+            polygon; it is reshaped into ``(-1, 2)`` points.
+        p: Flat coordinate sequence of the second polygon, same layout.
+
+    Returns:
+        float: ``intersection_area / union_area``, or ``0.0`` when the union
+        has no area (both polygons degenerate), which previously raised
+        ZeroDivisionError.
+    """
+    g = Polygon(np.array(g).reshape((-1, 2)))
+    p = Polygon(np.array(p).reshape((-1, 2)))
+    inter = g.intersection(p).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0.0
+    return inter / union
+
+def get_table_info(bbox_1, bbox_2, ocr_result):
+    """Return the text of the last OCR entry overlapping a derived anchor box.
+
+    The anchor takes its even-indexed coordinates from ``bbox_2`` and its
+    odd-indexed coordinates from ``bbox_1`` (presumably the x extent of one
+    header cell and the y extent of another — confirm with the caller).
+
+    Args:
+        bbox_1: Eight-value box supplying the odd-indexed coordinates.
+        bbox_2: Eight-value box supplying the even-indexed coordinates.
+        ocr_result: Iterable of entries whose item 0 is a box and item 1 a text.
+
+    Returns:
+        The text of the last entry whose IoU with the anchor is positive,
+        or ``''`` when no entry overlaps.
+    """
+    anchor = [bbox_2[k] if k % 2 == 0 else bbox_1[k] for k in range(8)]
+    hits = [entry[1] for entry in ocr_result
+            if caculate_iou(anchor, entry[0]) > 0]
+    return hits[-1] if hits else ''
+
+class Finder:
+
+    def __init__(self, pdf_info):
+        """Store the parsed PDF and prepare the empty result templates.
+
+        Args:
+            pdf_info: Mapping of page key to page data; the extraction
+                methods read ``pdf_info[page]['blocks']``.
+
+        Every field of the three templates gets its own copy of ``self.item``.
+        Previously all fields shared the one ``self.item`` object, so an
+        in-place write to any field showed up in every other field and in the
+        template itself.
+        """
+        self.pdf_info = pdf_info
+        self.item = {"words": None,
+                     "page": None,
+                     "position": None,
+                    }
+        # Field names of the main contract output
+        contract_fields = (
+            "合同编号",
+            "承租人-姓名",
+            "承租人-证件号码",
+            "承租人-法定代表人或授权代表",
+            "共同承租人-姓名",
+            "共同承租人-证件号码",
+            "共同承租人-法定代表人或授权代表",
+            "保证人1-姓名",
+            "保证人1-证件号码",
+            "保证人1-法定代表人或授权代表",
+            "保证人2-姓名",
+            "保证人2-证件号码",
+            "保证人2-法定代表人或授权代表",
+            "保证人3-姓名",
+            "保证人3-证件号码",
+            "保证人3-法定代表人或授权代表",
+            "合同编号（正文）",
+            "车辆识别代码",
+            "车辆卖方（经销商）",
+            "车辆原始销售价格（《机动车销售统一发票》所列金额）",
+            "车辆附加产品明细表",
+            "融资成本总额",
+            "租期",
+            "付款计划表",
+            "承租人收款账户-户名",
+            "承租人收款账户-银行账号",
+            "承租人收款账户-开户行",
+            "承租人扣款账户-户名",
+            "承租人扣款账户-银行账号",
+            "承租人扣款账户-开户行",
+            "签字页-承租人姓名",
+            "签字页-承租人签章",
+            "签字页-共同承租人姓名",
+            "签字页-共同承租人签章",
+            "签字页-保证人1姓名",
+            "签字页-保证人1签章",
+            "签字页-保证人2姓名",
+            "签字页-保证人2签章",
+            "签字页-保证人3姓名",
+            "签字页-保证人3签章",
+        )
+        # Field names of the vehicle disposal agreement output
+        disposal_fields = (
+            "合同编号",
+            "承租人-姓名",
+            "承租人-证件号码",
+            "销售经销商",
+            "合同编号（正文）",
+            "签字页-承租人姓名",
+            "签字页-承租人证件号码",
+            "签字页-承租人签章",
+            "签字页-销售经销商",
+            "签字页-销售经销商签章",
+        )
+        # Field names of the vehicle lease mortgage contract output
+        mortgage_fields = (
+            "合同编号",
+            "合同编号（正文）",
+            "抵押人姓名/名称",
+            "抵押人证件号码",
+            "抵押人配偶姓名/名称",
+            "抵押人配偶证件号码",
+            "车辆识别代码",
+            "租金总额",
+            "融资租赁期限",
+            "签字页-抵押人姓名",
+            "签字页-抵押人签章",
+            "签字页-抵押人配偶姓名",
+            "签字页-抵押人配偶签章",
+        )
+        self.init_result = {name: self.item.copy() for name in contract_fields}
+        self.init_result_1 = {name: self.item.copy() for name in disposal_fields}
+        self.init_result_2 = {name: self.item.copy() for name in mortgage_fields}
+
+    def get_contract_no(self, page_num):
+        """Read the contract number printed on the given page.
+
+        The last span containing '合同编号：' supplies the value (the text
+        after the last '：'). If that value is an empty string, the last span
+        containing 'CH' whose top edge lies above the bottom edge of the
+        currently recorded position is used instead.
+
+        Args:
+            page_num (str): Key of the page in ``self.pdf_info``.
+
+        Returns:
+            dict: A copy of ``self.item`` with ``words``, ``page`` and
+            ``position`` filled in when a number was found.
+        """
+        contract_no = self.item.copy()
+        # Flatten the text blocks (type 0) of the page into (bbox, text) pairs.
+        entries = [
+            (span['bbox'], span['text'])
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        for box, content in entries:
+            if '合同编号：' not in content:
+                continue
+            contract_no.update(position=box, page=page_num,
+                               words=content.split('：')[-1])
+        if contract_no['words'] != '':
+            return contract_no
+        # The label was present but its value sits in a separate span.
+        for box, content in entries:
+            if box[1] < contract_no['position'][3] and 'CH' in content:
+                contract_no.update(position=box, page=page_num, words=content)
+        return contract_no
+
+    def get_vehicle_price(self, page_num='0'):
+        """Read the purchased-vehicle price stated on a page.
+
+        Args:
+            page_num (str): Key of the page in ``self.pdf_info``.
+
+        Returns:
+            dict: A copy of ``self.item``; for the last span containing
+            '所购车辆价格为人民币', ``words`` holds the text after the last
+            '币' and ``position`` the span's bbox. ``page`` is not set.
+        """
+        vehicle_price = self.item.copy()
+        entries = [
+            (span['bbox'], span['text'])
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        for box, content in entries:
+            if '所购车辆价格为人民币' not in content:
+                continue
+            vehicle_price['position'] = box
+            vehicle_price['words'] = content.split('币')[-1]
+        return vehicle_price
+
+    def get_contract_no_one(self):
+        """Find the contract number quoted in the body text.
+
+        The text of each page is concatenated (spaces removed) so that a
+        number broken across spans or lines is still matched. Pages are
+        visited in ``self.pdf_info`` order and the first page that matches
+        wins. Two patterns are tried per page, in this order:
+
+        1. ``（合同编号：[...]）``
+        2. ``编号为...的``
+
+        The former third pattern ``编号为...）的`` was dropped: any text it
+        matches is also matched by pattern 2, which returned first and
+        already strips the trailing '）'.
+
+        Returns:
+            dict: A copy of ``self.item`` with ``words`` (whitespace and '）'
+            removed) and ``page`` set on a match; ``position`` stays None.
+        """
+        contract_no = self.item.copy()
+        # Raw strings avoid the invalid "\s" escape of the original literals.
+        patterns = (r'（合同编号：\[(.*?)\]）', r'编号为(.*?)的')
+        for pno in self.pdf_info:
+            all_text = ''.join(
+                span['text']
+                for block in self.pdf_info[pno]['blocks'] if block['type'] == 0
+                for line in block['lines']
+                for span in line['spans']
+            ).replace(' ', '')
+            for pattern in patterns:
+                match_obj = re.search(pattern, all_text)
+                if match_obj is None:
+                    continue
+                contract_no['position'] = None
+                contract_no['page'] = pno
+                contract_no['words'] = re.sub(r"\s", "", match_obj.group(1)).replace("）", "")
+                return contract_no
+        return contract_no
+
+    def get_key_value(self, key, page_num=None):
+        """Return the value printed after a key label.
+
+        Args:
+            key (str): Label to look for inside span texts.
+            page_num (str, optional): Restrict the search to this page key of
+                ``self.pdf_info``; when None every page is searched.
+
+        Returns:
+            dict: A copy of ``self.item``. For the last span containing
+            ``key``, ``words`` is the text after the last '：' with '。' and
+            all whitespace removed, ``page`` the page key and ``position``
+            the span's bbox.
+        """
+        value = self.item.copy()
+        # One code path for both cases: a single page or every page.
+        pages = self.pdf_info if page_num is None else [page_num]
+        for pno in pages:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if key not in text:
+                            continue
+                        words = text.split('：')[-1].replace("。", "")
+                        value['position'] = bbox
+                        value['page'] = pno
+                        # Raw string avoids the invalid "\s" escape sequence.
+                        value['words'] = re.sub(r"\s", "", words)
+        return value
+
+    def get_loan_principal(self, page_num='0'):
+        """Extract the loan principal amounts from a page.
+
+        Args:
+            page_num (str): Key of the page in ``self.pdf_info``.
+
+        Returns:
+            tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of
+            ``self.item`` with ``words``/``position`` set when found:
+
+            * ``upper``: last span whose fuzzy ratio against the Chinese
+              numeral characters exceeds 15 (text after the last '：').
+            * ``lower``: last span containing '小写：¥' (text after '¥').
+            * ``asp_1`` / ``asp_2``: bracketed amounts of spans containing
+              '人民币：小写：' located above / below the span that equals
+              '附加产品融资贷款本金总金额'; left empty when no such span exists.
+        """
+        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
+                            '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
+        keyword_text = ''.join(chinese_keywords)
+        upper = self.item.copy()
+        lower = self.item.copy()
+        asp_1 = self.item.copy()
+        asp_2 = self.item.copy()
+        anchor_bbox = None
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    if fuzz.ratio(keyword_text, text) > 15:
+                        # ``text`` is rebound on purpose to keep the original
+                        # behaviour: the two checks below then see the
+                        # trimmed value for such spans.
+                        text = text.split('：')[-1].strip()
+                        upper['position'] = bbox
+                        upper['words'] = text
+                    if '小写：¥' in text:
+                        words = text.split('¥')[-1].strip()
+                        lower['position'] = bbox
+                        lower['words'] = words
+                    if '附加产品融资贷款本金总金额' == text:
+                        anchor_bbox = bbox
+        if anchor_bbox:
+            anchor_y = np.mean(anchor_bbox[1::2])
+            amount_pattern = re.compile(r'人民币：小写：\[(.*)\]')
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '人民币：小写：' not in text:
+                            continue
+                        match_obj = amount_pattern.search(text)
+                        if match_obj is None:
+                            # Label without a bracketed amount; indexing the
+                            # empty findall() result used to raise IndexError.
+                            continue
+                        span_y = np.mean(bbox[1::2])
+                        if span_y < anchor_y:
+                            asp_1['position'] = bbox
+                            asp_1['words'] = match_obj.group(1)
+                        if span_y > anchor_y:
+                            asp_2['position'] = bbox
+                            asp_2['words'] = match_obj.group(1)
+        return upper, lower, asp_1, asp_2
+
+    def get_loan_term(self, page_num='0'):
+        """Read the loan term in months from a page.
+
+        The span texts of the page are concatenated and searched for
+        '贷款期限<digits>个月'; the last span containing '<digits>个月' then
+        supplies the position.
+
+        Args:
+            page_num (str): Key of the page in ``self.pdf_info``.
+
+        Returns:
+            dict: A copy of ``self.item`` with ``words`` (the digits) and
+            ``position`` set when such a span exists. ``page`` is not set.
+        """
+        loan_term = self.item.copy()
+        entries = [
+            (span['bbox'], span['text'])
+            for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        found = re.search(r'贷款期限(\d+)个月', ''.join(content for _, content in entries))
+        if not found:
+            return loan_term
+        months = found.group(1)
+        needle = f'{months}个月'
+        for box, content in entries:
+            if needle in content:
+                loan_term['position'] = box
+                loan_term['words'] = months
+        return loan_term
+
+    def get_asp_details(self, page_num):
+        """Collect the ASP financing detail table from a page.
+
+        Span texts are gathered from the span that equals
+        '附加产品融资贷款本金总金额明细' up to, but not including, the next
+        span containing '第二条' or '征信管理'. The first gathered text forms
+        a one-cell title row; the following texts are grouped three per row,
+        and an incomplete trailing group is dropped.
+
+        Args:
+            page_num (str): Key of the page in ``self.pdf_info``.
+
+        Returns:
+            dict: A copy of ``self.item`` whose ``words`` is the list of rows
+            when at least one row exists, otherwise left as None.
+        """
+        asp_details_table_term = self.item.copy()
+
+        cells = []
+        inside_table = False
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    if '附加产品融资贷款本金总金额明细' == text:
+                        inside_table = True
+                    if '第二条' in text or '征信管理' in text:
+                        inside_table = False
+                    if inside_table:
+                        cells.append(text)
+
+        # Row 0 is the title cell; row r >= 1 covers cells[3r-2 : 3r+1].
+        row_count = (len(cells) + 2) // 3
+        rows = [cells[:1] if r == 0 else cells[3 * r - 2:3 * r + 1]
+                for r in range(row_count)]
+
+        if rows:
+            asp_details_table_term['words'] = rows
+        return asp_details_table_term
+
+    def get_signature(self):
+        """Return the signing-date line found on page '0'.
+
+        Returns:
+            dict: A copy of ``self.item``; for the last span on page '0'
+            containing '签署日期', ``words`` is the full span text and
+            ``position`` its bbox. ``page`` is not set.
+        """
+        signature = self.item.copy()
+
+        entries = [
+            (span['bbox'], span['text'])
+            for block in self.pdf_info['0']['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        for box, content in entries:
+            if '签署日期' not in content:
+                continue
+            signature['words'] = content
+            signature['position'] = box
+        return signature
+
+    def get_somebody(self, top, bottom):
+        """Return the name and ID number printed between two labels on page '1'.
+
+        Args:
+            top (str): Label marking the upper bound; the bottom edge
+                (``bbox[3]``) of the last span containing it is used.
+            bottom (str): Label marking the lower bound, resolved the same way.
+                Either bound stays 0 when its label is missing.
+
+        Returns:
+            tuple: ``(_name, _id)``, both copies of ``self.item``. Among spans
+            whose bottom edge lies strictly between the bounds, the last one
+            containing '姓名/名称' fills ``_name`` and the last one containing
+            '自然人身份证件号码/法人执照号码' fills ``_id`` (text after the
+            last '：' plus the span's bbox).
+        """
+        _name = self.item.copy()
+        _id = self.item.copy()
+        entries = [
+            (span['bbox'], span['text'])
+            for block in self.pdf_info['1']['blocks'] if block['type'] == 0
+            for line in block['lines']
+            for span in line['spans']
+        ]
+        # First pass: fix the vertical bounds.
+        y_top = 0
+        y_bottom = 0
+        for box, content in entries:
+            if top in content:
+                y_top = box[3]
+            if bottom in content:
+                y_bottom = box[3]
+        # Second pass: pick the labelled values inside the bounds.
+        for box, content in entries:
+            if not y_top < box[3] < y_bottom:
+                continue
+            tail = content.split('：')[-1]
+            if '姓名/名称' in content:
+                _name['position'] = box
+                _name['words'] = tail
+            if '自然人身份证件号码/法人执照号码' in content:
+                _id['position'] = box
+                _id['words'] = tail
+        return _name, _id
+
+    def get_seller(self):
+        seller = self.item.copy()
+        # 先找到 key
+        anchor_bbox = None
+        for block in self.pdf_info['1']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    if '经销商' == text:
+                        anchor_bbox = bbox
+        # 当找到了 key, 则根据 key 去匹配 value
+        if anchor_bbox:
+            half_width = self.pdf_info['1']['width'] * 0.5
+            for block in self.pdf_info['1']['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \
+                            anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]:
+                            seller['position'] = bbox
+                            seller['words'] = text
+        return seller
+
+    def get_payback_account(self):
+        account = self.item.copy()
+        account_name = self.item.copy()
+        account_bank = self.item.copy()
+        all_text = ''
+        for block in self.pdf_info['1']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    all_text += text
+        # 首先确定账户信息是哪种,我们只输出非另行通知的格式
+        if '☑账号' in all_text:
+            all_text = all_text.replace('　', '')
+            matchs_1 = re.findall(r'账号：(.*)户名', all_text)
+            if matchs_1:
+                words = matchs_1[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account['position'] = bbox
+                                account['words'] = words
+            matchs_2 = re.findall(r'户名：(.*)开户行', all_text)
+            if matchs_2:
+                words = matchs_2[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'{words}' in text:
+                                account_name['position'] = bbox
+                                account_name['words'] = words
+            matchs_3 = re.findall(r'开户行：(.*)；', all_text)
+            if matchs_3:
+                words = matchs_3[0]
+                for block in self.pdf_info['1']['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if f'开户行：{words}；' in text.replace('　', ''):
+                                account_bank['position'] = bbox
+                                account_bank['words'] = words
+        return account, account_name, account_bank
+
+    def get_repayment_schedule(self):
+        repayment_schedule = self.item.copy()
+
+        repayment_schedule_text_list = []
+        table = False
+        page = None
+        left = 0
+        right = 0
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '剩余融资' in text:
+                            right = bbox[2]
+                        if '以上表格中所列序号' in text:
+                            table = False
+                        if table == True:
+                            # 过滤汉字
+                            if re.compile(r'[\u4e00-\u9fff]').search(text):
+                                continue
+                            # 过滤 1. - 61. 这些标题
+                            if re.findall("\d+", text):
+                                if len(re.findall("\d+", text)) == 1:
+                                    continue
+                            if not left < bbox[0] < right:
+                                continue
+                            repayment_schedule_text_list.append(text)
+
+                        if text.strip() == "61.":
+                            page = pno
+                            table = True
+                            left = bbox[0]
+        # print("repayment_schedule_text_list = ", repayment_schedule_text_list)
+        # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
+        repayment_schedule_table = [['序号', '租金']]
+        for i in range(len(repayment_schedule_text_list)//4):
+            line = [f'{i+1}.']
+            # 4表示4列的意思
+            for j in range(4):
+                line.append(repayment_schedule_text_list[i*4+j])
+
+            # 只保留序号和租金列
+            line = [line[0].replace('.', ''), line[3]]
+
+            repayment_schedule_table.append(line)
+
+        repayment_schedule['words'] = repayment_schedule_table
+        repayment_schedule['page'] = page
+        return repayment_schedule
+
+    def get_signature_role_1(self):
+        signature_role_1 = self.item.copy()
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '签署日期' in text:
+                            signature_role_1['position'] = bbox
+                            signature_role_1['page'] = pno
+                            signature_role_1['words'] = text
+        return signature_role_1
+
+    def get_signature_role_2(self):
+        signature_role_2 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '共同借款人(共同抵押人)' in text:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_2['page_num'] = page_num
+        signature_role_2['position'] = position
+        signature_role_2['words'] = words
+        return signature_role_2
+
+    def get_signature_role_3(self):
+        signature_role_3 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人1' in text and int(i) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_3['page_num'] = page_num
+        signature_role_3['position'] = position
+        signature_role_3['words'] = words
+        return signature_role_3
+
+    def get_signature_role_4(self):
+        signature_role_4 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '保证人2' in text and int(i) != 0:
+                            region = True
+                        if '日期' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_4['page_num'] = page_num
+        signature_role_4['position'] = position
+        signature_role_4['words'] = words
+        return signature_role_4
+
+    def get_signature_role_5(self):
+        signature_role_5 = self.init_item.copy()
+        # 先定位签字区域
+        texts = []
+        boxes = []
+        page_num = None
+        position = None
+        words = None
+        region = False
+        for i in list(self.pdf_info.keys()):
+            for block in self.pdf_info[i]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '见证人签字' in text and int(i) != 0:
+                            region = True
+                        if '年' in text:
+                            region = False
+                        if region == True:
+                            page_num = i
+                            texts.append(text)
+                            boxes.append(bbox)
+        # print(texts)
+        if len(texts) > 4:
+            words = '有'
+        else:
+            words = '无'
+        boxes = np.array(boxes).reshape((-1, 2))
+        position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
+        signature_role_5['page_num'] = page_num
+        signature_role_5['position'] = position
+        signature_role_5['words'] = words
+        return signature_role_5
+
+    def get_last_page_signature(self, page_num, top, bottom):
+        signature_name = self.item.copy()
+        signature_date = self.item.copy()
+        anchor_top = None
+        anchor_bottom = None
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    if top in text:
+                        anchor_top = bbox[1]
+                    if bottom in text:
+                        anchor_bottom = bbox[1]
+        if anchor_top is not None and anchor_bottom is not None:
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
+                            name = text.split(' ')[0]
+                            date = text.split(':')[-1]
+                            signature_name['words'] = name
+                            signature_name['position'] = bbox
+                            signature_date['words'] = date
+                            signature_name['position'] = bbox
+        return signature_name, signature_date
+
+    def get_electronic_signature(self, top, bottom):
+        signature = self.item.copy()
+        anchor_top = None
+        anchor_bottom = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if top in text:
+                            anchor_top = bbox[1]
+                        if bottom in text:
+                            anchor_bottom = bbox[3] 
+        if anchor_top is not None and anchor_bottom is not None:
+            for pno in self.pdf_info:
+                for block in self.pdf_info[pno]['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            # ------------ #
+                            # print("--text = ", text)
+                            if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
+                                words = text
+                                signature['words'] = words
+                                signature['page'] = pno
+                                signature['position'] = bbox
+        return signature
+
+    def get_role_info(self, role_key, page_num='0'):
+        name = self.item.copy()
+        id_num = self.item.copy()
+        representative = self.item.copy()
+
+        # 以保证人3 的左上角为定位点
+        anchor = None
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    # 找到角色姓名
+                    if re.match('保证人3', text) is not None:
+                        anchor = [bbox[0], bbox[1]]
+
+        if anchor is not None:
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        # 找到角色姓名
+                        if re.match(role_key, text) is not None:
+                            words = text.split('：')[-1]
+                            name['words'] = words
+                            name['page'] = page_num
+                            name['position'] = bbox
+                        if role_key == '承租人：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人1：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人2：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人3：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+        return name, id_num, representative
+
+    def get_table_add_product(self):
+        """Rebuild the add-on product table (item, purchase price, financed amount).
+
+        Steps, as implemented below:
+          1. Find the page that contains '车辆附加产品（明细见下表）'
+             (last match wins).
+          2. Collect every text span of that page as ``[corners, text]``,
+             where ``corners`` is the bbox expanded to eight values.
+          3. Locate the header cells and the '总计' (total) cell.
+          4. Walk down from the '项目' header with a moving anchor box and
+             read each row's values through ``get_table_info``.
+          5. Keep only header, '所购...' and total rows and normalise item
+             names that were split across two spans.
+
+        Returns:
+            A shallow copy of ``self.item`` with 'words' set to the list of
+            rows, 'page' set to the page key and 'position' set to ``None``.
+
+        NOTE(review): there is no guard for a missing page or missing header
+        cells; in that case the lookups below index with ``'None'`` / ``None``
+        and raise.  Confirm whether callers rely on that.
+        """
+        table_add_product = self.item.copy()
+
+        # Step 1: page holding the add-on product section.
+        add_product_page_num = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[f'{pno}']['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '车辆附加产品（明细见下表）' in text:
+                            add_product_page_num = pno
+        # Step 2: flatten the page into [8-value corner list, text] pairs.
+        ocr_results = []
+        for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    xmin, ymin, xmax, ymax = bbox
+                    # Corner order: (xmin,ymin), (xmax,ymin), (xmax,ymax), (xmin,ymax).
+                    bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
+                    ocr_results.append([bbox, text])
+
+        # Output rows; the first row is the fixed header.
+        lines = [['项目', '购买价格', '实际融资金额']]
+
+        # Step 3: indices (into ocr_results) of the header cells and the total cell.
+        key_xm = None
+        key_gmjg = None
+        key_sjrzje = None
+        key_total = None
+
+        for index, span in enumerate(ocr_results):
+            if span[1] == '项目':
+                key_xm = index
+            if span[1] == '购买价格':
+                key_gmjg = index
+            if span[1] == '实际融资金额':
+                key_sjrzje = index
+            if span[1] == '总计':
+                key_total = index
+
+        # Step 4: start from the '项目' header cell.
+        bbox, text = ocr_results[key_xm]
+        # ymin vs ymax of the header cell, i.e. its height.
+        rh = abs(bbox[1]-bbox[-1])
+        # Four (x, y) corner points, shifted by 2*rh in x and rh in y.
+        anchor = np.array(bbox).reshape((-1, 2))
+        anchor[:, 0] += 2*rh
+        anchor[:, 1] += rh
+
+        # At most five passes over the spans; each hit moves the anchor to the
+        # matched span shifted by rh in y.
+        # NOTE(review): caculate_iou and get_table_info are defined elsewhere;
+        # presumably an overlap ratio and a row/column cell lookup - confirm.
+        for i in range(5):
+            for span in ocr_results:
+                iou = caculate_iou(anchor, span[0])
+                if iou > 0.01 and span[1].strip() != '所购':
+                    x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
+                    y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
+                    line = [span[1].replace('\u3000', ' '), x, y]
+                    lines.append(line)
+                    anchor = np.array(span[0]).reshape((-1, 2))
+                    anchor[:, 1] += rh
+
+        # Total row: value read from the '实际融资金额' column.
+        total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
+        lines.append(['总计', '', total])
+        
+        # Step 5. Examples of item names that arrive split across two spans
+        # (first span / continuation span):
+        #   所购　BMW悦然焕 / 新服务
+        #   所购　BMW5年10 / 万公里长悦保养套餐
+        #   所购　事故维修补偿 / 方案
+        #   所购 BMW5年10万公里 / 长悦保养套餐
+        #   所购 MINI4年6万公里长悦 / 保养套餐
+
+        filtered_lines = []
+        for line in lines:
+            # Keep only rows whose first two characters mark a product, the
+            # header or the total.
+            if line[0][:2] not in ['所购', '项目', '总计']:
+                continue
+            # Replace known truncated names with the full product name.
+            if 'BMW悦然' in line[0]:
+                line[0] = '所购 BMW悦然焕新服务'
+            if 'BMW5年10' in line[0]:
+                line[0] = '所购 BMW5年10万公里长悦保养套餐'
+            if '事故维修补' in line[0]:
+                line[0] = '所购 事故维修补偿方案'
+            if 'MINI4年6万公里长悦' in line[0]:
+                line[0] = '所购 MINI4年6万公里长悦保养套餐'
+            filtered_lines.append(line)
+        table_add_product['words'] = filtered_lines
+        table_add_product['page'] = add_product_page_num
+        table_add_product['position'] = None
+        return table_add_product
+
+    def get_contract_no_dy(self):
+        # 查找抵押合同编号
+        contract_no = self.item.copy()
+
+        key_box = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '抵押合同编号' in text:
+                            key_box = bbox
+
+        if key_box is not None:
+            for pno in self.pdf_info:
+                for block in self.pdf_info[pno]['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and 'CH-' in text:
+                                contract_no['position'] = bbox
+                                contract_no['page'] = pno
+                                contract_no['words'] = text
+        return contract_no
+
+    def get_dyr_name_id(self):
+        name = self.item.copy()
+        _id = self.item.copy()
+
+        key_box = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if text == '抵押人':
+                           key_box = bbox
+                           
+        if key_box is not None:
+            rh = abs(key_box[1]-key_box[3])
+            for pno in self.pdf_info:
+                for block in self.pdf_info[pno]['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
+                                words = text.split('：')[-1]
+                                name['position'] = bbox
+                                name['page'] = pno
+                                name['words'] = words
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
+                                words = text.split('：')[-1]
+                                _id['position'] = bbox
+                                _id['page'] = pno
+                                _id['words'] = words
+        return name, _id
+
+    def get_dyrpo_name_id(self):
+        name = self.item.copy()
+        _id = self.item.copy()
+
+        key_box = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if text == '抵押人配偶(如适':
+                           key_box = bbox
+
+        if key_box is not None:
+            rh = abs(key_box[1]-key_box[3])
+            for pno in self.pdf_info:
+                for block in self.pdf_info[pno]['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
+                                words = text.split('：')[-1]
+                                name['position'] = bbox
+                                name['page'] = pno
+                                name['words'] = words
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
+                                words = text.split('：')[-1]
+                                _id['position'] = bbox
+                                _id['page'] = pno
+                                _id['words'] = words
+        return name, _id
+
+    def get_key_value_position(self, key):
+        value = self.item.copy()
+
+        key_box = None
+        for pno in self.pdf_info:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if text == key:
+                           key_box = bbox
+
+        if key_box is not None:
+            rh = abs(key_box[1]-key_box[3])
+            for pno in self.pdf_info:
+                for block in self.pdf_info[pno]['blocks']:
+                    if block['type'] != 0:
+                        continue
+                    for line in block['lines']:
+                        for span in line['spans']:
+                            bbox, text = span['bbox'], span['text']
+                            if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10:
+                                words = text
+                                value['position'] = bbox
+                                value['page'] = pno
+                                value['words'] = words
+        return value
+
+    def get_role_info_3_3(self, role_key, page_num='0'):
+        name = self.item.copy()
+        id_num = self.item.copy()
+        representative = self.item.copy()
+
+        # 以保证人2 的左上角为定位点
+        anchor = None
+        for block in self.pdf_info[page_num]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    # 找到角色姓名
+                    if re.match('保证人2', text) is not None:
+                        anchor = [bbox[0], bbox[1]]
+
+        if anchor is not None:
+            for block in self.pdf_info[page_num]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        # 找到角色姓名
+                        if re.match(role_key, text) is not None:
+                            words = text.split('：')[-1]
+                            name['words'] = words
+                            name['page'] = page_num
+                            name['position'] = bbox
+                        if role_key == '承租人一：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '共同承租人：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人1：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+                        if role_key == '保证人2：':
+                            # 找到证件号码且确定位置
+                            if re.match('证件号码：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                id_num['words'] = words
+                                id_num['page'] = page_num
+                                id_num['position'] = bbox
+                            # 找到法人代表且确定位置
+                            if re.match('法定代表人或授权代表：', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
+                                words = text.split('：')[-1]
+                                representative['words'] = words
+                                representative['page'] = page_num
+                                representative['position'] = bbox
+        return name, id_num, representative
+
+    def get_value_by_findall(self, prefix, suffix, page_num):
+        value = self.item.copy()
+        all_text = ''
+        pno = page_num
+        for block in self.pdf_info[pno]['blocks']:
+            if block['type'] != 0:
+                continue
+            for line in block['lines']:
+                for span in line['spans']:
+                    bbox, text = span['bbox'], span['text']
+                    all_text += text
+        words_list = re.findall(f"{prefix}(.*?){suffix}", all_text)
+        if len(words_list) > 0:
+            for block in self.pdf_info[pno]['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if words_list[0] in text:
+                            value['position'] = bbox
+                            value['page'] = pno
+                            value['words'] = words_list[0]
+        return value
+
+    def get_info(self):
+        """Populate and return ``self.init_result`` for this contract.
+
+        Nothing is extracted when ``self.pdf_info`` is empty. Otherwise the
+        method reads, in order: the first-page contract number, the per-role
+        name / ID number / representative triples, the body fields (contract
+        number, VIN, seller, price, add-on product table, financing cost,
+        lease term, payment schedule), the two bank-account groups on pages
+        '4' and '5', and the names and electronic signatures on the signing
+        page.
+
+        Two layouts are handled. When a type-0 block on page '0' contains
+        '共同承租人：', the role lookups go through ``get_role_info_3_3``;
+        otherwise ``get_role_info`` is tried first and the 3_3 lookups serve
+        as fallbacks.
+
+        Only blocks with ``block['type'] == 0`` are scanned here; they are the
+        ones whose ``lines``/``spans`` are read (presumably text blocks in the
+        PyMuPDF page-dict layout, where images are type 1 -- TODO confirm).
+
+        Returns:
+            dict: ``self.init_result`` with the extracted fields stored under
+            their Chinese field names.
+        """
+        if len(self.pdf_info) > 0:
+            # Contract number from the first page (page key '0').
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result['合同编号'] = contract_no
+
+            # Rough check for the '车贷分离' contract version: any span on the
+            # first page that mentions '共同承租人：' switches the layout.
+            is_cdfl = False
+            for block in self.pdf_info['0']['blocks']:
+                if block['type'] != 0:
+                    continue
+                for line in block['lines']:
+                    for span in line['spans']:
+                        bbox, text = span['bbox'], span['text']
+                        if '共同承租人：' in text:
+                            is_cdfl = True
+
+            if is_cdfl == False:
+                # Read name, ID number and representative of each role from
+                # the first page.
+                name, id_num, representative = self.get_role_info(role_key='承租人：', page_num='0')
+
+                # Fallback: retry with the 3_3 lookup and the '承租人一：' label.
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
+                self.init_result['承租人-姓名'] = name
+                self.init_result['承租人-证件号码'] = id_num
+                self.init_result['承租人-法定代表人或授权代表'] = representative
+
+                name, id_num, representative = self.get_role_info(role_key='保证人1：', page_num='0')
+                self.init_result['保证人1-姓名'] = name
+                self.init_result['保证人1-证件号码'] = id_num
+                self.init_result['保证人1-法定代表人或授权代表'] = representative  
+                # Fallback for the 3_3 version: look up the co-lessee instead.
+                # The 保证人1 keys stored just above keep the empty result.
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
+                    self.init_result['共同承租人-姓名'] = name
+                    self.init_result['共同承租人-证件号码'] = id_num
+                    self.init_result['共同承租人-法定代表人或授权代表'] = representative
+
+                name, id_num, representative = self.get_role_info(role_key='保证人2：', page_num='0')
+                self.init_result['保证人2-姓名'] = name
+                self.init_result['保证人2-证件号码'] = id_num
+                self.init_result['保证人2-法定代表人或授权代表'] = representative 
+                # Fallback for the 3_3 version.
+                # NOTE(review): the lookup uses role_key '保证人1：' but the result
+                # is stored under the 保证人2 keys, whereas the signing-page
+                # section stores the analogous fallback under 保证人1 -- confirm
+                # which mapping is intended.
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
+                    self.init_result['保证人2-姓名'] = name
+                    self.init_result['保证人2-证件号码'] = id_num
+                    self.init_result['保证人2-法定代表人或授权代表'] = representative
+
+                name, id_num, representative = self.get_role_info(role_key='保证人3：', page_num='0')
+                self.init_result['保证人3-姓名'] = name
+                self.init_result['保证人3-证件号码'] = id_num
+                self.init_result['保证人3-法定代表人或授权代表'] = representative
+                # NOTE(review): same pattern as above -- role_key '保证人2：' is
+                # stored under the 保证人3 keys; confirm.
+                if name["words"] == None:
+                    name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
+                    self.init_result['保证人3-姓名'] = name
+                    self.init_result['保证人3-证件号码'] = id_num
+                    self.init_result['保证人3-法定代表人或授权代表'] = representative
+            else:
+                # '车贷分离' layout: every role is read with the 3_3 lookup.
+                name, id_num, representative = self.get_role_info_3_3(role_key='承租人一：', page_num='0')
+                self.init_result['承租人-姓名'] = name
+                self.init_result['承租人-证件号码'] = id_num
+                self.init_result['承租人-法定代表人或授权代表'] = representative
+
+                name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人：', page_num='0')
+                self.init_result['共同承租人-姓名'] = name
+                self.init_result['共同承租人-证件号码'] = id_num
+                self.init_result['共同承租人-法定代表人或授权代表'] = representative
+
+                name, id_num, representative = self.get_role_info_3_3(role_key='保证人1：', page_num='0')
+                self.init_result['保证人1-姓名'] = name
+                self.init_result['保证人1-证件号码'] = id_num
+                self.init_result['保证人1-法定代表人或授权代表'] = representative
+
+                name, id_num, representative = self.get_role_info_3_3(role_key='保证人2：', page_num='0')
+                self.init_result['保证人2-姓名'] = name
+                self.init_result['保证人2-证件号码'] = id_num
+                self.init_result['保证人2-法定代表人或授权代表'] = representative
+
+            # Contract number quoted in the body text (part two: main lease
+            # terms and payment plan). Per the original note it may wrap
+            # across lines, so it is output without a position for now.
+            contract_no = self.get_contract_no_one()
+            self.init_result['合同编号（正文）'] = contract_no
+            # Vehicle identification code.
+            vin = self.get_key_value(key='车辆识别代码：')
+            self.init_result['车辆识别代码'] = vin
+            # Vehicle seller (dealer); fall back to the shorter label when the
+            # first lookup yields no words.
+            seller = self.get_key_value(key='车辆卖方（经销商）：')
+            if seller['words'] == None:
+                seller = self.get_key_value(key='车辆卖方：')
+            self.init_result['车辆卖方（经销商）'] = seller
+            # Original vehicle sale price (amount listed on the sales invoice).
+            vehicle_price = self.get_key_value(key='车辆原始销售价格（《机动车销售统一发票》所列金额）：')
+            self.init_result['车辆原始销售价格（《机动车销售统一发票》所列金额）'] = vehicle_price
+            # Table of vehicle add-on products.
+            table_add_product = self.get_table_add_product()
+            self.init_result['车辆附加产品明细表'] = table_add_product
+            # Total financing cost.
+            financing_cost = self.get_key_value(key='融资成本总额：')
+            self.init_result['融资成本总额'] = financing_cost
+            # Lease term.
+            lease_term = self.get_key_value(key='租期：')
+            self.init_result['租期'] = lease_term
+            # Payment schedule table.
+            repayment_schedule = self.get_repayment_schedule()
+            self.init_result['付款计划表'] = repayment_schedule
+            # Lessee receiving account: holder name, account number, bank.
+            # The page key '4' is hard-coded.
+            name = self.get_key_value(key='户名：', page_num='4')
+            self.init_result['承租人收款账户-户名'] = name
+            account = self.get_key_value(key='银行账号：', page_num='4')
+            self.init_result['承租人收款账户-银行账号'] = account
+            bank = self.get_key_value(key='开户银行：', page_num='4')
+            self.init_result['承租人收款账户-开户行'] = bank
+            # Lessee debit account: holder name, account number, bank.
+            # The page key '5' is hard-coded.
+            name = self.get_key_value(key='户名：', page_num='5')
+            self.init_result['承租人扣款账户-户名'] = name
+            account = self.get_key_value(key='银行账号：', page_num='5')
+            self.init_result['承租人扣款账户-银行账号'] = account
+            bank = self.get_key_value(key='开户银行：', page_num='5')
+            self.init_result['承租人扣款账户-开户行'] = bank
+
+            # Signing-page information.
+            # Lessee name and electronic signature.
+            if is_cdfl == False:
+                name = self.get_key_value(key='承租人姓名：')
+                electronic_signature = self.get_electronic_signature(top='承租人姓名：', bottom='保证人1姓名：')
+
+                # Fallback: retry with the '承租人一姓名：' label.
+                if name["words"] == None:
+                    name = self.get_key_value(key='承租人一姓名：')
+                    electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：')
+
+                self.init_result['签字页-承租人姓名'] = name
+                self.init_result['签字页-承租人签章'] = electronic_signature
+                # Guarantor 1 name and signature.
+                name = self.get_key_value(key='保证人1姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
+                self.init_result['签字页-保证人1姓名'] = name
+                self.init_result['签字页-保证人1签章'] = electronic_signature
+                # This check compares against "" rather than None.
+                # NOTE(review): neighbouring fallbacks test `== None`; confirm
+                # which empty value get_key_value returns here.
+                if name["words"] == "":
+                    name = self.get_key_value(key='共同承租人名称：')
+                    electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：')
+                    self.init_result['签字页-共同承租人姓名'] = name
+                    self.init_result['签字页-共同承租人签章'] = electronic_signature
+                # Guarantor 2 name and signature.
+                name = self.get_key_value(key='保证人2姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：')
+                self.init_result['签字页-保证人2姓名'] = name
+                self.init_result['签字页-保证人2签章'] = electronic_signature
+                # Fallback for the 3_3 version: re-reads guarantor 1 and
+                # overwrites the 签字页-保证人1 entries stored above.
+                if name["words"] == "":
+                    name = self.get_key_value(key='保证人1姓名：')
+                    electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
+                    self.init_result['签字页-保证人1姓名'] = name
+                    self.init_result['签字页-保证人1签章'] = electronic_signature
+                # Guarantor 3 name and signature.
+                name = self.get_key_value(key='保证人3姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人3姓名：', bottom='日期：')
+                self.init_result['签字页-保证人3姓名'] = name
+                self.init_result['签字页-保证人3签章'] = electronic_signature
+                # Fallback for the 3_3 version: re-reads guarantor 2 with
+                # '日期：' as the lower bound and overwrites the 签字页-保证人2
+                # entries stored above.
+                if name["words"] == None:
+                    name = self.get_key_value(key='保证人2姓名：')
+                    electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='日期：')
+                    self.init_result['签字页-保证人2姓名'] = name
+                    self.init_result['签字页-保证人2签章'] = electronic_signature
+            else:
+                # '车贷分离' layout: lessee one, co-lessee, guarantor 1, guarantor 2.
+                name = self.get_key_value(key='承租人一姓名：')
+                electronic_signature = self.get_electronic_signature(top='承租人一姓名：', bottom='共同承租人名称：')
+                self.init_result['签字页-承租人姓名'] = name
+                self.init_result['签字页-承租人签章'] = electronic_signature
+
+                name = self.get_key_value(key='共同承租人名称：')
+                electronic_signature = self.get_electronic_signature(top='共同承租人名称：', bottom='保证人1姓名：')
+                self.init_result['签字页-共同承租人姓名'] = name
+                self.init_result['签字页-共同承租人签章'] = electronic_signature
+
+                name = self.get_key_value(key='保证人1姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人1姓名：', bottom='保证人2姓名：')
+                self.init_result['签字页-保证人1姓名'] = name
+                self.init_result['签字页-保证人1签章'] = electronic_signature
+
+                name = self.get_key_value(key='保证人2姓名：')
+                electronic_signature = self.get_electronic_signature(top='保证人2姓名：', bottom='保证人3姓名：')
+                self.init_result['签字页-保证人2姓名'] = name
+                self.init_result['签字页-保证人2签章'] = electronic_signature
+
+        return self.init_result
+    
+    def get_info_1(self):
+        if len(self.pdf_info) > 0:
+            contract_no = self.get_contract_no(page_num='0')
+            self.init_result_1['合同编号'] = contract_no
+            # 承租人姓名
+            name = self.get_key_value(key='承租人：', page_num='0')
+            self.init_result_1['承租人-姓名'] = name
+            # 承租人证件号码
+            _id = self.get_key_value(key='证件号码：', page_num='0')
+            self.init_result_1['承租人-证件号码'] = _id
+            # 销售经销商
+            seller = self.get_key_value(key='销售经销商：', page_num='0')
+            if seller['words'] == "":
+                seller = self.get_value_by_findall('销售经销商：', '地址：', page_num='0')
+            self.init_result_1['销售经销商'] = seller
+            # 合同编号（正文）
+            contract_no = self.get_contract_no_one()
+            self.init_result_1['合同编号（正文）'] = contract_no
+            # 签字页-承租人姓名
+            name = self.get_key_value(key='姓名/名称：')
+            self.init_result_1['签字页-承租人姓名'] = name
+            # 签字页-承租人证件号码
+            _id = self.get_key_value(key='自然人身份证件号码/法人执照号码：')
+            self.init_result_1['签字页-承租人证件号码'] = _id
+            # 签字页-承租人签章
+            signature_role_1 = self.get_signature_role_1()
+            self.init_result_1['签字页-承租人签章'] = signature_role_1
+            # 签字页-销售经销商
+            seller = self.get_key_value(key='销售经销商：')
+            if seller['words'] == "":
+                # 销售经销商：深圳市宝创汽车贸易有限公司南山分公司（请授权代表签字并请盖章）
+                seller = self.get_value_by_findall('销售经销商：', '（请授权代表签字并请盖章）', page_num='3')
+            self.init_result_1['签字页-销售经销商'] = seller
+            # 经销商签章
+            pass
+        return self.init_result_1
+
+    def get_info_2(self):
+        if len(self.pdf_info) > 0:
+            contract_no = self.get_contract_no_dy()
+            self.init_result_2['合同编号'] = contract_no
+            # 合同编号（正文）
+            contract_no = self.get_contract_no_one()
+            self.init_result_2['合同编号（正文）'] = contract_no
+            # 抵押人姓名/名称
+            name, _id = self.get_dyr_name_id()
+            self.init_result_2['抵押人姓名/名称'] = name
+            self.init_result_2['抵押人证件号码'] = _id
+            # 抵押人配偶信息
+            name, _id = self.get_dyrpo_name_id()
+            self.init_result_2['抵押人配偶姓名/名称'] = name
+            self.init_result_2['抵押人配偶证件号码'] = _id
+            # 车辆识别代码
+            vin = self.get_key_value(key='车辆识别代码：')
+            self.init_result_2['车辆识别代码'] = vin
+            # 租金总额
+            rent = self.get_key_value_position(key='租金总额')
+            self.init_result_2['租金总额'] = rent
+            # 融资租赁期限
+            lease_term = self.get_key_value_position(key='融资租赁期限')
+            self.init_result_2['融资租赁期限'] = lease_term
+            # 签字页抵押人姓名和签章
+            name = self.get_key_value(key='抵押人姓名：')
+            electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名：')
+            self.init_result_2['签字页-抵押人姓名'] = name
+            self.init_result_2['签字页-抵押人签章'] = electronic_signature
+            # 签字页抵押人配偶姓名和签章
+            name = self.get_key_value(key='抵押人配偶姓名：')
+            electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名：', bottom='日期')
+            self.init_result_2['签字页-抵押人配偶姓名'] = name
+            self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature
+        return self.init_result_2
\ No newline at end of file
--- a/src/common/electronic_hil_contract/hil_contract_ocr.py
View file @8d595a3
+++ b/src/common/electronic_hil_contract/hil_contract_ocr.py
View file @8d595a3
@@ -6,9 +6,10 @@
 # @Description   :

 from .get_char import Finder
+from .get_char_fsm import Finder as FSMFinder


-def predict(pdf_info, file_cls):
+def predict(pdf_info, file_cls, is_fsm=False):
    """Summary

    Args:
@@ -58,7 +59,11 @@ def predict(pdf_info, file_cls):
        pdf_info = dict()
        for pno, page_info in enumerate(pdf_info_1):
            pdf_info[str(pno)] = page_info
-    f = Finder(pdf_info)
+
+    if is_fsm:
+        f = FSMFinder(pdf_info) 
+    else:
+        f = Finder(pdf_info)
    if file_cls == 0:
        results = f.get_info()
    if file_cls == 1: