# -*- coding: utf-8 -*-
# @Author        : lk
# @Email         : 9428.al@gmail.com
# @Created Date  : 2021-06-29 17:43:46
# @Last Modified : 2021-11-03 16:07:36
# @Description   :

from .get_char import Finder
from .get_char_fsm import Finder as FSMFinder

# Text markers searched for in page spans to recognise a contract type.
_LEASEBACK_MARKER = '售后回租合同_'
_DISPOSAL_MARKER = '售后回租合同附件一'
# Number of disposal-marker hits that signals an embedded disposal agreement.
_DISPOSAL_HIT_COUNT = 4
_SUPPORTED_FILE_CLS = (0, 1, 2)


def _pages_with_marker(pdf_info, marker):
    """Return the page entries of *pdf_info* whose text contains *marker*.

    Only blocks whose ``type`` equals 0 are inspected.  A page entry is
    appended once per matching span, so a page on which the marker occurs
    in several spans appears several times in the result.
    """
    hits = []
    for page_info in pdf_info.values():
        for block in page_info['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    if marker in span['text']:
                        hits.append(page_info)
    return hits


def predict(pdf_info, file_cls, is_fsm=False):
    """Extract contract fields from parsed PDF pages.

    Args:
        pdf_info: Mapping of page key to page dict.  Each page dict holds
            ``'blocks'``; each block holds ``'type'`` and ``'lines'``; each
            line holds ``'spans'``; each span holds ``'text'``.
            Page keys are presumably stringified page indices -- confirm
            against the caller.
        file_cls: Contract type to extract:
            0 -- sale-and-leaseback contract,
            1 -- vehicle disposal agreement,
            2 -- vehicle lease mortgage contract.
        is_fsm: When true, use ``FSMFinder`` instead of ``Finder``.

    Returns:
        The mapping returned by the finder, with every non-``None``
        ``'page'`` value rewritten in place as ``'page_<n>'``, where ``n``
        is the finder's page value plus one.

    Raises:
        ValueError: If ``file_cls`` is not 0, 1 or 2.
    """
    # Fail fast: previously an unsupported value surfaced only at the very
    # end as an UnboundLocalError on ``results``.
    if file_cls not in _SUPPORTED_FILE_CLS:
        raise ValueError(
            'file_cls must be one of %r, got %r'
            % (_SUPPORTED_FILE_CLS, file_cls)
        )

    if file_cls == 1:
        disposal_pages = _pages_with_marker(pdf_info, _DISPOSAL_MARKER)
        # Exactly four disposal-marker hits together with at least one
        # leaseback-marker hit means the input bundles the disposal
        # agreement with the leaseback contract; keep only the disposal
        # pages, re-keyed from '0'.
        if (len(disposal_pages) == _DISPOSAL_HIT_COUNT
                and _pages_with_marker(pdf_info, _LEASEBACK_MARKER)):
            pdf_info = {
                str(pno): page_info
                for pno, page_info in enumerate(disposal_pages)
            }

    finder_cls = FSMFinder if is_fsm else Finder
    finder = finder_cls(pdf_info)

    if file_cls == 0:
        results = finder.get_info()
    elif file_cls == 1:
        results = finder.get_info_1()
    else:
        results = finder.get_info_2()

    # NOTE(review): when the disposal pages were re-keyed above, page values
    # are relative to the extracted pages, not to the original document.  An
    # earlier, disabled adjustment added an offset of 6 in that case --
    # confirm which numbering callers expect.
    for entry in results.values():
        if entry['page'] is not None:
            entry['page'] = f"page_{int(entry['page']) + 1}"
    return results