# -*- coding: utf-8 -*-
# @Author        : lk
# @Email         : 9428.al@gmail.com
# @Created Date  : 2021-06-29 17:43:46
# @Last Modified : 2021-09-07 14:11:25
# @Description   :

from .get_char import Finder


def predict(pdf_info):
    """Collect text spans from ``pdf_info`` and run ``Finder`` over them.

    For every page key in ``pdf_info`` the text blocks (``block['type'] == 0``)
    are walked line by line and span by span.  Each span's ``bbox``
    ``(xmin, ymin, xmax, ymax)`` is expanded into an 8-value polygon
    (top-left, top-right, bottom-right, bottom-left) and its text is
    normalised: full-width colons become ASCII colons and spaces are removed.

    Args:
        pdf_info: Mapping of page key -> page dict.  Each page dict must have
            a ``'blocks'`` list whose items carry ``'type'`` and, for type 0,
            ``'lines'`` -> ``'spans'`` -> ``'bbox'`` / ``'text'``.
            (Presumably the PyMuPDF ``get_text("dict")`` layout -- confirm
            against the caller.)

    Returns:
        Whatever ``Finder(pdf_info, ocr_results=...).get_info()`` returns.
    """
    ocr_results = {}
    for pno in pdf_info:
        page_results = {}
        ocr_results[pno] = page_results
        for key, block in enumerate(pdf_info[pno]['blocks']):
            # Only blocks of type 0 are processed; every other type is skipped.
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    xmin, ymin, xmax, ymax = span['bbox']
                    polygon = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                    # Normalise the full-width colon (U+FF1A) to ASCII and
                    # strip spaces.  The previous code replaced ":" with ":",
                    # which was a no-op.
                    text = span['text'].replace("\uff1a", ":").replace(" ", "")
                    # NOTE(review): the key is the block index, so a block
                    # with several spans keeps only its last span.  Left
                    # as-is because Finder may depend on this shape -- confirm.
                    page_results[key] = [polygon, text]

    # Finder receives the information of the entire PDF plus the span table.
    finder = Finder(pdf_info, ocr_results=ocr_results)
    return finder.get_info()