# afc_contract_ocr.py
# -*- coding: utf-8 -*-
# @Author        : lk
# @Email         : 9428.al@gmail.com
# @Created Date  : 2021-06-29 17:43:46
# @Last Modified : 2021-09-07 14:11:25
# @Description   :

from .get_char import Finder


def predict(pdf_info):
    """Gather per-page text spans from ``pdf_info`` and run ``Finder`` on them.

    Args:
        pdf_info: Container indexed by page key. Each page entry exposes a
            ``'blocks'`` sequence; every block has a ``'type'`` and, when
            that type is 0, ``'lines'`` -> ``'spans'`` with ``'bbox'`` and
            ``'text'`` entries.
            NOTE(review): this looks like a PyMuPDF-style text dict where
            type 0 marks text blocks -- confirm against the caller.

    Returns:
        Whatever ``Finder(pdf_info, ocr_results=...).get_info()`` returns.
    """
    page_spans = {}
    for page_no in pdf_info:
        spans_by_block = {}
        page_spans[page_no] = spans_by_block
        for block_idx, block in enumerate(pdf_info[page_no]['blocks']):
            # Only blocks whose type is 0 are processed.
            if block['type'] != 0:
                continue
            spans = (span for line in block['lines'] for span in line['spans'])
            for span in spans:
                box = span['bbox']
                raw_text = span['text']
                left, top, right, bottom = box
                # Expand the two-corner box into a four-corner polygon:
                # top-left, top-right, bottom-right, bottom-left.
                corners = [left, top, right, top, right, bottom, left, bottom]
                # NOTE(review): the first replace swaps ":" for an identical
                # ":" as written; possibly a full-width colon was intended
                # -- confirm. The second replace strips all spaces.
                cleaned = raw_text.replace(":", ":").replace(" ", "")
                # NOTE(review): the entry is keyed by block index, so each
                # span overwrites the previous one and only the last span of
                # a block is kept -- confirm this is what Finder expects.
                spans_by_block[block_idx] = [corners, cleaned]

    # Finder receives the information of the whole PDF plus the collected spans.
    finder = Finder(pdf_info, ocr_results=page_spans)
    return finder.get_info()