# hmh_ocr.py 256 Bytes
from .retriever import HMHRetriever
from .tools import pdf_info_rebuild

# Single retriever instance, constructed once when this module is imported
# and reused by every call to predict().
hmh_retriever = HMHRetriever()

def predict(pdf_info):
    """Extract the target fields from ``pdf_info``.

    ``pdf_info`` is passed through ``pdf_info_rebuild`` with
    ``fix_bbox=False``. That call returns a pair; only its first element is
    used, and it is handed to the module-level retriever's
    ``get_target_fields``, whose result is returned unchanged.
    """
    rebuilt = pdf_info_rebuild(pdf_info, fix_bbox=False)
    # The second element of the pair is not needed here.
    text_list, _unused = rebuilt
    return hmh_retriever.get_target_fields(text_list)