# fsm_contract_ocr.py
from .retriever import Retriever
from .const import WEP_FIELD, MSI_FIELD, SC_FIELD, SC2_FIELD
from .tools import pdf_info_rebuild

# One Retriever per field constant, in a fixed order. predict() indexes this
# list with its `file_type` argument, so the position of each entry matters:
# 0 -> WEP_FIELD, 1 -> MSI_FIELD, 2 -> SC_FIELD, 3 -> SC2_FIELD.
retriever_list = list(
    map(Retriever, (WEP_FIELD, MSI_FIELD, SC_FIELD, SC2_FIELD))
)

def predict(pdf_info, file_type=0):
    """Extract target fields from ``pdf_info`` with the selected retriever.

    Args:
        pdf_info: Input handed unchanged to ``pdf_info_rebuild``; its exact
            structure is defined by that helper.
        file_type: Index into the module-level ``retriever_list`` choosing
            which ``Retriever`` to use. Defaults to ``0`` (the first entry).
            Ordinary list indexing applies, so an out-of-range value raises
            ``IndexError`` and a negative value counts from the end.

    Returns:
        Whatever the chosen retriever's ``get_target_fields`` returns for the
        rebuilt text and image lists.
    """
    # Resolve the retriever first so an invalid file_type fails before any
    # rebuild work is done.
    chosen = retriever_list[file_type]
    rebuilt = pdf_info_rebuild(pdf_info)
    # pdf_info_rebuild is expected to yield exactly two items: texts, images.
    text_items, image_items = rebuilt
    return chosen.get_target_fields(text_items, image_items)