# fsm_contract_ocr.py
# 448 Bytes
from .retriever import Retriever
from .const import WEP_FIELD, MSI_FIELD, SC_FIELD, SC2_FIELD
from .tools import pdf_info_rebuild
# One Retriever per *_FIELD constant. The position in this list is the
# ``file_type`` index that ``predict`` uses to pick a retriever:
# 0 -> WEP_FIELD, 1 -> MSI_FIELD, 2 -> SC_FIELD, 3 -> SC2_FIELD.
retriever_list = list(
    map(Retriever, (WEP_FIELD, MSI_FIELD, SC_FIELD, SC2_FIELD))
)
def predict(pdf_info, file_type=0):
    """Extract target fields from ``pdf_info`` with the selected retriever.

    Args:
        pdf_info: Raw PDF information, passed unchanged to
            ``pdf_info_rebuild``.
        file_type: Index into the module-level ``retriever_list`` that
            chooses which retriever handles the document. Defaults to 0.
            Normal list indexing applies, so an out-of-range value raises
            ``IndexError``.

    Returns:
        Whatever the chosen retriever's ``get_target_fields`` returns.
    """
    # Select the retriever first so a bad ``file_type`` fails before any
    # rebuild work is done.
    selected = retriever_list[file_type]
    # ``pdf_info_rebuild`` yields exactly two values; presumably the text
    # content and the image content of the PDF -- confirm against the helper.
    text_items, image_items = pdf_info_rebuild(pdf_info)
    return selected.get_target_fields(text_items, image_items)