# test_pdf_tools.py (1.07 KB)
# -*- coding: utf-8 -*-
# @Author        : Lyu Kui
# @Email         : 9428.al@gmail.com
# @Create Date   : 2022-07-22 13:10:47
# @Last Modified : 2022-08-24 15:39:55
# @Description   : Walk a directory tree and save the images extracted from every PDF as JPEG files.


import os
import cv2
import fitz
from turnsole import pdf_to_images      # pip install turnsole PyMuPDF opencv-python==4.4.0.44

def pdf_stem(filename):
    """Return ``filename`` with its trailing ``.pdf`` suffix removed.

    Only the final suffix is dropped, so a name such as ``a.pdf.v2.pdf``
    keeps its inner ``.pdf`` (``str.replace`` would strip every occurrence
    and could make two different PDFs share one output name).  Names that do
    not end in ``.pdf`` are returned unchanged.
    """
    suffix = '.pdf'
    if filename.endswith(suffix):
        return filename[:-len(suffix)]
    return filename


def export_pdf_images(root_dir, filename):
    """Save the images of one PDF as ``<stem>-<index>.jpg`` files.

    The files are written to ``<root_dir>/<stem>/``, which is created when
    missing.

    Args:
        root_dir: Directory that contains the PDF.
        filename: File name of the PDF inside ``root_dir``.

    Returns:
        The number of images that were written successfully.
    """
    pdf_path = os.path.join(root_dir, filename)
    print(pdf_path)

    # NOTE(review): pdf_to_images is assumed to return a list of lists of
    # images (the previous code flattened it with ``sum(images, [])``) --
    # confirm against turnsole.  The comprehension flattens in linear time.
    images = [image for group in pdf_to_images(pdf_path) for image in group]

    stem = pdf_stem(filename)
    image_dir = os.path.join(root_dir, stem)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(image_dir, exist_ok=True)

    written = 0
    for index, image in enumerate(images):
        save_path = os.path.join(image_dir, '{}-{}.jpg'.format(stem, index))
        # cv2.imwrite reports failure through its return value instead of
        # raising, so surface it rather than dropping the image silently.
        if cv2.imwrite(save_path, image):
            written += 1
        else:
            print('Failed to write image: ' + save_path)
    return written


if __name__ == "__main__":

    base_dir = '/PATH/TO/YOUR/WORKDIR'

    # Visit every directory below base_dir and convert each PDF found there.
    for (rootDir, dirNames, filenames) in os.walk(base_dir):

        for filename in filenames:

            if not filename.endswith('.pdf'):
                continue

            export_pdf_images(rootDir, filename)