test_pdf_tools.py
1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- coding: utf-8 -*-
# @Author : Lyu Kui
# @Email : 9428.al@gmail.com
# @Create Date : 2022-07-22 13:10:47
# @Last Modified : 2022-08-24 15:39:55
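# @Description : Walk a working directory, render every *.pdf found with turnsole.pdf_to_images, and save the resulting images as JPEG files in a folder next to each PDF.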
import os
import cv2
import fitz
from turnsole import pdf_to_images # pip install turnsole PyMuPDF opencv-python==4.4.0.44
PDF_SUFFIX = '.pdf'


def pdf_stem(filename):
    """Return *filename* with a single trailing ``.pdf`` suffix removed.

    Only the suffix at the very end is stripped. ``str.replace('.pdf', '')``
    would delete every occurrence, so ``a.pdf.v2.pdf`` would collapse to
    ``a.v2`` and could collide with the output of another document.

    Args:
        filename: Base name of a file (no directory part).

    Returns:
        The name without its trailing ``.pdf``; names that do not end in
        ``.pdf`` are returned unchanged.
    """
    if filename.endswith(PDF_SUFFIX):
        return filename[:-len(PDF_SUFFIX)]
    return filename


def convert_pdfs(base_dir):
    """Render every ``*.pdf`` below *base_dir* to JPEG files.

    For each PDF, a directory named after the PDF's stem is created next to
    it and the images are written there as ``<stem>-<index>.jpg``.

    Args:
        base_dir: Root directory that is walked recursively.

    Returns:
        The number of images that were successfully written.
    """
    written = 0
    for root_dir, _dir_names, filenames in os.walk(base_dir):
        for filename in filenames:
            if not filename.endswith(PDF_SUFFIX):
                continue

            pdf_path = os.path.join(root_dir, filename)
            print(pdf_path)

            # The result is treated as an iterable of image lists (the
            # script always flattened it one level); flatten in linear time
            # instead of the quadratic sum(images, []).
            groups = pdf_to_images(pdf_path)
            images = [image for group in groups for image in group]

            stem = pdf_stem(filename)
            image_dir = os.path.join(root_dir, stem)
            # exist_ok avoids the check-then-create race and keeps re-runs
            # over an already converted tree working.
            os.makedirs(image_dir, exist_ok=True)

            for index, image in enumerate(images):
                save_path = os.path.join(
                    image_dir, stem + '-' + str(index) + '.jpg')
                # cv2.imwrite reports failure through its return value
                # rather than raising, so surface it instead of dropping it.
                if cv2.imwrite(save_path, image):
                    written += 1
                else:
                    print('WARNING: failed to write ' + save_path)
    return written


if __name__ == "__main__":
    base_dir = '/PATH/TO/YOUR/WORKDIR'
    convert_pdfs(base_dir)