Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
pdf_to_img
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
2bf360fb
authored
2020-09-08 11:38:31 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix page to png
1 parent
ff70b617
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
6 deletions
pdf_to_img.py
pdf_to_img.py
View file @
2bf360f
...
...
@@ -17,8 +17,10 @@ args = parser.parse_args()
LOG_BASE
=
'[pdf to img]'
# 页面保存为png图片参数
ZOOM_X
=
ZOOM_Y
=
2.0
trans
=
fitz
.
Matrix
(
ZOOM_X
,
ZOOM_X
)
.
preRotate
(
0
)
# zoom factor 2 in each dimension
ZOOM_X_1
=
ZOOM_Y_1
=
1.0
ZOOM_X_2
=
ZOOM_Y_2
=
2.0
trans_1
=
fitz
.
Matrix
(
ZOOM_X_1
,
ZOOM_X_1
)
.
preRotate
(
0
)
# zoom factor 1 in each dimension
trans_2
=
fitz
.
Matrix
(
ZOOM_X_2
,
ZOOM_X_2
)
.
preRotate
(
0
)
# zoom factor 2 in each dimension
# 特殊filter处理
ADOBE_FILTER_SET
=
{
'FlateDecode'
,
'JPXDecode'
,
'JBIG2Decode'
}
...
...
@@ -38,13 +40,17 @@ class PDFHandler:
self
.
img_dir_path
=
os
.
path
.
join
(
target_path
,
os
.
path
.
splitext
(
os
.
path
.
basename
(
path
))[
0
])
self
.
xref_set
=
set
()
def
get_img_save_path
(
self
,
pno
,
img_index
=
0
,
ext
=
'
pn
g'
):
def
get_img_save_path
(
self
,
pno
,
img_index
=
0
,
ext
=
'
jpe
g'
):
return
os
.
path
.
join
(
self
.
img_dir_path
,
'page_{0}_img_{1}.{2}'
.
format
(
pno
,
img_index
,
ext
))
def
page_to_png
(
self
,
page
):
pm
=
page
.
getPixmap
(
matrix
=
trans
,
alpha
=
False
)
if
page
.
MediaBoxSize
.
x
>
1500
or
page
.
MediaBoxSize
.
y
>
1500
:
pm
=
page
.
getPixmap
(
matrix
=
trans_1
,
alpha
=
False
)
else
:
pm
=
page
.
getPixmap
(
matrix
=
trans_2
,
alpha
=
False
)
img_save_path
=
self
.
get_img_save_path
(
page
.
number
)
pm
.
writePNG
(
img_save_path
)
pm
.
writeImage
(
img_save_path
)
# pm.writePNG(img_save_path)
@staticmethod
def
getimage
(
pix
):
...
...
@@ -247,7 +253,8 @@ def main():
failed_list
=
[]
for
parent
,
dirnames
,
filenames
in
os
.
walk
(
pdf_path
):
# 图片保存目录
target_path
=
os
.
path
.
realpath
(
args
.
output
)
if
args
.
output
else
parent
# target_path = os.path.realpath(args.output) if args.output else parent
target_path
=
parent
.
replace
(
pdf_path
,
args
.
output
)
for
pdf_file
in
filenames
:
if
not
pdf_file
.
endswith
(
'pdf'
)
and
not
pdf_file
.
endswith
(
'PDF'
):
continue
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment