2bf360fb by 周伟奇

fix page to png

1 parent ff70b617
......@@ -17,8 +17,10 @@ args = parser.parse_args()
# Tag identifying this tool; presumably used as a prefix in log messages
# (its use is not visible in this part of the file).
LOG_BASE = '[pdf to img]'
# Parameters for saving a page as a PNG image.
# Each matrix is built from its own X *and* Y zoom constant, so changing a
# Y factor actually takes effect (previously the X constant was passed twice
# and the Y constants were never read).
ZOOM_X = ZOOM_Y = 2.0
trans = fitz.Matrix(ZOOM_X, ZOOM_Y).preRotate(0) # zoom factor 2 in each dimension
# Two render scales: page_to_png picks trans_1 for pages whose MediaBoxSize
# exceeds 1500 in x or y, and trans_2 otherwise.
ZOOM_X_1 = ZOOM_Y_1 = 1.0
ZOOM_X_2 = ZOOM_Y_2 = 2.0
trans_1 = fitz.Matrix(ZOOM_X_1, ZOOM_Y_1).preRotate(0) # zoom factor 1 in each dimension
trans_2 = fitz.Matrix(ZOOM_X_2, ZOOM_Y_2).preRotate(0) # zoom factor 2 in each dimension
# Special filter handling: names of the filters that get dedicated treatment
# (where the set is consulted is not visible in this part of the file).
ADOBE_FILTER_SET = {'FlateDecode', 'JPXDecode', 'JBIG2Decode'}
......@@ -38,13 +40,17 @@ class PDFHandler:
self.img_dir_path = os.path.join(target_path, os.path.splitext(os.path.basename(path))[0])
self.xref_set = set()
# Build the save path for an image belonging to page `pno`:
#   <self.img_dir_path>/page_<pno>_img_<img_index>.<ext>
# NOTE(review): two signatures appear back to back here, differing only in the
# default `ext` ('png' vs 'jpeg'). This looks like the before/after pair of a
# diff hunk -- confirm which one is live before relying on the default.
def get_img_save_path(self, pno, img_index=0, ext='png'):
def get_img_save_path(self, pno, img_index=0, ext='jpeg'):
# The file name is formatted from the three arguments and joined onto
# self.img_dir_path; nothing is created on disk here.
return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext))
# Render `page` to a pixmap and write it to the path returned by
# get_img_save_path(page.number).
# NOTE(review): this view appears to interleave the old and new lines of a
# diff (one unconditional render plus a size-dependent render, and two write
# calls). Confirm against the real file which statements are live.
def page_to_png(self, page):
# Render with the module-level `trans` matrix, without an alpha channel.
pm = page.getPixmap(matrix=trans, alpha=False)
# Pages whose MediaBoxSize exceeds 1500 in x or y are rendered with trans_1;
# all other pages are rendered with trans_2.
# (units of MediaBoxSize are presumably PDF points -- TODO confirm)
if page.MediaBoxSize.x > 1500 or page.MediaBoxSize.y > 1500:
pm = page.getPixmap(matrix=trans_1, alpha=False)
else:
pm = page.getPixmap(matrix=trans_2, alpha=False)
# Only the page number is passed, so img_index and ext take the defaults of
# get_img_save_path.
img_save_path = self.get_img_save_path(page.number)
pm.writePNG(img_save_path)
# NOTE(review): writeImage presumably chooses the output format from the file
# extension -- confirm against the PyMuPDF documentation.
pm.writeImage(img_save_path)
# pm.writePNG(img_save_path)
@staticmethod
def getimage(pix):
......@@ -247,7 +253,8 @@ def main():
failed_list = []
for parent, dirnames, filenames in os.walk(pdf_path):
# 图片保存目录
target_path = os.path.realpath(args.output) if args.output else parent
# target_path = os.path.realpath(args.output) if args.output else parent
target_path = parent.replace(pdf_path, args.output)
for pdf_file in filenames:
if not pdf_file.endswith('pdf') and not pdf_file.endswith('PDF'):
continue
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!