2bf360fb by 周伟奇

fix page to png

1 parent ff70b617
...@@ -17,8 +17,10 @@ args = parser.parse_args() ...@@ -17,8 +17,10 @@ args = parser.parse_args()
17 LOG_BASE = '[pdf to img]' 17 LOG_BASE = '[pdf to img]'
18 18
19 # 页面保存为png图片参数 19 # 页面保存为png图片参数
20 ZOOM_X = ZOOM_Y = 2.0 20 ZOOM_X_1 = ZOOM_Y_1 = 1.0
21 trans = fitz.Matrix(ZOOM_X, ZOOM_X).preRotate(0) # zoom factor 2 in each dimension 21 ZOOM_X_2 = ZOOM_Y_2 = 2.0
22 trans_1 = fitz.Matrix(ZOOM_X_1, ZOOM_X_1).preRotate(0) # zoom factor 1 in each dimension
23 trans_2 = fitz.Matrix(ZOOM_X_2, ZOOM_X_2).preRotate(0) # zoom factor 2 in each dimension
22 24
23 # 特殊filter处理 25 # 特殊filter处理
24 ADOBE_FILTER_SET = {'FlateDecode', 'JPXDecode', 'JBIG2Decode'} 26 ADOBE_FILTER_SET = {'FlateDecode', 'JPXDecode', 'JBIG2Decode'}
...@@ -38,13 +40,17 @@ class PDFHandler: ...@@ -38,13 +40,17 @@ class PDFHandler:
38 self.img_dir_path = os.path.join(target_path, os.path.splitext(os.path.basename(path))[0]) 40 self.img_dir_path = os.path.join(target_path, os.path.splitext(os.path.basename(path))[0])
39 self.xref_set = set() 41 self.xref_set = set()
40 42
41 def get_img_save_path(self, pno, img_index=0, ext='png'): 43 def get_img_save_path(self, pno, img_index=0, ext='jpeg'):
42 return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext)) 44 return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext))
43 45
44 def page_to_png(self, page): 46 def page_to_png(self, page):
45 pm = page.getPixmap(matrix=trans, alpha=False) 47 if page.MediaBoxSize.x > 1500 or page.MediaBoxSize.y > 1500:
48 pm = page.getPixmap(matrix=trans_1, alpha=False)
49 else:
50 pm = page.getPixmap(matrix=trans_2, alpha=False)
46 img_save_path = self.get_img_save_path(page.number) 51 img_save_path = self.get_img_save_path(page.number)
47 pm.writePNG(img_save_path) 52 pm.writeImage(img_save_path)
53 # pm.writePNG(img_save_path)
48 54
49 @staticmethod 55 @staticmethod
50 def getimage(pix): 56 def getimage(pix):
...@@ -247,7 +253,8 @@ def main(): ...@@ -247,7 +253,8 @@ def main():
247 failed_list = [] 253 failed_list = []
248 for parent, dirnames, filenames in os.walk(pdf_path): 254 for parent, dirnames, filenames in os.walk(pdf_path):
249 # 图片保存目录 255 # 图片保存目录
250 target_path = os.path.realpath(args.output) if args.output else parent 256 # target_path = os.path.realpath(args.output) if args.output else parent
257 target_path = parent.replace(pdf_path, args.output)
251 for pdf_file in filenames: 258 for pdf_file in filenames:
252 if not pdf_file.endswith('pdf') and not pdf_file.endswith('PDF'): 259 if not pdf_file.endswith('pdf') and not pdf_file.endswith('PDF'):
253 continue 260 continue
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!