# -*- coding: utf-8 -*-
# @Time : 2024/10/21 22:09
# @Author : 黄子寒
# @Email : 1064071566@qq.com
# @File : PDF2Pic.py
# @Project : EmoLLM

import os

import fitz  # PyMuPDF
from PIL import Image

# Input PDF path and the directory that receives the rendered page images.
pdf_file_path = "input.pdf"
output_image_dir = "output"


def convert_pdf_to_images(pdf_path, output_dir, zoom=4):
    """Render every page of a PDF to a PNG file.

    Pages are written into ``output_dir`` as ``1.png``, ``2.png``, ... in
    page order, and each saved path is printed to stdout.

    Args:
        pdf_path: Path of the PDF file to open.
        output_dir: Directory for the PNG files; created if it is missing.
        zoom: Scale factor applied to both axes when rendering a page.

    Returns:
        The list of image paths that were written, in page order.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # The scaling transform is identical for every page, so build it once.
    render_matrix = fitz.Matrix(zoom, zoom)

    saved_paths = []
    # The context manager closes the document even if rendering or saving
    # raises part-way through.
    with fitz.open(pdf_path) as pdf_document:
        # File names are 1-based page numbers.
        for display_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=render_matrix)

            image_path = os.path.join(output_dir, f"{display_number}.png")
            pix.save(image_path)
            saved_paths.append(image_path)

            print(f" {image_path}")

    return saved_paths


# Runs at module level (on import as well as direct execution), exactly as
# the original flat script did.
convert_pdf_to_images(pdf_file_path, output_image_dir)