# OliveSensorAPI/IOTLLM/generate_data/EC_process/processPDF/PDF2Pic.py
#
# 40 lines
# 886 B
# Python

# -*- coding: utf-8 -*-
# @Time : 2024/10/21 22:09
# @Author : 黄子寒
# @Email : 1064071566@qq.com
# @File : PDF2Pic.py
# @Project : EmoLLM
import fitz # PyMuPDF
from PIL import Image
import os
# Render every page of a PDF to its own PNG file.
#
# Reads ``pdf_file_path`` and writes ``<output_image_dir>/<page>.png`` for
# each page, numbering the files from 1, and prints each path as it is saved.

# Input PDF path and directory that receives the rendered images.
pdf_file_path = "input.pdf"
output_image_dir = "output"

# Create the output directory; exist_ok avoids the check-then-create race.
os.makedirs(output_image_dir, exist_ok=True)

# Scale factor applied on both axes when rasterizing a page. The matrix is
# identical for every page, so it is built once outside the loop.
zoom = 4
mat = fitz.Matrix(zoom, zoom)

# The context manager closes the document even if rendering or saving fails.
with fitz.open(pdf_file_path) as pdf_document:
    # Iterating the document yields its pages in order.
    for page_number, page in enumerate(pdf_document):
        # Rasterize the page with the zoom matrix.
        pix = page.get_pixmap(matrix=mat)
        # File names are 1-based: "1.png" is the first page.
        image_path = os.path.join(output_image_dir, f"{page_number + 1}.png")
        pix.save(image_path)
        print(f" {image_path}")