Files
skills/skills/pdf/scripts/convert_pdf_to_images.py
Keith Lazuka 9422aa6c64 Update docx, xlsx, pdf, pptx skills with latest improvements
docx: Add commenting and track-changes support. Reorganize OOXML
tooling into a shared office/ module.

pptx: Streamline SKILL.md, add slide-editing and pptxgenjs guides,
bundle html2pptx as a tgz. Reorganize OOXML tooling into a shared
office/ module.

xlsx: Move recalc script into scripts/ and expand it. Add shared
office/ module for OOXML pack/unpack/validate.

pdf: Improve form-filling workflow with new form-structure extraction
script and updated field-info extraction.
2026-02-03 20:14:20 -05:00

34 lines
1008 B
Python

import os
import sys
from pdf2image import convert_from_path
def convert(pdf_path, output_dir, max_dim=1000):
    """Render every page of a PDF to a PNG file inside ``output_dir``.

    Pages are rasterized with ``convert_from_path`` at 200 DPI. A page whose
    width or height exceeds ``max_dim`` pixels is scaled down by a single
    factor, so its aspect ratio is kept and both sides fit within
    ``max_dim``. Output files are named ``page_<n>.png`` with ``n`` starting
    at 1, and one progress line per page plus a final summary line are
    printed to stdout.

    Args:
        pdf_path: Path of the PDF to convert.
        output_dir: Directory that receives the PNG files. It is created
            (including parents) when it does not exist yet. An empty string
            writes into the current working directory.
        max_dim: Largest allowed width or height, in pixels, of a saved
            image.

    Returns:
        None.

    Raises:
        OSError: If ``output_dir`` cannot be created or a page cannot be
            written. Errors raised by ``convert_from_path`` are not caught
            and propagate to the caller.
    """
    # Create the destination up front so image.save() does not fail on a
    # missing directory. os.makedirs("") raises, so skip the empty path,
    # which os.path.join resolves to the current directory anyway.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=200)

    for page_number, image in enumerate(images, start=1):
        width, height = image.size
        if width > max_dim or height > max_dim:
            # One shared factor for both axes preserves the aspect ratio.
            scale_factor = min(max_dim / width, max_dim / height)
            # int() truncates, so a very elongated page could round a side
            # down to 0; clamp to 1 px to keep the target size valid.
            new_width = max(1, int(width * scale_factor))
            new_height = max(1, int(height * scale_factor))
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path)
        print(f"Saved page {page_number} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")
if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are
    # required, the input PDF and the directory for the generated PNGs.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)

    source_pdf, target_dir = cli_args
    convert(source_pdf, target_dir)