Spaces:

HawkeyeHS
/

PDF_Utility

Sleeping

App Files Files Community

PDF_Utility / app.py

HawkeyeHS

Added functionality to remove pages from a PDF

2713809 verified 5 days ago

raw

history blame contribute delete

7.56 kB

	import os
	import gradio as gr
	from PyPDF2 import PdfWriter, PdfReader
	import zipfile
	import tempfile
	import fitz # PyMuPDF

	def merge_pdfs(pdf_files):
	    """Merge the uploaded PDFs into a single document of A4-sized pages.

	    Every source page is drawn, aspect ratio preserved, onto a new A4 page
	    of the output document.

	    Args:
	        pdf_files: Iterable of uploaded file objects whose ``.name`` is the
	            path of the PDF on disk. May be ``None`` or empty.

	    Returns:
	        tuple: ``(status_message, output_path)``. ``output_path`` is ``None``
	        when nothing was uploaded; otherwise it is ``merged.pdf`` inside a
	        freshly created temporary directory.
	    """
	    if not pdf_files:
	        return "❌ No PDF files uploaded.", None

	    output_dir = tempfile.mkdtemp()
	    output_file = os.path.join(output_dir, "merged.pdf")
	    a4_rect = fitz.paper_rect("a4")

	    # Context managers close the output and every source document even when
	    # one of the inputs fails to open or render part-way through the merge.
	    with fitz.open() as doc_out:
	        for file in pdf_files:
	            with fitz.open(file.name) as src:
	                for page in src:
	                    page_out = doc_out.new_page(
	                        width=a4_rect.width, height=a4_rect.height
	                    )
	                    # Place the source page inside the A4 rectangle.
	                    page_out.show_pdf_page(
	                        a4_rect, src, page.number, keep_proportion=True
	                    )
	        doc_out.save(output_file)

	    return "✅ PDFs merged successfully.", output_file


	def compress_pdf(file, dpi_threshold, dpi_target, quality):
	    """Write a size-reduced copy of the uploaded PDF.

	    Args:
	        file: Uploaded file object whose ``.name`` is the path of the PDF on
	            disk, or ``None`` when nothing was uploaded.
	        dpi_threshold: Images above this DPI are recompressed.
	        dpi_target: DPI the recompressed images are reduced to.
	        quality: JPEG quality used when recompressing images.

	    Returns:
	        tuple: ``(status_message, output_path)``. ``output_path`` is ``None``
	        when no file was uploaded; otherwise it is ``compressed_output.pdf``
	        inside a freshly created temporary directory.
	    """
	    # Same guard as the other handlers: clicking the button with no upload
	    # must produce a status message, not an AttributeError on ``file.name``.
	    if file is None:
	        return "❌ No file uploaded.", None

	    # Create a unique temporary directory for the compressed output
	    temp_dir = tempfile.mkdtemp()
	    output_path = os.path.join(temp_dir, "compressed_output.pdf")

	    # The context manager releases the document handle on success and on error.
	    with fitz.open(file.name) as doc:
	        # Recompress images above dpi_threshold to target DPI with JPEG at quality
	        doc.rewrite_images(
	            dpi_threshold=dpi_threshold,
	            dpi_target=dpi_target,
	            quality=quality,
	            lossy=True,
	            lossless=True,
	            bitonal=True,
	            color=True,
	            gray=True,
	            set_to_gray=False,
	        )

	        # Subset fonts, then save with garbage collection (level 3), stream
	        # deflation and object streams.
	        doc.subset_fonts()
	        doc.save(
	            output_path,
	            garbage=3,
	            deflate=True,
	            use_objstms=True,
	        )

	    return "✅ PDF compressed successfully!", output_path


	def split_pdf(file_path, start_page, end_page):
	    """Split a page range of a PDF into one-page PDFs bundled in a ZIP.

	    Args:
	        file_path: Path of the source PDF.
	        start_page: 0-based index of the first page to extract; clamped into
	            the document's page range.
	        end_page: 0-based index of the last page to extract (inclusive);
	            clamped so it is never before ``start_page`` nor past the end.

	    Returns:
	        tuple: ``(zip_path, first_page, last_page)`` where the page numbers
	        are the 1-based bounds that were actually extracted.

	    Raises:
	        ValueError: If the PDF contains no pages.
	    """
	    # splitext copes with any extension length instead of assuming ".pdf".
	    base_name = os.path.splitext(os.path.basename(file_path))[0]

	    # The reader pulls page data from the stream on demand, so the source
	    # stays open until every page has been written out.
	    with open(file_path, "rb") as pdf_stream:
	        input_pdf = PdfReader(pdf_stream)
	        total_pages = len(input_pdf.pages)
	        if total_pages == 0:
	            raise ValueError("PDF has no pages to split.")

	        # Clamp values within range
	        start_page = max(0, min(start_page, total_pages - 1))
	        end_page = max(start_page, min(end_page, total_pages - 1))

	        output_dir = tempfile.mkdtemp()
	        zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

	        # The archive is finalised and closed even if writing a page fails.
	        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
	            for i in range(start_page, end_page + 1):
	                writer = PdfWriter()
	                writer.add_page(input_pdf.pages[i])
	                split_pdf_path = os.path.join(
	                    output_dir, f"{base_name}-page{i+1}.pdf"
	                )
	                with open(split_pdf_path, "wb") as f_out:
	                    writer.write(f_out)
	                zipf.write(
	                    split_pdf_path, arcname=os.path.basename(split_pdf_path)
	                )

	    return zip_path, start_page + 1, end_page + 1

	def _parse_page_ranges(spec):
	    """Parse a spec such as ``"1,3,5-7"`` into inclusive ``(start, end)`` pairs.

	    Spaces are ignored. A single number ``n`` becomes ``(n, n)``.

	    Raises:
	        ValueError: If a comma-separated part is neither an integer nor an
	            ``a-b`` pair of integers.
	    """
	    ranges = []
	    for part in spec.replace(" ", "").split(","):
	        if "-" in part:
	            start, end = map(int, part.split("-"))
	        else:
	            start = end = int(part)
	        ranges.append((start, end))
	    return ranges


	def remove_pages(file, pages_to_remove):
	    """Write a copy of the uploaded PDF without the listed pages.

	    Args:
	        file: Uploaded file object whose ``.name`` is the path of the PDF on
	            disk, or ``None`` when nothing was uploaded.
	        pages_to_remove: Comma-separated 0-based page numbers and inclusive
	            ``a-b`` ranges, e.g. ``"1,3,5-7"``. Numbers outside the document
	            are ignored.

	    Returns:
	        tuple: ``(status_message, output_path)``. ``output_path`` is ``None``
	        when no file was uploaded or the page list cannot be parsed;
	        otherwise it is ``pages_removed.pdf`` inside a freshly created
	        temporary directory.
	    """
	    if file is None:
	        return "❌ No file uploaded.", None

	    # Validate the text before touching the file so a malformed request
	    # never opens (or leaks) a file handle. Only parse errors are caught.
	    try:
	        ranges = _parse_page_ranges(pages_to_remove or "")
	    except ValueError:
	        return "❌ Invalid page format.", None

	    output_dir = tempfile.mkdtemp()
	    output_path = os.path.join(output_dir, "pages_removed.pdf")

	    # The reader pulls page data from the stream on demand, so the source
	    # stays open until the result has been written.
	    with open(file.name, "rb") as pdf_stream:
	        input_pdf = PdfReader(pdf_stream)
	        total_pages = len(input_pdf.pages)

	        # Clamp each range to the document before expanding it, so an input
	        # like "0-9999999999" cannot materialise a huge set.
	        remove_set = set()
	        for start, end in ranges:
	            remove_set.update(
	                range(max(start, 0), min(end, total_pages - 1) + 1)
	            )

	        writer = PdfWriter()
	        for i in range(total_pages):
	            if i not in remove_set:
	                writer.add_page(input_pdf.pages[i])

	        with open(output_path, "wb") as f:
	            writer.write(f)

	    status = f"✅ Removed pages: {sorted(remove_set)}"
	    return status, output_path

	def process_pdf(file, start_page, end_page):
	    """Handle the Split button: validate the inputs and delegate to split_pdf.

	    Args:
	        file: Uploaded file object whose ``.name`` is the path of the PDF on
	            disk, or ``None`` when nothing was uploaded.
	        start_page: 0-based first page to extract, or ``None`` if missing.
	        end_page: 0-based last page to extract, or ``None`` if missing.

	    Returns:
	        tuple: ``(status_message, zip_path)``. ``zip_path`` is ``None`` when
	        an input is missing.
	    """
	    if file is None:
	        return "❌ No file uploaded.", None
	    # A cleared numeric field can arrive as None; report it here instead of
	    # letting the range clamping in split_pdf fail with a TypeError.
	    if start_page is None or end_page is None:
	        return "❌ Start and end pages are required.", None

	    zip_file_path, actual_start, actual_end = split_pdf(
	        file.name, int(start_page), int(end_page)
	    )
	    status = f"✅ File '{file.name}' split from page {actual_start} to {actual_end}."
	    return status, zip_file_path

	# Gradio UI: one tab per tool, each wiring its inputs to a handler above.
	# NOTE(review): the nesting below is reconstructed from statement order and
	# blank-line grouping — confirm the row/tab layout against the running app.
	with gr.Blocks(title="PDF Utility") as demo:
	    gr.Markdown("# 📄 PDF Utility App")

	    with gr.Tabs():
	        # ---- Split: page range -> ZIP of single-page PDFs ----
	        with gr.TabItem("Split PDF"):
	            gr.Markdown(
	                "Upload a PDF, select page range, and click Split PDF to download a ZIP of split pages."
	            )
	            with gr.Row():
	                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])

	            with gr.Row():
	                start_page = gr.Number(
	                    label="Start Page (0-based)", value=0, precision=0
	                )
	                end_page = gr.Number(
	                    label="End Page (0-based)", value=0, precision=0
	                )

	            split_button = gr.Button("🚀 Split PDF")

	            status_text = gr.Textbox(label="Status", lines=2)
	            download_link = gr.File(label="Download ZIP")

	            split_button.click(
	                fn=process_pdf,
	                inputs=[file_input, start_page, end_page],
	                outputs=[status_text, download_link],
	            )

	        # ---- Compress: image recompression settings -> smaller PDF ----
	        with gr.TabItem("Compress PDF"):
	            gr.Markdown(
	                "Upload a PDF and click Compress PDF to download the compressed version."
	            )
	            with gr.Row():
	                file_input_compress = gr.File(
	                    label="Upload PDF", file_types=[".pdf"]
	                )

	            with gr.Row():
	                dpi_threshold = gr.Number(
	                    label="DPI Threshold", value=100, precision=0
	                )
	                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
	                quality = gr.Number(
	                    label="JPEG Quality (1-100)", value=60, precision=0
	                )

	            compress_button = gr.Button("🚀 Compress PDF")

	            status_text_compress = gr.Textbox(label="Status", lines=2)
	            download_link_compress = gr.File(label="Download compressed PDF")

	            compress_button.click(
	                fn=compress_pdf,
	                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
	                outputs=[status_text_compress, download_link_compress],
	            )

	        # ---- Merge: several uploads -> one PDF ----
	        with gr.TabItem("Merge PDFs"):
	            gr.Markdown(
	                "Upload multiple PDFs and click Merge PDFs to download the merged version."
	            )
	            pdf_uploads = gr.File(
	                label="Upload PDFs", file_types=[".pdf"], file_count="multiple"
	            )

	            merge_button = gr.Button("📎 Merge PDF Files")

	            merge_status = gr.Textbox(label="Status", lines=2)
	            merged_file = gr.File(label="Download Merged PDF")

	            merge_button.click(
	                fn=merge_pdfs,
	                inputs=[pdf_uploads],
	                outputs=[merge_status, merged_file],
	            )

	        # ---- Remove pages: page list -> PDF without those pages ----
	        with gr.TabItem("Remove Pages"):
	            gr.Markdown(
	                "Remove one or more pages from a PDF.\n\n"
	                "Examples: `2`, `1,3,5`, `2-6`, `1,3,5-7`\n\n"
	                "⚠️ Page numbers are 0-based."
	            )

	            remove_file = gr.File(label="Upload PDF", file_types=[".pdf"])

	            pages_input = gr.Textbox(
	                label="Pages to remove",
	                placeholder="e.g. 1,3,5-7",
	            )

	            remove_button = gr.Button("🗑 Remove Pages")

	            remove_status = gr.Textbox(label="Status", lines=2)
	            removed_pdf = gr.File(label="Download PDF")

	            remove_button.click(
	                fn=remove_pages,
	                inputs=[remove_file, pages_input],
	                outputs=[remove_status, removed_pdf],
	            )


	# Start the server only when this file is executed as a script, so that
	# importing the module (e.g. to reuse the handlers) has no side effect.
	if __name__ == "__main__":
	    demo.launch()