Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from PyPDF2 import PdfWriter, PdfReader | |
| import zipfile | |
| import tempfile | |
| import fitz # PyMuPDF | |
def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into one document, placing every page on A4.

    Each source page is drawn onto a new A4-sized page of the output,
    keeping the source page's proportions.

    Args:
        pdf_files: Uploaded files. Each item is either an object exposing the
            file path as ``.name`` or a plain path string. May be ``None`` or
            empty.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when nothing was uploaded, otherwise the path of ``merged.pdf`` inside
        a fresh temporary directory.
    """
    # NOTE(review): the leading glyph in the status strings looks like a
    # mis-decoded emoji; confirm the file's encoding before changing it.
    if not pdf_files:
        return "β No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    # Context managers close every document even when a source fails to open
    # or render part-way through the merge.
    with fitz.open() as doc_out:
        for file in pdf_files:
            source_path = getattr(file, "name", file)
            with fitz.open(source_path) as src:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
        doc_out.save(output_file)

    return "β PDFs merged successfully.", output_file
def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Shrink a PDF by recompressing its images and subsetting its fonts.

    Args:
        file: Uploaded file; either an object exposing the path as ``.name``
            or a plain path string. ``None`` when nothing was uploaded.
        dpi_threshold: Passed to ``rewrite_images`` as ``dpi_threshold``.
        dpi_target: Passed to ``rewrite_images`` as ``dpi_target``.
        quality: Passed to ``rewrite_images`` as ``quality``.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when no file was uploaded, otherwise the path of
        ``compressed_output.pdf`` inside a fresh temporary directory.
    """
    # Same guard and message as the other handlers, instead of failing with
    # AttributeError when the button is clicked without an upload.
    if file is None:
        return "β No file uploaded.", None

    input_path = getattr(file, "name", file)

    # A fresh temporary directory keeps concurrent requests from overwriting
    # each other's output.
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    # The context manager closes the document even if a step below raises.
    with fitz.open(input_path) as doc:
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )
        doc.subset_fonts()
        # Save with garbage collection level 3, deflated streams and object
        # streams enabled.
        doc.save(
            output_path,
            garbage=3,
            deflate=True,
            use_objstms=True,
        )

    return "β PDF compressed successfully!", output_path
def split_pdf(file_path, start_page, end_page):
    """Write each page of a range to its own PDF and bundle them into a ZIP.

    The requested range is clamped to the pages that exist in the document.
    The single-page PDFs are written next to the archive in a fresh temporary
    directory and then added to the archive.

    Args:
        file_path: Path of the PDF to split.
        start_page: 0-based index of the first page to extract.
        end_page: 0-based index of the last page to extract (inclusive).

    Returns:
        A ``(zip_path, first_page, last_page)`` tuple, where the page numbers
        are the clamped range actually written, expressed 1-based.
    """
    # splitext drops whatever extension is present rather than assuming it is
    # exactly four characters long.
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

    # The reader pulls pages from the stream on demand, so the input file must
    # stay open until the last page has been written.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)

        # Clamp values within range; int() keeps range() working when the UI
        # hands over whole-number floats.
        start_page = max(0, min(int(start_page), total_pages - 1))
        end_page = max(start_page, min(int(end_page), total_pages - 1))

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])
                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i + 1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)
                zipf.write(
                    split_pdf_path, arcname=os.path.basename(split_pdf_path)
                )

    return zip_path, start_page + 1, end_page + 1
def _parse_page_spec(spec):
    """Expand a spec such as ``"1,3,5-7"`` into a set of ints.

    Raises:
        ValueError: If any comma-separated part is not an integer or an
            ``a-b`` pair of integers.
    """
    pages = set()
    for part in spec.replace(" ", "").split(","):
        if "-" in part:
            start, end = map(int, part.split("-"))
            pages.update(range(start, end + 1))
        else:
            pages.add(int(part))
    return pages


def remove_pages(file, pages_to_remove):
    """Write a copy of the uploaded PDF without the listed pages.

    Args:
        file: Uploaded file; either an object exposing the path as ``.name``
            or a plain path string. ``None`` when nothing was uploaded.
        pages_to_remove: Comma-separated 0-based page numbers and inclusive
            ``a-b`` ranges, e.g. ``"1,3,5-7"``. Spaces are ignored.

    Returns:
        A ``(status_message, output_path)`` tuple. ``output_path`` is ``None``
        when no file was uploaded or the page list could not be parsed.
        Page numbers outside the document are ignored; the status message
        lists the pages that were actually removed.
    """
    if file is None:
        return "β No file uploaded.", None

    # Validate the page list before touching the file so that bad input never
    # opens (or leaks) a file handle. Only ValueError is expected here: it
    # covers non-numeric parts and malformed ranges alike.
    try:
        requested = _parse_page_spec(pages_to_remove or "")
    except ValueError:
        return "β Invalid page format.", None

    output_dir = tempfile.mkdtemp()
    output_path = os.path.join(output_dir, "pages_removed.pdf")

    # The reader pulls pages from the stream on demand, so the input file must
    # stay open until the output has been written.
    with open(getattr(file, "name", file), "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)

        # Keep valid pages only
        remove_set = {p for p in requested if 0 <= p < total_pages}

        writer = PdfWriter()
        for i, page in enumerate(input_pdf.pages):
            if i not in remove_set:
                writer.add_page(page)

        with open(output_path, "wb") as f:
            writer.write(f)

    status = f"β Removed pages: {sorted(remove_set)}"
    return status, output_path
def process_pdf(file, start_page, end_page):
    """Split the uploaded PDF over a page range and report what was done.

    Args:
        file: Uploaded file object exposing its path as ``.name``, or ``None``
            when nothing was uploaded.
        start_page: First page index, forwarded to ``split_pdf``.
        end_page: Last page index, forwarded to ``split_pdf``.

    Returns:
        A ``(status_message, zip_path)`` tuple; ``zip_path`` is ``None`` when
        no file was uploaded.
    """
    if file is None:
        return "β No file uploaded.", None

    source_path = file.name
    archive_path, first_page, last_page = split_pdf(
        source_path, start_page, end_page
    )
    return (
        f"β File '{source_path}' split from page {first_page} to {last_page}.",
        archive_path,
    )
# UI definition: one tab per tool. Every tab follows the same pattern of
# upload widget(s), an action button, a status box and a download slot.
# NOTE(review): the leading glyphs in the labels below look like mis-decoded
# emoji; confirm the file's encoding before changing them.
with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# π PDF Utility App")
    with gr.Tabs():
        # --- Split: one PDF in, ZIP of single-page PDFs out ---
        with gr.TabItem("Split PDF"):
            gr.Markdown("Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages.")
            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                start_page = gr.Number(label="Start Page (0-based)", value=0, precision=0)
                end_page = gr.Number(label="End Page (0-based)", value=0, precision=0)
            split_button = gr.Button("π Split PDF")
            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")
            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link]
            )
        # --- Compress: recompress images and subset fonts ---
        with gr.TabItem("Compress PDF"):
            gr.Markdown("Upload a PDF and click **Compress PDF** to download the compressed version.")
            with gr.Row():
                file_input_compress = gr.File(label="Upload PDF", file_types=[".pdf"])
            with gr.Row():
                dpi_threshold = gr.Number(label="DPI Threshold", value=100, precision=0)
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                # Enforce the range the label promises so an out-of-range
                # value is rejected by the form instead of reaching the
                # compressor.
                quality = gr.Number(
                    label="JPEG Quality (1-100)",
                    value=60,
                    precision=0,
                    minimum=1,
                    maximum=100,
                )
            compress_button = gr.Button("π Compress PDF")
            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")
            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress]
            )
        # --- Merge: several PDFs in, one PDF out ---
        with gr.TabItem("Merge PDFs"):
            gr.Markdown("Upload multiple PDFs and click **Merge PDFs** to download the merged version.")
            pdf_uploads = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
            merge_button = gr.Button("π Merge PDF Files")
            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")
            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file]
            )
        # --- Remove pages: drop listed pages from one PDF ---
        with gr.TabItem("Remove Pages"):
            gr.Markdown(
                "Remove one or more pages from a PDF.\n\n"
                "**Examples:** `2`, `1,3,5`, `2-6`, `1,3,5-7`\n\n"
                "β οΈ Page numbers are **0-based**."
            )
            remove_file = gr.File(label="Upload PDF", file_types=[".pdf"])
            pages_input = gr.Textbox(
                label="Pages to remove",
                placeholder="e.g. 1,3,5-7"
            )
            remove_button = gr.Button("π Remove Pages")
            remove_status = gr.Textbox(label="Status", lines=2)
            removed_pdf = gr.File(label="Download PDF")
            remove_button.click(
                fn=remove_pages,
                inputs=[remove_file, pages_input],
                outputs=[remove_status, removed_pdf]
            )
demo.launch()