import spaces import gradio as gr import torch from PIL import Image from transformers import AutoProcessor from longcat_image.models import LongCatImageTransformer2DModel from longcat_image.pipelines import LongCatImageEditPipeline import numpy as np # Load model directly at startup device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model_id = 'meituan-longcat/LongCat-Image-Edit' print(f"π Loading model from {model_id}...") # Load text processor text_processor = AutoProcessor.from_pretrained( model_id, subfolder='tokenizer' ) # Load transformer transformer = LongCatImageTransformer2DModel.from_pretrained( model_id, subfolder='transformer', torch_dtype=torch.bfloat16, use_safetensors=True ).to(device) # Load pipeline pipe = LongCatImageEditPipeline.from_pretrained( model_id, transformer=transformer, text_processor=text_processor, ) pipe.to(device, torch.bfloat16) print(f"β Model loaded successfully on {device}") @spaces.GPU(duration=120) def edit_image( input_image: Image.Image, prompt: str, negative_prompt: str, guidance_scale: float, num_inference_steps: int, seed: int, progress=gr.Progress() ): """Edit image based on text prompt""" if input_image is None: raise gr.Error("Please upload an image first") if not prompt or prompt.strip() == "": raise gr.Error("Please enter an edit instruction") try: progress(0.1, desc="Preparing image...") # Convert to RGB if needed if input_image.mode != 'RGB': input_image = input_image.convert('RGB') progress(0.2, desc="Generating edited image...") # Set random seed for reproducibility generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed) # Run the pipeline with torch.inference_mode(): output = pipe( input_image, prompt, negative_prompt=negative_prompt, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, num_images_per_prompt=1, generator=generator ) progress(1.0, desc="Done!") edited_image = output.images[0] return edited_image except Exception as e: 
raise gr.Error(f"Error during image editing: {str(e)}") # Example prompts example_prompts = [ ["ε°η«εζη", "", 4.5, 50, 42], ["Change the cat to a dog", "", 4.5, 50, 42], ["ε°θζ―εζζ΅·ζ»©", "", 4.5, 50, 43], ["Make it nighttime", "", 4.5, 50, 44], ["ε°εΎη转ζ’δΈΊζ²Ήη»ι£ζ Ό", "", 4.5, 50, 45], ] # Build Gradio interface with gr.Blocks(fill_height=True) as demo: gr.HTML("""
Transform your images with AI-powered editing using natural language instructions
Built with anycoder
⚡ Powered by Zero-GPU | 🤖 Model: meituan-longcat/LongCat-Image-Edit
⏱️ Note: Zero-GPU provides 120 seconds of GPU time per request. Model is loaded at startup from Hugging Face Hub. Processing typically takes 30-60 seconds depending on settings.
Powered by LongCat Image Edit with Zero-GPU | Built with anycoder