"""Gradio demo for instruction-based image editing with LongCat-Image-Edit.

The model components are loaded once at import time and reused by every
request. The request handler, ``edit_image``, is decorated with
``spaces.GPU(duration=120)``.
"""
# NOTE(review): `spaces` is deliberately kept as the first import, as in the
# original file; presumably ZeroGPU needs it imported before torch - confirm.
import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline
import numpy as np

# Load model directly at startup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_id = 'meituan-longcat/LongCat-Image-Edit'

print(f"🔄 Loading model from {model_id}...")

# Load text processor
text_processor = AutoProcessor.from_pretrained(
    model_id,
    subfolder='tokenizer'
)

# Load transformer
transformer = LongCatImageTransformer2DModel.from_pretrained(
    model_id,
    subfolder='transformer',
    torch_dtype=torch.bfloat16,
    use_safetensors=True
).to(device)

# Load pipeline
pipe = LongCatImageEditPipeline.from_pretrained(
    model_id,
    transformer=transformer,
    text_processor=text_processor,
)
pipe.to(device, torch.bfloat16)

print(f"✅ Model loaded successfully on {device}")


@spaces.GPU(duration=120)
def edit_image(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    progress=gr.Progress()
) -> Image.Image:
    """Edit an image according to a text instruction.

    Args:
        input_image: Image to edit; converted to RGB if it is in another mode.
        prompt: Edit instruction. Must contain non-whitespace text.
        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_inference_steps: Number of steps; coerced to ``int`` because UI
            sliders and API callers may deliver a float.
        seed: Seed for the torch generator; coerced to ``int`` for the same
            reason (``manual_seed`` only accepts integers).
        progress: Gradio progress tracker used to report coarse stages.

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: If no image or no instruction was supplied, or if anything
            fails while preparing the image or running the pipeline.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit instruction")

    try:
        progress(0.1, desc="Preparing image...")

        # Convert to RGB if needed
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")

        # Seed a generator on the same device the pipeline was moved to, so
        # results are reproducible for a given seed.
        generator = torch.Generator(device).manual_seed(int(seed))

        # Run the pipeline
        with torch.inference_mode():
            output = pipe(
                input_image,
                prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                num_inference_steps=int(num_inference_steps),
                num_images_per_prompt=1,
                generator=generator
            )

        progress(1.0, desc="Done!")

        return output.images[0]

    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Error during image editing: {str(e)}") from e


# Example prompts: [prompt, negative_prompt, guidance_scale, steps, seed]
example_prompts = [
    ["将猫变成狗", "", 4.5, 50, 42],
    ["Change the cat to a dog", "", 4.5, 50, 42],
    ["将背景变成海滩", "", 4.5, 50, 43],
    ["Make it nighttime", "", 4.5, 50, 44],
    ["将图片转换为油画风格", "", 4.5, 50, 45],
]

# Build Gradio interface
with gr.Blocks(fill_height=True) as demo:
    gr.HTML("""

🎨 LongCat Image Edit

Transform your images with AI-powered editing using natural language instructions

Built with anycoder

⚡ Powered by Zero-GPU | 🤗 Model: meituan-longcat/LongCat-Image-Edit

""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=400
            )

            prompt = gr.Textbox(
                label="Edit Instruction",
                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
                lines=3
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (Optional)",
                    placeholder="What you don't want in the image",
                    lines=2
                )

                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.5,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values = stronger adherence to prompt"
                )

                num_inference_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                    info="More steps = higher quality but slower"
                )

                seed = gr.Slider(
                    minimum=0,
                    maximum=999999,
                    value=42,
                    step=1,
                    label="Random Seed",
                    info="Use same seed for reproducible results"
                )

            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")

            gr.Markdown("""

⏱️ Note: Zero-GPU provides 120 seconds of GPU time per request. Model is loaded at startup from Hugging Face Hub. Processing typically takes 30-60 seconds depending on settings.

""")

        with gr.Column(scale=1):
            gr.Markdown("### 🎯 Output")
            output_image = gr.Image(
                label="Edited Image",
                type="pil",
                height=400,
                buttons=["download"]
            )

            gr.Markdown("### 💡 Tips")
            # Adjacent literals concatenate to exactly the original one-line text.
            gr.Markdown(
                " - Upload a clear, well-lit image for best results"
                " - Be specific in your edit instructions"
                " - Supports both English and Chinese prompts"
                " - Try different guidance scales for varied results"
                " - Higher inference steps = better quality (but slower)"
                " - GPU time is limited - optimize your settings for speed"
                " - Model loads automatically from Hugging Face Hub "
            )

    # Examples section
    gr.Markdown("### 📝 Example Prompts")
    gr.Examples(
        examples=example_prompts,
        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
        label="Click to try these examples"
    )

    # Event handlers
    edit_btn.click(
        fn=edit_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            seed
        ],
        outputs=output_image,
        api_visibility="public"
    )

    # Footer
    gr.HTML("""

""")

# Launch the app
if __name__ == "__main__":
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
            text_size="lg",
            spacing_size="lg",
            radius_size="md"
        ),
        footer_links=[
            {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
        ]
    )