Spaces:

anycoderapps
/

LongCat-Image-Edit

Running on Zero

File size: 8,503 Bytes

import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline
import numpy as np

# Startup model loading: everything below runs once, when the module is imported.
model_id = 'meituan-longcat/LongCat-Image-Edit'

# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"🔄 Loading model from {model_id}...")

# Text processor is read from the repository's 'tokenizer' subfolder.
text_processor = AutoProcessor.from_pretrained(model_id, subfolder='tokenizer')

# Transformer weights: bfloat16, safetensors format, then moved to the device.
transformer = LongCatImageTransformer2DModel.from_pretrained(
    model_id,
    subfolder='transformer',
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)
transformer = transformer.to(device)

# Assemble the edit pipeline around the components loaded above.
pipe = LongCatImageEditPipeline.from_pretrained(
    model_id, transformer=transformer, text_processor=text_processor
)
pipe.to(device, torch.bfloat16)

print(f"✅ Model loaded successfully on {device}")

@spaces.GPU(duration=120)
def edit_image(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    progress=gr.Progress()
):
    """Edit an image according to a natural-language instruction.

    Args:
        input_image: Picture to edit; converted to RGB when in another mode.
        prompt: Edit instruction; must contain non-whitespace text.
        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_inference_steps: Step count for the pipeline; coerced to ``int``.
        seed: Seed for the torch generator; coerced to ``int``.
        progress: Gradio progress tracker used to report coarse stages.

    Returns:
        The first image of the pipeline output (``output.images[0]``).

    Raises:
        gr.Error: If no image or no instruction was supplied, or if any
            exception occurs while preparing or running the pipeline (the
            original exception is attached as ``__cause__``).
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit instruction")

    try:
        progress(0.1, desc="Preparing image...")

        # Convert to RGB if needed
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")

        # Slider/API payloads may deliver whole numbers as floats (e.g. 42.0);
        # torch.Generator.manual_seed only accepts an int, and the step count
        # is a count, so normalise both before use.
        seed = int(seed)
        num_inference_steps = int(num_inference_steps)

        # Set random seed for reproducibility
        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(generator_device).manual_seed(seed)

        # Run the pipeline
        with torch.inference_mode():
            output = pipe(
                input_image,
                prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                num_images_per_prompt=1,
                generator=generator
            )

        progress(1.0, desc="Done!")

        return output.images[0]

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # chained for the server logs.
        raise gr.Error(f"Error during image editing: {str(e)}") from e

# Example rows for the examples widget. Every row shares an empty negative
# prompt, guidance scale 4.5 and 50 steps; only the instruction and the seed vary.
example_prompts = [
    [instruction, "", 4.5, 50, example_seed]
    for instruction, example_seed in (
        ("将猫变成狗", 42),
        ("Change the cat to a dog", 42),
        ("将背景变成海滩", 43),
        ("Make it nighttime", 44),
        ("将图片转换为油画风格", 45),
    )
]

# Build Gradio interface
# Layout: an HTML header, a two-column row (inputs on the left, output and
# tips on the right), an examples section, the button wiring, and an HTML footer.
with gr.Blocks(fill_height=True) as demo:
    # Page header: title, tagline and links to anycoder and the model card.
    gr.HTML("""
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>🎨 LongCat Image Edit</h1>
            <p style="font-size: 16px; color: #666;">
                Transform your images with AI-powered editing using natural language instructions
            </p>
            <p style="font-size: 14px; margin-top: 10px;">
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
            </p>
            <p style="font-size: 12px; color: #888; margin-top: 5px;">
                ⚡ Powered by Zero-GPU | 🤗 Model: <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">meituan-longcat/LongCat-Image-Edit</a>
            </p>
        </div>
    """)
    
    with gr.Row():
        # Left column: source image, edit instruction, advanced settings, run button.
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            # type="pil" so the handler receives a PIL image.
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=400
            )
            
            prompt = gr.Textbox(
                label="Edit Instruction",
                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
                lines=3
            )
            
            # Optional tuning knobs, collapsed by default (open=False).
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (Optional)",
                    placeholder="What you don't want in the image",
                    lines=2
                )
                
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.5,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values = stronger adherence to prompt"
                )
                
                num_inference_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                    info="More steps = higher quality but slower"
                )
                
                seed = gr.Slider(
                    minimum=0,
                    maximum=999999,
                    value=42,
                    step=1,
                    label="Random Seed",
                    info="Use same seed for reproducible results"
                )
            
            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
            
            # User-facing note; the 120 seconds quoted here matches the
            # duration=120 passed to @spaces.GPU on edit_image.
            gr.Markdown("""
            <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin-top: 10px;">
                <p style="margin: 0; font-size: 12px; color: #555;">
                    ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request. 
                    Model is loaded at startup from Hugging Face Hub.
                    Processing typically takes 30-60 seconds depending on settings.
                </p>
            </div>
            """)
            
        # Right column: the edited result plus static usage tips.
        with gr.Column(scale=1):
            gr.Markdown("### 🎯 Output")
            output_image = gr.Image(
                label="Edited Image",
                type="pil",
                height=400,
                buttons=["download"]
            )
            
            gr.Markdown("### 💡 Tips")
            gr.Markdown("""
            - Upload a clear, well-lit image for best results
            - Be specific in your edit instructions
            - Supports both English and Chinese prompts
            - Try different guidance scales for varied results
            - Higher inference steps = better quality (but slower)
            - GPU time is limited - optimize your settings for speed
            - Model loads automatically from Hugging Face Hub
            """)
    
    # Examples section
    # The inputs list omits input_image, so an example row only supplies the
    # prompt, negative prompt and the three slider values.
    gr.Markdown("### 📝 Example Prompts")
    gr.Examples(
        examples=example_prompts,
        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
        label="Click to try these examples"
    )
    
    # Event handlers
    # The inputs list order matches edit_image's positional parameters
    # (progress is not listed; it keeps its gr.Progress() default).
    edit_btn.click(
        fn=edit_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            seed
        ],
        outputs=output_image,
        api_visibility="public"
    )
    
    # Footer
    gr.HTML("""
        <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
            <p style="color: #666; font-size: 14px;">
                Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> with Zero-GPU | 
                <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
            </p>
        </div>
    """)

# Entry point: start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    # Soft theme: blue/indigo accents on slate, Inter font, large text and spacing.
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md",
    )
    # Single footer link pointing back to the anycoder Space.
    anycoder_link = {
        "label": "Built with anycoder",
        "url": "https://huggingface.co/spaces/akhaliq/anycoder",
    }
    demo.launch(theme=soft_theme, footer_links=[anycoder_link])