# Source: app.py from a Hugging Face Space by akhaliq (HF Staff).
# Last commit: "Update app.py" (d6c9b79, verified); file size 9.11 kB.
import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline
import numpy as np
import os
# Global variables for model
# `pipe` caches the loaded LongCatImageEditPipeline (None until first use);
# `device` records the torch.device that pipeline was moved to.
pipe = None
device = None


def initialize_model():
    """Load the LongCat image-edit pipeline once and cache it in module globals.

    Returns immediately when ``pipe`` is already populated. Otherwise loads the
    text processor, the transformer and the pipeline for
    ``meituan-longcat/LongCat-Image-Edit``, moves them to CUDA when available
    (CPU otherwise) in bfloat16, and stores the result in the globals ``pipe``
    and ``device``.

    Raises:
        Exception: any error raised while loading is printed and re-raised
            unchanged. The globals are left untouched in that case so the
            next call retries from scratch.
    """
    global pipe, device
    if pipe is not None:
        return

    try:
        target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_id = 'meituan-longcat/LongCat-Image-Edit'
        print(f"🔄 Loading model from {model_id}...")

        # Load text processor
        text_processor = AutoProcessor.from_pretrained(
            model_id,
            subfolder='tokenizer'
        )

        # Load transformer
        transformer = LongCatImageTransformer2DModel.from_pretrained(
            model_id,
            subfolder='transformer',
            torch_dtype=torch.bfloat16,
            use_safetensors=True
        ).to(target_device)

        # Load pipeline
        loaded_pipe = LongCatImageEditPipeline.from_pretrained(
            model_id,
            transformer=transformer,
            text_processor=text_processor,
        )
        loaded_pipe.to(target_device, torch.bfloat16)

        # Publish to the globals only after every step succeeded; otherwise a
        # failure in `.to(...)` would leave a half-initialised pipeline cached
        # and the early return above would hand it out on the next call.
        device = target_device
        pipe = loaded_pipe
        print(f"✅ Model loaded successfully on {device}")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        raise
@spaces.GPU(duration=120)
def edit_image(
    input_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    progress=gr.Progress()
):
    """Edit an image according to a text instruction and return the result.

    Args:
        input_image: Image to edit; converted to RGB when in another mode.
        prompt: Edit instruction; must contain non-whitespace text.
        negative_prompt: Passed through to the pipeline as ``negative_prompt``.
        guidance_scale: Passed through to the pipeline as ``guidance_scale``.
        num_inference_steps: Number of denoising steps; coerced to ``int``.
        seed: Seed for the torch random generator; coerced to ``int``.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        The first image of the pipeline output (``output.images[0]``).

    Raises:
        gr.Error: if no image or no instruction was supplied, or if anything
            fails while loading the model or running the pipeline.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first")
    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter an edit instruction")

    try:
        # Initialize model if not already loaded
        initialize_model()

        progress(0.1, desc="Preparing image...")
        # Convert to RGB if needed
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        progress(0.2, desc="Generating edited image...")
        # Slider values can arrive as floats (e.g. 42.0); manual_seed and the
        # step count need real integers, so coerce before use.
        seed_value = int(seed)
        step_count = int(num_inference_steps)

        # Set random seed for reproducibility
        generator = torch.Generator(
            "cuda" if torch.cuda.is_available() else "cpu"
        ).manual_seed(seed_value)

        # Run the pipeline
        with torch.inference_mode():
            output = pipe(
                input_image,
                prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                num_inference_steps=step_count,
                num_images_per_prompt=1,
                generator=generator
            )

        progress(1.0, desc="Done!")
        edited_image = output.images[0]
        return edited_image
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the original
        # traceback attached for server-side debugging.
        raise gr.Error(f"Error during image editing: {str(e)}") from e
# Example prompts
# Each row is [prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
# matching the order of the `inputs` list given to gr.Examples.
example_prompts = [
    ["将猫变成狗", "", 4.5, 50, 42],  # "Change the cat into a dog"
    ["Change the cat to a dog", "", 4.5, 50, 42],
    ["将背景变成海滩", "", 4.5, 50, 43],  # "Change the background to a beach"
    ["Make it nighttime", "", 4.5, 50, 44],
    ["将图片转换为油画风格", "", 4.5, 50, 45],  # "Convert the picture to oil-painting style"
]
# Build Gradio interface
# Layout: header, then a two-column row (inputs and settings on the left,
# output and tips on the right), then example prompts, the click handler
# wiring, and a footer. The triple-quoted HTML/Markdown bodies are kept
# flush-left so the text handed to Gradio carries no extra indentation.
with gr.Blocks(fill_height=True) as demo:
    gr.HTML("""
<div style="text-align: center; margin-bottom: 20px;">
<h1>🎨 LongCat Image Edit</h1>
<p style="font-size: 16px; color: #666;">
Transform your images with AI-powered editing using natural language instructions
</p>
<p style="font-size: 14px; margin-top: 10px;">
Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
</p>
<p style="font-size: 12px; color: #888; margin-top: 5px;">
⚡ Powered by Zero-GPU | 🤗 Model: <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">meituan-longcat/LongCat-Image-Edit</a>
</p>
</div>
""")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Input")
            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                height=400
            )
            prompt = gr.Textbox(
                label="Edit Instruction",
                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
                lines=3
            )
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (Optional)",
                    placeholder="What you don't want in the image",
                    lines=2
                )
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=4.5,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values = stronger adherence to prompt"
                )
                num_inference_steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                    info="More steps = higher quality but slower"
                )
                seed = gr.Slider(
                    minimum=0,
                    maximum=999999,
                    value=42,
                    step=1,
                    label="Random Seed",
                    info="Use same seed for reproducible results"
                )
            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
            gr.Markdown("""
<div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin-top: 10px;">
<p style="margin: 0; font-size: 12px; color: #555;">
⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
First run may take longer as the model loads from Hugging Face Hub.
Processing typically takes 30-60 seconds depending on settings.
</p>
</div>
""")
        with gr.Column(scale=1):
            gr.Markdown("### 🎯 Output")
            output_image = gr.Image(
                label="Edited Image",
                type="pil",
                height=400,
                buttons=["download"]
            )
            gr.Markdown("### 💡 Tips")
            gr.Markdown("""
- Upload a clear, well-lit image for best results
- Be specific in your edit instructions
- Supports both English and Chinese prompts
- Try different guidance scales for varied results
- Higher inference steps = better quality (but slower)
- GPU time is limited - optimize your settings for speed
- Model loads automatically from Hugging Face Hub
""")
    # Examples section
    # Examples fill only the text and slider inputs; the image is not part of
    # the `inputs` list, so the user still uploads their own.
    gr.Markdown("### 📝 Example Prompts")
    gr.Examples(
        examples=example_prompts,
        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
        label="Click to try these examples"
    )
    # Event handlers
    # The input order here must match edit_image's positional parameters.
    edit_btn.click(
        fn=edit_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            num_inference_steps,
            seed
        ],
        outputs=output_image,
        api_name="edit_image"
    )
    # Footer
    gr.HTML("""
<div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
<p style="color: #666; font-size: 14px;">
Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> with Zero-GPU |
<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
</p>
</div>
""")
# Launch the app
# Runs only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    # Soft theme with a blue/indigo/slate palette and the Inter web font.
    app_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md",
    )
    # Single footer link pointing at the anycoder Space.
    anycoder_link = {
        "label": "Built with anycoder",
        "url": "https://huggingface.co/spaces/akhaliq/anycoder",
    }
    demo.launch(theme=app_theme, footer_links=[anycoder_link])