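"""Gradio demo for meituan-longcat/LongCat-Image-Edit: edit an uploaded image
with natural-language instructions (English or Chinese) on a Hugging Face
ZeroGPU Space."""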
import spaces
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor
from longcat_image.models import LongCatImageTransformer2DModel
from longcat_image.pipelines import LongCatImageEditPipeline
import numpy as np
# Load model directly at startup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_id = 'meituan-longcat/LongCat-Image-Edit'
print(f"πŸ”„ Loading model from {model_id}...")
# Load text processor
text_processor = AutoProcessor.from_pretrained(
model_id,
subfolder='tokenizer'
)
# Load transformer
transformer = LongCatImageTransformer2DModel.from_pretrained(
model_id,
subfolder='transformer',
torch_dtype=torch.bfloat16,
use_safetensors=True
).to(device)
# Load pipeline
pipe = LongCatImageEditPipeline.from_pretrained(
model_id,
transformer=transformer,
text_processor=text_processor,
)
pipe.to(device, torch.bfloat16)
print(f"βœ… Model loaded successfully on {device}")
@spaces.GPU(duration=120)
def edit_image(
input_image: Image.Image,
prompt: str,
negative_prompt: str,
guidance_scale: float,
num_inference_steps: int,
seed: int,
progress=gr.Progress()
):
"""Edit image based on text prompt"""
if input_image is None:
raise gr.Error("Please upload an image first")
if not prompt or prompt.strip() == "":
raise gr.Error("Please enter an edit instruction")
try:
progress(0.1, desc="Preparing image...")
# Convert to RGB if needed
if input_image.mode != 'RGB':
input_image = input_image.convert('RGB')
progress(0.2, desc="Generating edited image...")
# Set random seed for reproducibility
generator = torch.Generator("cuda" if torch.cuda.is_available() else "cpu").manual_seed(seed)
# Run the pipeline
with torch.inference_mode():
output = pipe(
input_image,
prompt,
negative_prompt=negative_prompt,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
num_images_per_prompt=1,
generator=generator
)
progress(1.0, desc="Done!")
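# The pipeline returns a batch of PIL images; num_images_per_prompt=1, so take the first.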
edited_image = output.images[0]
return edited_image
except Exception as e:
raise gr.Error(f"Error during image editing: {str(e)}")
# Example prompts
example_prompts = [
["ε°†ηŒ«ε˜ζˆη‹—", "", 4.5, 50, 42],
["Change the cat to a dog", "", 4.5, 50, 42],
["ε°†θƒŒζ™―ε˜ζˆζ΅·ζ»©", "", 4.5, 50, 43],
["Make it nighttime", "", 4.5, 50, 44],
["ε°†ε›Ύη‰‡θ½¬ζ’δΈΊζ²Ήη”»ι£Žζ Ό", "", 4.5, 50, 45],
]
# Build Gradio interface
with gr.Blocks(fill_height=True) as demo:
gr.HTML("""
<div style="text-align: center; margin-bottom: 20px;">
<h1>🎨 LongCat Image Edit</h1>
<p style="font-size: 16px; color: #666;">
Transform your images with AI-powered editing using natural language instructions
</p>
<p style="font-size: 14px; margin-top: 10px;">
Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
</p>
<p style="font-size: 12px; color: #888; margin-top: 5px;">
⚑ Powered by Zero-GPU | πŸ€— Model: <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">meituan-longcat/LongCat-Image-Edit</a>
</p>
</div>
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ“€ Input")
input_image = gr.Image(
label="Upload Image",
type="pil",
sources=["upload", "clipboard"],
height=400
)
prompt = gr.Textbox(
label="Edit Instruction",
placeholder="Describe how you want to edit the image (e.g., 'ε°†ηŒ«ε˜ζˆη‹—' or 'Change the cat to a dog')",
lines=3
)
with gr.Accordion("βš™οΈ Advanced Settings", open=False):
negative_prompt = gr.Textbox(
label="Negative Prompt (Optional)",
placeholder="What you don't want in the image",
lines=2
)
guidance_scale = gr.Slider(
minimum=1.0,
maximum=10.0,
value=4.5,
step=0.5,
label="Guidance Scale",
info="Higher values = stronger adherence to prompt"
)
num_inference_steps = gr.Slider(
minimum=20,
maximum=100,
value=50,
step=5,
label="Inference Steps",
info="More steps = higher quality but slower"
)
seed = gr.Slider(
minimum=0,
maximum=999999,
value=42,
step=1,
label="Random Seed",
info="Use same seed for reproducible results"
)
edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
gr.Markdown("""
<div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin-top: 10px;">
<p style="margin: 0; font-size: 12px; color: #555;">
⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
The model is loaded at startup from the Hugging Face Hub, and processing
typically takes 30-60 seconds depending on your settings.
</p>
</div>
""")
with gr.Column(scale=1):
gr.Markdown("### 🎯 Output")
output_image = gr.Image(
label="Edited Image",
type="pil",
height=400,
buttons=["download"]
)
gr.Markdown("### πŸ’‘ Tips")
gr.Markdown("""
- Upload a clear, well-lit image for best results
- Be specific in your edit instructions
- Supports both English and Chinese prompts
- Try different guidance scales for varied results
- Higher inference steps = better quality (but slower)
- GPU time is limited - optimize your settings for speed
- Model loads automatically from Hugging Face Hub
""")
# Examples section
gr.Markdown("### πŸ“ Example Prompts")
gr.Examples(
examples=example_prompts,
inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
label="Click to try these examples"
)
# Event handlers
edit_btn.click(
fn=edit_image,
inputs=[
input_image,
prompt,
negative_prompt,
guidance_scale,
num_inference_steps,
seed
],
outputs=output_image,
api_visibility="public"
)
# Footer
gr.HTML("""
<div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
<p style="color: #666; font-size: 14px;">
Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> with Zero-GPU |
<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
</p>
</div>
""")
# Launch the app
if __name__ == "__main__":
demo.launch(
theme=gr.themes.Soft(
primary_hue="blue",
secondary_hue="indigo",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter"),
text_size="lg",
spacing_size="lg",
radius_size="md"
),
footer_links=[
{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}
]
)