Spaces:

anycoderapps
/

LongCat-Image-Edit

Running on Zero

App Files Files Community

LongCat-Image-Edit / app.py

akhaliq HF Staff

Update app.py

d6c9b79 verified 10 days ago

raw

history blame

9.11 kB

	import spaces
	import gradio as gr
	import torch
	from PIL import Image
	from transformers import AutoProcessor
	from longcat_image.models import LongCatImageTransformer2DModel
	from longcat_image.pipelines import LongCatImageEditPipeline
	import numpy as np
	import os

	# Lazily populated model state shared across requests: `pipe` holds the
	# loaded edit pipeline and `device` the torch device picked at load time.
	# Both stay None until initialize_model() has run.
	pipe, device = None, None

	def initialize_model():
	    """Load the LongCat image-edit pipeline into the module globals on first use.

	    The function is a no-op once the global ``pipe`` is set. Otherwise it
	    picks CUDA when available (CPU as fallback), loads the text processor,
	    the transformer and the pipeline from the Hugging Face Hub, and moves
	    everything to the chosen device in bfloat16.

	    The global ``pipe`` is only assigned after the pipeline has been fully
	    built and moved to the device, so a failure part-way through leaves
	    ``pipe`` as ``None`` and the next call retries the load instead of
	    reusing a half-initialized pipeline.

	    Raises:
	        Exception: any error raised while loading is printed and re-raised
	            unchanged.
	    """
	    global pipe, device

	    if pipe is not None:
	        return

	    try:
	        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	        model_id = 'meituan-longcat/LongCat-Image-Edit'

	        print(f"🔄 Loading model from {model_id}...")

	        # Load text processor
	        text_processor = AutoProcessor.from_pretrained(
	            model_id,
	            subfolder='tokenizer',
	        )

	        # Load transformer
	        transformer = LongCatImageTransformer2DModel.from_pretrained(
	            model_id,
	            subfolder='transformer',
	            torch_dtype=torch.bfloat16,
	            use_safetensors=True,
	        ).to(device)

	        # Build the pipeline in a local first; publishing it to the global
	        # before `.to()` succeeds would make later calls skip the load.
	        loaded_pipe = LongCatImageEditPipeline.from_pretrained(
	            model_id,
	            transformer=transformer,
	            text_processor=text_processor,
	        )
	        loaded_pipe.to(device, torch.bfloat16)

	        pipe = loaded_pipe

	        print(f"✅ Model loaded successfully on {device}")

	    except Exception as e:
	        print(f"❌ Error loading model: {e}")
	        raise

	@spaces.GPU(duration=120)
	def edit_image(
	    input_image: Image.Image,
	    prompt: str,
	    negative_prompt: str,
	    guidance_scale: float,
	    num_inference_steps: int,
	    seed: int,
	    progress=gr.Progress()
	):
	    """Edit an image according to a text instruction.

	    Args:
	        input_image: Source image; converted to RGB when in another mode.
	        prompt: Edit instruction; must contain non-whitespace text.
	        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.
	        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
	        num_inference_steps: Number of steps; coerced to ``int`` because UI
	            sliders and API clients may deliver a float.
	        seed: Seed for the torch generator; coerced to ``int`` because
	            ``torch.Generator.manual_seed`` rejects non-integer values.
	        progress: Gradio progress tracker used to report coarse stages.

	    Returns:
	        The first image in the pipeline output (``output.images[0]``).

	    Raises:
	        gr.Error: if no image or no instruction was given, or if model
	            loading / inference fails (the original exception is chained).
	    """

	    if input_image is None:
	        raise gr.Error("Please upload an image first")

	    if not prompt or not prompt.strip():
	        raise gr.Error("Please enter an edit instruction")

	    try:
	        # Initialize model if not already loaded
	        initialize_model()

	        progress(0.1, desc="Preparing image...")

	        # Convert to RGB if needed
	        if input_image.mode != 'RGB':
	            input_image = input_image.convert('RGB')

	        progress(0.2, desc="Generating edited image...")

	        # Slider values can arrive as floats; normalize before handing them
	        # to torch / the pipeline, which expect integers here.
	        seed = int(seed)
	        num_inference_steps = int(num_inference_steps)

	        # Set random seed for reproducibility
	        generator = torch.Generator(
	            "cuda" if torch.cuda.is_available() else "cpu"
	        ).manual_seed(seed)

	        # Run the pipeline
	        with torch.inference_mode():
	            output = pipe(
	                input_image,
	                prompt,
	                negative_prompt=negative_prompt,
	                guidance_scale=guidance_scale,
	                num_inference_steps=num_inference_steps,
	                num_images_per_prompt=1,
	                generator=generator
	            )

	        progress(1.0, desc="Done!")

	        edited_image = output.images[0]

	        return edited_image

	    except Exception as e:
	        # Surface the failure in the UI while keeping the original traceback
	        # attached for the server logs.
	        raise gr.Error(f"Error during image editing: {str(e)}") from e

	# Example rows for the UI, in the column order
	# [prompt, negative_prompt, guidance_scale, num_inference_steps, seed].
	# Every example shares an empty negative prompt, guidance 4.5 and 50 steps;
	# only the instruction and the seed vary.
	example_prompts = [
	    [instruction, "", 4.5, 50, example_seed]
	    for instruction, example_seed in (
	        ("将猫变成狗", 42),
	        ("Change the cat to a dog", 42),
	        ("将背景变成海滩", 43),
	        ("Make it nighttime", 44),
	        ("将图片转换为油画风格", 45),
	    )
	]

	# Build Gradio interface: a header, a two-column row (inputs and advanced
	# settings on the left, result and tips on the right), clickable example
	# prompts, the button wiring, and a footer.
	with gr.Blocks(fill_height=True) as demo:
	    # Header. The separator is a plain "|": a backslash-escaped pipe is an
	    # invalid escape sequence in a normal string literal and would show a
	    # stray backslash on the page.
	    gr.HTML("""
	    <div style="text-align: center; margin-bottom: 20px;">
	        <h1>🎨 LongCat Image Edit</h1>
	        <p style="font-size: 16px; color: #666;">
	            Transform your images with AI-powered editing using natural language instructions
	        </p>
	        <p style="font-size: 14px; margin-top: 10px;">
	            Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
	        </p>
	        <p style="font-size: 12px; color: #888; margin-top: 5px;">
	            ⚡ Powered by Zero-GPU | 🤗 Model: <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">meituan-longcat/LongCat-Image-Edit</a>
	        </p>
	    </div>
	    """)

	    with gr.Row():
	        # Left column: source image, instruction and generation settings.
	        with gr.Column(scale=1):
	            gr.Markdown("### 📤 Input")
	            input_image = gr.Image(
	                label="Upload Image",
	                type="pil",
	                sources=["upload", "clipboard"],
	                height=400
	            )

	            prompt = gr.Textbox(
	                label="Edit Instruction",
	                placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
	                lines=3
	            )

	            with gr.Accordion("⚙️ Advanced Settings", open=False):
	                negative_prompt = gr.Textbox(
	                    label="Negative Prompt (Optional)",
	                    placeholder="What you don't want in the image",
	                    lines=2
	                )

	                guidance_scale = gr.Slider(
	                    minimum=1.0,
	                    maximum=10.0,
	                    value=4.5,
	                    step=0.5,
	                    label="Guidance Scale",
	                    info="Higher values = stronger adherence to prompt"
	                )

	                num_inference_steps = gr.Slider(
	                    minimum=20,
	                    maximum=100,
	                    value=50,
	                    step=5,
	                    label="Inference Steps",
	                    info="More steps = higher quality but slower"
	                )

	                seed = gr.Slider(
	                    minimum=0,
	                    maximum=999999,
	                    value=42,
	                    step=1,
	                    label="Random Seed",
	                    info="Use same seed for reproducible results"
	                )

	            edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")

	            # The 120-second figure mirrors the duration passed to
	            # @spaces.GPU on edit_image; keep the two in sync.
	            gr.Markdown("""
	            <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin-top: 10px;">
	                <p style="margin: 0; font-size: 12px; color: #555;">
	                    ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
	                    First run may take longer as the model loads from Hugging Face Hub.
	                    Processing typically takes 30-60 seconds depending on settings.
	                </p>
	            </div>
	            """)

	        # Right column: edited result plus usage tips.
	        with gr.Column(scale=1):
	            gr.Markdown("### 🎯 Output")
	            output_image = gr.Image(
	                label="Edited Image",
	                type="pil",
	                height=400,
	                buttons=["download"]
	            )

	            gr.Markdown("### 💡 Tips")
	            gr.Markdown("""
	            - Upload a clear, well-lit image for best results
	            - Be specific in your edit instructions
	            - Supports both English and Chinese prompts
	            - Try different guidance scales for varied results
	            - Higher inference steps = better quality (but slower)
	            - GPU time is limited - optimize your settings for speed
	            - Model loads automatically from Hugging Face Hub
	            """)

	    # Examples section: each row fills the five settings inputs; the image
	    # itself is not part of the examples and must still be uploaded.
	    gr.Markdown("### 📝 Example Prompts")
	    gr.Examples(
	        examples=example_prompts,
	        inputs=[prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
	        label="Click to try these examples"
	    )

	    # Event handlers: the input order must match edit_image's parameters.
	    edit_btn.click(
	        fn=edit_image,
	        inputs=[
	            input_image,
	            prompt,
	            negative_prompt,
	            guidance_scale,
	            num_inference_steps,
	            seed
	        ],
	        outputs=output_image,
	        api_name="edit_image"
	    )

	    # Footer
	    gr.HTML("""
	    <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
	        <p style="color: #666; font-size: 14px;">
	            Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> with Zero-GPU |
	            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
	        </p>
	    </div>
	    """)

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    # Soft theme with a blue/indigo palette and the Inter web font.
	    app_theme = gr.themes.Soft(
	        primary_hue="blue",
	        secondary_hue="indigo",
	        neutral_hue="slate",
	        font=gr.themes.GoogleFont("Inter"),
	        text_size="lg",
	        spacing_size="lg",
	        radius_size="md"
	    )
	    # Single footer entry linking back to the anycoder Space.
	    anycoder_link = {
	        "label": "Built with anycoder",
	        "url": "https://huggingface.co/spaces/akhaliq/anycoder",
	    }
	    demo.launch(theme=app_theme, footer_links=[anycoder_link])