Spaces:
Running
on
Zero
Running
on
Zero
| """Z-Image-Turbo v1.0 - Stable release (9 steps, SDPA, prompt polishing, transform steps fix)""" | |
| import os | |
| import torch | |
| import spaces | |
| import gradio as gr | |
| import requests | |
| import io | |
| from PIL import Image | |
| from diffusers import DiffusionPipeline, ZImageImg2ImgPipeline | |
| from huggingface_hub import InferenceClient | |
# Enable optimized backends (SDPA uses FlashAttention when available)
# These flags are process-wide and are set once, before any pipeline is built.
torch.backends.cuda.enable_flash_sdp(True)
torch.backends.cuda.enable_mem_efficient_sdp(True)
# Allow cuDNN to benchmark and pick convolution algorithms per input shape.
torch.backends.cudnn.benchmark = True
# Prompt polishing using HF Inference API
def polish_prompt(original_prompt, mode="generate"):
    """Expand a short prompt into a detailed one using the HF Inference API.

    Args:
        original_prompt: Raw user prompt; may be empty or ``None``.
        mode: ``"transform"`` selects the style-focused image-to-image system
            prompt; any other value selects the text-to-image one.

    Returns:
        The polished prompt collapsed onto a single line. A canned fallback
        prompt is returned when the input is blank. The original prompt is
        returned unchanged when ``HF_TOKEN`` is not set, when the API call
        fails, or when the model replies with no usable text.
    """
    if not original_prompt or not original_prompt.strip():
        if mode == "transform":
            return "high quality, enhanced details, professional finish"
        return "Ultra HD, 4K, cinematic composition, highly detailed"

    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        return original_prompt

    if mode == "transform":
        system_prompt = (
            "You are a prompt optimizer for AI image-to-image transformation.\n"
            "The user wants to transform an existing image. Rewrite their input into a precise, technical prompt\n"
            "that describes the target style, technique, and visual qualities.\n"
            "Focus on: artistic style, color palette, lighting style, texture, rendering technique, mood.\n"
            "Keep it under 100 words. Be specific about visual attributes.\n"
            "Do not describe a scene - describe HOW the image should look.\n"
            "Output only the improved prompt, no explanation."
        )
    else:
        system_prompt = (
            "You are a prompt optimizer for AI image generation.\n"
            "Rewrite the user's input into a detailed, expressive prompt that will produce stunning images.\n"
            "Keep it under 150 words. Be descriptive about lighting, atmosphere, style, and details.\n"
            "Do not explain - just output the improved prompt directly."
        )

    try:
        client = InferenceClient(api_key=api_key)
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-72B-Instruct",
            max_tokens=200,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": original_prompt},
            ],
        )
        polished = completion.choices[0].message.content
    except Exception as e:
        # Polishing is best-effort: any API/network failure must not block
        # image generation, so fall back to the user's own prompt.
        print(f"Prompt polish error: {e}")
        return original_prompt

    # Collapse newlines and repeated whitespace into single spaces. The reply
    # may be None or blank; never hand an empty prompt to the pipeline.
    polished = " ".join((polished or "").split())
    return polished or original_prompt
# Model loading runs once at import time; both pipelines are module-level
# globals used by generate() and transform().
print("Loading Z-Image-Turbo pipeline...")
# Load text-to-image pipeline
pipe_t2i = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
)
# Move the loaded pipeline to the GPU and cast it to bfloat16.
pipe_t2i.to("cuda", torch.bfloat16)
# Create img2img pipeline sharing components (no duplicate loading)
# It is built after the .to() call above and reuses the very same component
# objects, so no second copy of the weights is created.
pipe_i2i = ZImageImg2ImgPipeline(
    transformer=pipe_t2i.transformer,
    vae=pipe_t2i.vae,
    text_encoder=pipe_t2i.text_encoder,
    tokenizer=pipe_t2i.tokenizer,
    scheduler=pipe_t2i.scheduler,
)
print("Pipelines ready!")
# Style presets: dropdown label -> text appended to the final prompt.
STYLE_SUFFIXES = {
    "None": "",
    "Photorealistic": ", photorealistic, ultra detailed, 8k, professional photography",
    "Cinematic": ", cinematic lighting, movie scene, dramatic atmosphere, film grain",
    "Anime": ", anime style, vibrant colors, cel shaded, studio ghibli inspired",
    "Digital Art": ", digital art, artstation trending, concept art, highly detailed",
    "Oil Painting": ", oil painting style, classical art, brush strokes visible",
    "Watercolor": ", watercolor painting, soft edges, artistic, delicate colors",
    "3D Render": ", 3D render, octane render, unreal engine 5, ray tracing",
    "Fantasy": ", fantasy art, magical, ethereal glow, mystical atmosphere",
    "Sci-Fi": ", science fiction, futuristic, advanced technology, neon accents",
}
# Dropdown choices are derived from the mapping (dicts keep insertion order),
# so a style can never be listed without a suffix or vice versa.
STYLES = list(STYLE_SUFFIXES)

# Supported output sizes as (label prefix, width, height). The dropdown label
# is generated as "<prefix> (<width>x<height>)" so the text shown to the user
# always matches the dimensions actually used.
_RATIO_SPECS = [
    ("1:1 Square", 1024, 1024),
    ("16:9 Landscape", 1344, 768),
    ("9:16 Portrait", 768, 1344),
    ("4:3 Standard", 1152, 896),
    ("3:4 Vertical", 896, 1152),
    ("21:9 Cinematic", 1536, 640),
    ("3:2 Photo", 1216, 832),
    ("2:3 Photo Portrait", 832, 1216),
    ("1:1 XL", 1536, 1536),
    ("16:9 XL", 1920, 1088),
    ("9:16 XL", 1088, 1920),
    ("4:3 XL", 1536, 1152),
    ("3:4 XL", 1152, 1536),
    ("1:1 MAX", 2048, 2048),
    ("16:9 MAX", 2048, 1152),
    ("9:16 MAX", 1152, 2048),
    ("4:3 MAX", 2048, 1536),
    ("3:4 MAX", 1536, 2048),
]
# Label -> (width, height) lookup used by generate().
RATIO_DIMS = {
    f"{prefix} ({width}x{height})": (width, height)
    for prefix, width, height in _RATIO_SPECS
}
# Dropdown choices, in the same order as the specs above.
RATIOS = list(RATIO_DIMS)
# Every example row ends with the same steps / seed / randomize values.
_EXAMPLE_TAIL = [9, 42, True]

# Generate examples: [prompt, style, ratio, steps, seed, randomize]
_GENERATE_PRESETS = (
    ("Ancient dragon perched on a crumbling Gothic cathedral at dusk, stormy purple sky with lightning bolts in the distance", "Fantasy", "1:1 Square (1024x1024)"),
    ("Bioluminescent jellyfish drifting through a dark alien ocean, twin moons glowing above the water surface", "Digital Art", "9:16 Portrait (768x1344)"),
    ("Elderly craftsman with weathered hands repairing an intricate brass clockwork mechanism, warm workshop light", "Photorealistic", "4:3 Standard (1152x896)"),
    ("Cyberpunk samurai warrior standing in a neon-lit rainy alley, glowing armor circuits reflected in puddles", "Sci-Fi", "3:4 Vertical (896x1152)"),
    ("Victorian lady in burgundy silk gown standing in a grand European ballroom, crystal chandeliers above marble floors, warm golden sunlight streaming through tall arched windows", "Oil Painting", "4:3 XL (1536x1152)"),
    ("Abandoned Tokyo streets slowly reclaimed by wild nature, pink cherry blossoms covering cracked pavement, a small deer grazing near rusty cars, thick morning fog rolling between overgrown skyscrapers, green vines climbing broken windows, peaceful post-apocalyptic silence", "Cinematic", "16:9 XL (1920x1088)"),
    ("Alien desert planet with giant floating rock islands in the sky connected by rope bridges, camel caravans walking on the sandy ground far below, dramatic sunset with orange purple and gold colors across the sky, epic wide landscape view", "Fantasy", "16:9 MAX (2048x1152)"),
    ("Cozy witch cottage interior on a stormy autumn night, iron cauldrons bubbling with green smoke, wooden shelves packed with glowing potion bottles and ancient spell books, a sleepy black cat curled by the stone fireplace, bundles of dried herbs and garlic hanging from dark oak ceiling beams, warm amber candlelight flickering throughout the room", "Digital Art", "1:1 MAX (2048x2048)"),
)
EXAMPLES_GENERATE = [
    [text, style, ratio, *_EXAMPLE_TAIL] for text, style, ratio in _GENERATE_PRESETS
]

# Transform examples: [prompt, style, strength, steps, seed, randomize]
_TRANSFORM_PRESETS = (
    ("Transform into ultra realistic photograph with sharp details and natural lighting", "Photorealistic", 0.7),
    ("Dramatic movie scene with cinematic lighting and film grain texture", "Cinematic", 0.65),
    ("Japanese anime style with vibrant colors and cel shading", "Anime", 0.75),
    ("Digital concept art style, trending on artstation", "Digital Art", 0.6),
    ("Classical oil painting with visible brush strokes and rich colors", "Oil Painting", 0.7),
    ("Soft watercolor painting with delicate washes and gentle edges", "Watercolor", 0.65),
    ("High quality 3D render with ray tracing and realistic materials", "3D Render", 0.7),
    ("Magical fantasy art with ethereal glow and mystical atmosphere", "Fantasy", 0.65),
    ("Futuristic sci-fi style with neon accents and advanced technology", "Sci-Fi", 0.7),
    ("Enhanced version with improved details and quality", "None", 0.4),
)
EXAMPLES_TRANSFORM = [
    [text, style, strength, *_EXAMPLE_TAIL] for text, style, strength in _TRANSFORM_PRESETS
]
def upload_to_hf_cdn(image):
    """Upload an image as PNG to the Hugging Face upload endpoint.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method,
            or ``None`` when there is nothing to share.

    Returns:
        The stripped response body on HTTP 200 (shown to the user as the
        image link); otherwise a human-readable message. This function never
        raises, because its result is displayed directly in a textbox.
    """
    if image is None:
        return "No image to share"
    try:
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        response = requests.post(
            "https://huggingface.co/uploads",
            headers={"Content-Type": "image/png"},
            data=buf.getvalue(),
            # Without a timeout a stalled connection would block the UI
            # handler indefinitely.
            timeout=30,
        )
    except Exception as e:
        # UI boundary: report encoding/network problems instead of crashing.
        return f"Error: {str(e)}"
    if response.status_code == 200:
        return response.text.strip()
    return f"Upload failed: {response.status_code}"
def do_polish_prompt(prompt, style, do_polish, mode="generate"):
    """Build the prompt pair for a run (CPU-only step that precedes the GPU call).

    Args:
        prompt: Raw user prompt; may be empty or ``None``.
        style: Style label looked up in ``STYLE_SUFFIXES``; unknown labels
            add no suffix.
        do_polish: When true, the trimmed prompt is sent through
            ``polish_prompt``.
        mode: Forwarded to ``polish_prompt``.

    Returns:
        ``(final_prompt, polished)`` where ``final_prompt`` is ``polished``
        plus the style suffix. Both are empty strings for a blank prompt.
    """
    trimmed = prompt.strip() if prompt else ""
    if not trimmed:
        return "", ""

    polished = polish_prompt(trimmed, mode=mode) if do_polish else trimmed
    suffix = STYLE_SUFFIXES.get(style, "")
    return polished + suffix, polished
def do_polish_transform_prompt(prompt, style, do_polish):
    """Build the prompt pair for an image transformation (style-focused).

    Args:
        prompt: Raw transformation prompt; may be empty, blank or ``None``.
        style: Style label looked up in ``STYLE_SUFFIXES``.
        do_polish: When true and the prompt is not blank, the prompt is
            polished via ``do_polish_prompt`` in ``"transform"`` mode.

    Returns:
        ``(final_prompt, polished_display)``. ``polished_display`` is ``""``
        whenever no polishing took place.
    """
    base = (prompt or "").strip()
    if not base or not do_polish:
        # A blank prompt (including whitespace-only input) always falls back
        # to a generic description, with or without polishing, so the chosen
        # style suffix is never applied to an empty string or dropped.
        final = (base or "high quality image") + STYLE_SUFFIXES.get(style, "")
        return final, ""
    return do_polish_prompt(base, style, True, mode="transform")
# ZeroGPU attaches a GPU only while a @spaces.GPU-decorated function runs;
# outside ZeroGPU the decorator has no effect.
@spaces.GPU
def generate(full_prompt, polished_display, ratio, steps, seed, randomize, progress=gr.Progress(track_tqdm=True)):
    """Run text-to-image generation with the shared ``pipe_t2i`` pipeline.

    Args:
        full_prompt: Final prompt (already polished and style-suffixed).
        polished_display: Unused here; accepted because the UI event chain
            passes the polished-prompt textbox as an input.
        ratio: Aspect-ratio label looked up in ``RATIO_DIMS``; unknown labels
            fall back to 1024x1024.
        steps: Number of inference steps (cast to ``int``).
        seed: Seed to use when ``randomize`` is false.
        randomize: When true, a fresh random seed replaces ``seed``.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        ``(image, seed)``; ``image`` is ``None`` when the prompt is blank.
    """
    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)

    if not full_prompt or not full_prompt.strip():
        return None, seed

    w, h = RATIO_DIMS.get(ratio, (1024, 1024))
    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe_t2i(
        prompt=full_prompt,
        height=h,
        width=w,
        num_inference_steps=int(steps),
        guidance_scale=0.0,
        generator=generator,
    ).images[0]
    return image, seed
def _fit_dimensions(width, height, lo=512, hi=2048, multiple=16):
    """Return ``(w, h)`` fitted to ``[lo, hi]`` per side, floored to ``multiple``.

    Sizes already inside the range are only floored to the multiple. Sizes
    outside it are scaled uniformly first so the aspect ratio is kept; the
    final per-side clamp only bites for extreme aspect ratios that cannot fit.
    """
    longest, shortest = max(width, height), min(width, height)
    if longest > hi:
        scale = hi / longest
    elif shortest < lo:
        scale = lo / max(shortest, 1)
    else:
        scale = 1.0

    def snap(side):
        snapped = int(round(side * scale)) // multiple * multiple
        return max(lo, min(hi, snapped))

    return snap(width), snap(height)


# ZeroGPU attaches a GPU only while a @spaces.GPU-decorated function runs;
# outside ZeroGPU the decorator has no effect.
@spaces.GPU
def transform(input_image, full_prompt, polished_display, strength, steps, seed, randomize, progress=gr.Progress(track_tqdm=True)):
    """Run image-to-image transformation with the shared ``pipe_i2i`` pipeline.

    Args:
        input_image: PIL image to transform, or ``None``.
        full_prompt: Final prompt; a generic default is used when blank.
        polished_display: Unused here; accepted because the UI event chain
            passes the polished-prompt textbox as an input.
        strength: Transformation strength (cast to ``float``).
        steps: Desired number of denoising steps.
        seed: Seed to use when ``randomize`` is false.
        randomize: When true, a fresh random seed replaces ``seed``.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        ``(image, seed)``; ``(None, 0)`` when no input image was supplied.
    """
    if input_image is None:
        return None, 0

    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)

    if not full_prompt or not full_prompt.strip():
        full_prompt = "high quality image, enhanced details"

    # Resize to supported dimensions: each side a multiple of 16 (rounded
    # down) within 512..2048. Out-of-range images are scaled proportionally
    # rather than clamped per axis, which would distort them.
    input_image = input_image.convert("RGB")
    w, h = _fit_dimensions(*input_image.size)
    input_image = input_image.resize((w, h), Image.LANCZOS)

    # Adjust steps to compensate for strength (actual_steps = internal_steps * strength)
    # So we need internal_steps = desired_steps / strength
    strength = float(strength)
    effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)

    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe_i2i(
        prompt=full_prompt,
        image=input_image,
        strength=strength,
        num_inference_steps=effective_steps,
        guidance_scale=0.0,
        generator=generator,
    ).images[0]
    return image, seed
# Custom stylesheet passed to gr.Blocks(css=...) below: blue theme, tab and
# button styling, left-aligned example prompts, and a box-less footer
# (the .footer-no-box class is applied to the footer HTML component).
css = """
/* Blue theme colors */
.gradio-container {
background: linear-gradient(135deg, #e8f4fc 0%, #d4e9f7 100%) !important;
}
/* Tab styling */
.tabs {
background: transparent !important;
}
.tab-nav {
background: transparent !important;
border: none !important;
justify-content: center !important;
gap: 8px !important;
margin-bottom: 16px !important;
}
.tab-nav > button {
background: #bfdbfe !important;
color: #3b82f6 !important;
border: none !important;
border-radius: 12px !important;
padding: 12px 28px !important;
font-weight: 500 !important;
font-size: 15px !important;
}
.tab-nav > button:hover {
background: #93c5fd !important;
}
.tab-nav > button.selected,
.tab-nav > button[aria-selected="true"],
button.tab-nav.selected,
[role="tab"][aria-selected="true"] {
background: #1d4ed8 !important;
color: white !important;
font-weight: 700 !important;
box-shadow: 0 4px 12px rgba(29, 78, 216, 0.4) !important;
}
/* Button styling */
button.primary {
background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%) !important;
border: none !important;
border-radius: 10px !important;
font-weight: 600 !important;
padding: 12px 24px !important;
}
button.secondary {
background: linear-gradient(135deg, #60a5fa 0%, #3b82f6 100%) !important;
color: white !important;
border: none !important;
border-radius: 8px !important;
}
/* Content blocks */
.block {
background: white !important;
border-radius: 12px !important;
box-shadow: 0 2px 8px rgba(0,0,0,0.08) !important;
}
.tabitem {
background: transparent !important;
padding: 20px !important;
}
/* Example prompts left align */
table.examples tbody tr td:nth-child(1),
table.examples tbody tr td:nth-child(1) *,
div[class*="examples"] table tbody tr td:first-child,
div[class*="examples"] table tbody tr td:first-child *,
[class*="example"] td:first-child,
[class*="example"] td:first-child * {
text-align: left !important;
}
/* Header styling */
h1 {
color: #1e40af !important;
font-size: 2.2em !important;
margin-bottom: 8px !important;
}
/* Footer without box */
.footer-no-box {
background: transparent !important;
border: none !important;
box-shadow: none !important;
padding: 0 !important;
}
"""
# Build the two-tab UI. Components are laid out in creation order, so the
# statement order inside each `with` block is significant.
# NOTE(review): the nesting below was reconstructed from context because the
# original indentation was not available - confirm the layout against the
# running app.
with gr.Blocks(title="Z-Image Generation & Transformation Demo", css=css) as demo:
    gr.HTML("""
<div style="text-align: center; padding: 20px 0 10px 0;">
<h1 style="color: #1e40af; font-size: 2.2em; margin-bottom: 8px;">⚡ Z-Image Generation & Transformation Demo</h1>
<p style="color: #475569; font-size: 1.1em; margin: 0;">Next-Gen Diffusion Transformer for Image Generation & Editing</p>
</div>
""")
    with gr.Tabs():
        # TAB 1: Generate Image
        with gr.Tab("🎨 Generate"):
            with gr.Row():
                # Left column: inputs and controls.
                with gr.Column():
                    gen_prompt = gr.Textbox(label="Prompt", placeholder="Describe your image...", lines=3)
                    gen_polish = gr.Checkbox(label="✨ Polish Prompt (AI-enhanced)", value=False)
                    gen_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
                    gen_ratio = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)", label="Aspect Ratio")
                    gen_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
                    with gr.Row():
                        gen_seed = gr.Number(label="Seed", value=42, precision=0)
                        gen_randomize = gr.Checkbox(label="Random Seed", value=True)
                    gen_btn = gr.Button("Generate", variant="primary")
                # Right column: result, polished prompt, seed and share link.
                with gr.Column():
                    gen_output = gr.Image(label="Generated Image", type="pil", format="png", interactive=False)
                    gen_polished_prompt = gr.Textbox(label="Polished Prompt", interactive=False, visible=True, lines=3)
                    gen_seed_out = gr.Number(label="Seed Used", interactive=False)
                    with gr.Row():
                        gen_share_btn = gr.Button("📤 Share Image Link", variant="secondary")
                        gen_share_link = gr.Textbox(label="Image Link", interactive=False, show_copy_button=True)
            # Hidden state to pass polished prompt to generate
            gen_full_prompt = gr.State("")
            gr.Examples(examples=EXAMPLES_GENERATE, inputs=[gen_prompt, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize])
            # Chain: First polish prompt (CPU), then generate (GPU)
            gen_btn.click(
                fn=do_polish_prompt,
                inputs=[gen_prompt, gen_style, gen_polish],
                outputs=[gen_full_prompt, gen_polished_prompt]
            ).then(
                fn=generate,
                inputs=[gen_full_prompt, gen_polished_prompt, gen_ratio, gen_steps, gen_seed, gen_randomize],
                outputs=[gen_output, gen_seed_out]
            )
            # Submitting the prompt textbox runs the same two-step chain as the button.
            gen_prompt.submit(
                fn=do_polish_prompt,
                inputs=[gen_prompt, gen_style, gen_polish],
                outputs=[gen_full_prompt, gen_polished_prompt]
            ).then(
                fn=generate,
                inputs=[gen_full_prompt, gen_polished_prompt, gen_ratio, gen_steps, gen_seed, gen_randomize],
                outputs=[gen_output, gen_seed_out]
            )
            gen_share_btn.click(fn=upload_to_hf_cdn, inputs=[gen_output], outputs=[gen_share_link])
        # TAB 2: Transform Image
        with gr.Tab("✨ Transform"):
            gr.Markdown("**Transform an existing image** - Upload an image and describe how you want it transformed. Lower strength = subtle changes, higher = major transformation.")
            with gr.Row():
                # Left column: source image and controls.
                with gr.Column():
                    trans_input = gr.Image(label="Upload Image", type="pil")
                    trans_prompt = gr.Textbox(label="Transformation Prompt", placeholder="Describe the transformation (e.g., 'oil painting style, vibrant colors')", lines=2)
                    trans_polish = gr.Checkbox(label="✨ Polish Prompt (style-focused AI enhancement)", value=False)
                    trans_style = gr.Dropdown(choices=STYLES, value="None", label="Style")
                    trans_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, step=0.05, label="Strength (0.1=subtle, 1.0=complete change)")
                    trans_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps")
                    with gr.Row():
                        trans_seed = gr.Number(label="Seed", value=42, precision=0)
                        trans_randomize = gr.Checkbox(label="Random Seed", value=True)
                    trans_btn = gr.Button("Transform", variant="primary")
                # Right column: result, polished prompt, seed and share link.
                with gr.Column():
                    trans_output = gr.Image(label="Transformed Image", type="pil", format="png", interactive=False)
                    trans_polished_prompt = gr.Textbox(label="Polished Prompt", interactive=False, visible=True, lines=3)
                    trans_seed_out = gr.Number(label="Seed Used", interactive=False)
                    with gr.Row():
                        trans_share_btn = gr.Button("📤 Share Image Link", variant="secondary")
                        trans_share_link = gr.Textbox(label="Image Link", interactive=False, show_copy_button=True)
            # Hidden state to pass polished prompt to transform
            trans_full_prompt = gr.State("")
            gr.Markdown("### Example Transformation Prompts")
            gr.Examples(examples=EXAMPLES_TRANSFORM, inputs=[trans_prompt, trans_style, trans_strength, trans_steps, trans_seed, trans_randomize])
            # Chain: First polish prompt (CPU), then transform (GPU)
            trans_btn.click(
                fn=do_polish_transform_prompt,
                inputs=[trans_prompt, trans_style, trans_polish],
                outputs=[trans_full_prompt, trans_polished_prompt]
            ).then(
                fn=transform,
                inputs=[trans_input, trans_full_prompt, trans_polished_prompt, trans_strength, trans_steps, trans_seed, trans_randomize],
                outputs=[trans_output, trans_seed_out]
            )
            trans_share_btn.click(fn=upload_to_hf_cdn, inputs=[trans_output], outputs=[trans_share_link])
    # Footer
    gr.HTML(
        """
<div style="text-align: center; width: 100%; opacity: 0.7; font-size: 0.9em; padding: 1rem 0;">
<strong>Model:</strong> <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" target="_blank" style="color: #3b82f6;">Tongyi-MAI/Z-Image-Turbo</a> (Apache 2.0 License) •
<strong>Demo, Design and Improvements by:</strong> <a href="https://huggingface.co/lulavc" target="_blank" style="color: #3b82f6;">@lulavc</a>
</div>
""",
        elem_classes="footer-no-box"
    )
# Start the Gradio server (runs at import time; there is no __main__ guard).
demo.launch()