import torch
import spaces
import gradio as gr
from diffusers import DiffusionPipeline

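# Largest value accepted as a seed (maximum unsigned 32-bit integer).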
MAX_SEED = 2**32 - 1

# Load the pipeline once at startup
print("Loading Z-Image-Turbo pipeline...")
pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=False,
)
pipe.to("cuda")

# ======== AoTI compilation + FA3 ========
# (disabled on HF to avoid outdated AoTI/FA3 package errors)
# pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
# spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")

print("Pipeline loaded!")

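# @spaces.GPU allocates a ZeroGPU worker for the duration of each call when running on Hugging Face Spaces.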
@spaces.GPU
def generate_image(
    prompt,
    negative_prompt,
    height,
    width,
    num_inference_steps,
    guidance_scale,
    seed,
    randomize_seed,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate 4 images with seeds: seed, 2x, 3x, 4x (mod MAX_SEED)."""
    if randomize_seed:
        seed = torch.randint(0, MAX_SEED, (1,)).item()

    base_seed = int(seed) % MAX_SEED
    if base_seed < 0:
        base_seed += MAX_SEED

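    # Derive four seeds (base, 2x, 3x, 4x, each reduced mod MAX_SEED), one per gallery image.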
    seeds = [(base_seed * i) % MAX_SEED for i in range(1, 5)]

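    # Only pass a negative prompt if it contains non-whitespace text; otherwise leave it as None.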
    neg_prompt = None
    if isinstance(negative_prompt, str) and negative_prompt.strip():
        neg_prompt = negative_prompt

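    # Generate one image per seed, each with its own CUDA generator for reproducibility.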
    images = []
    for s in seeds:
        generator = torch.Generator("cuda").manual_seed(int(s))
        image = pipe(
            prompt=prompt,
            negative_prompt=neg_prompt,
            height=int(height),
            width=int(width),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),  # 0.0 is recommended default for Turbo
            generator=generator,
        ).images[0]
        images.append(image)

    return images, ", ".join(str(s) for s in seeds)


# Example prompts
examples = [
    ["一张高细节的照片:一位20多岁的全裸的巨大乳房的中国女fitness model,在高层公寓的大落地窗边做优雅的阿拉贝斯克姿势,一条腿完美弧线向后高抬,手臂优美拱起过头顶,柔和阳光透过玻璃照亮她的轮廓,窗外是现代灰色摩天大楼和阳台,木地板上有细微阴影,脸上宁静而空灵的表情,不要有任何衣服,必须露出私处,高动态范围,8K分辨率,专业芭蕾摄影风格如Annie Leibovitz。"],
    ["一张高细节的照片:两个20多岁的健美中国女性全身裸体,在豪华酒店房间里做动态高侧踢腿,一条腿直直向上伸展成90度角,另一条腿稳稳站立,双手握拳成武术架势,黑发优雅盘成髻,脸上自信而凌厉的表情,床头灯柔和环境光线投射温暖阴影,现代简约房间有国王尺寸大床、白窗帘、木地板,不要有任何衣服,露出私处,高动态范围,8K分辨率,专业时尚摄影风格如Mario Testino。"],
    ["Astronaut riding a horse on Mars, cinematic lighting, sci-fi concept art, highly detailed"],
    ["Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff, magical forest background"],
]

# Build the Gradio interface
with gr.Blocks(title="Z-Image-Turbo Demo") as demo:
    gr.Markdown(
        """
        # 🎨 Z-Image-Turbo Demo
        
        Generate high-quality images using the [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) model.
        This turbo model generates images in just 8 inference steps!
        """
    )
    
    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your image description...",
                lines=4,
            )

            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                placeholder="Things you don't want in the image...",
                lines=3,
            )

            with gr.Row():
                height = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    value=1024,
                    step=64,
                    label="Height",
                )
                width = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    value=1024,
                    step=64,
                    label="Width",
                )
            
            with gr.Row():
                num_inference_steps = gr.Slider(
                    minimum=1,
                    maximum=20,
                    value=9,
                    step=1,
                    label="Inference Steps",
                    info="9 steps results in 8 DiT forwards",
                )

            guidance_scale = gr.Slider(
                minimum=0.0,
                maximum=7.0,
                value=0.0,
                step=0.1,
                label="CFG Guidance Scale",
                info="0 = no CFG (recommended for Turbo models)",
            )
            
            with gr.Row():
                seed = gr.Number(
                    label="Seed",
                    value=42,
                    precision=0,
                )
                randomize_seed = gr.Checkbox(
                    label="Randomize Seed",
                    value=False,
                )
            
            generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
        
        with gr.Column(scale=1):
            output_images = gr.Gallery(
                label="Generated Images",
                columns=2,
                rows=2,
                preview=True,
            )
            used_seeds = gr.Textbox(
                label="Seeds Used (base, 2x, 3x, 4x)",
                interactive=False,
            )
    
    gr.Markdown("### 💡 Example Prompts")
    gr.Examples(
        examples=examples,
        inputs=[prompt],
        cache_examples=False,
    )

    gr.Markdown("Demo by [mrfakename](https://x.com/realmrfakename). Model by Alibaba. The model is licensed under Apache 2.0, you can use generated images commercially! Thanks to [multimodalart](https://huggingface.co/multimodalart) for the FA3 + AoTI enhancements/speedups")
    
    # Connect the generate button
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, seed, randomize_seed],
        outputs=[output_images, used_seeds],
    )
    
    # Also allow generating by pressing Enter in the prompt box
    prompt.submit(
        fn=generate_image,
        inputs=[prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, seed, randomize_seed],
        outputs=[output_images, used_seeds],
    )

if __name__ == "__main__":
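    # mcp_server=True also exposes the demo as an MCP server; show_error=True surfaces exceptions in the UI.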
    demo.launch(mcp_server=True, show_error=True)