Spaces: Runtime error

Commit: Upload 2 files

Files changed:
- app.py: +5 -10
- custom_pipeline.py: +17 -32
app.py
CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import numpy as np
 import random
+import spaces
 import torch
 import time
 from diffusers import DiffusionPipeline, AutoencoderTiny
@@ -8,7 +9,6 @@ from diffusers.models.attention_processor import AttnProcessor2_0
 from custom_pipeline import FluxWithCFGPipeline
 
 torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.benchmark = True
 
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
@@ -18,7 +18,7 @@ DEFAULT_HEIGHT = 1024
 DEFAULT_INFERENCE_STEPS = 1
 
 # Device and model setup
-dtype = torch.
+dtype = torch.float16
 pipe = FluxWithCFGPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
 )
@@ -28,16 +28,11 @@ pipe.load_lora_weights('hugovntr/flux-schnell-realism', weight_name='schnell-rea
 pipe.set_adapters(["better"], adapter_weights=[1.0])
 pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
 pipe.unload_lora_weights()
-pipe.enable_xformers_memory_efficient_attention()
-pipe.unet.to(memory_format=torch.channels_last)
-pipe.vae.to(memory_format=torch.channels_last)
-
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead")
-pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead")
 
 torch.cuda.empty_cache()
 
 # Inference function
+@spaces.GPU(duration=25)
 def generate_image(prompt, seed=24, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT, randomize_seed=False, num_inference_steps=2, progress=gr.Progress(track_tqdm=True)):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -78,7 +73,7 @@ with gr.Blocks() as demo:
         with gr.Column(scale=2.5):
             result = gr.Image(label="Generated Image", show_label=False, interactive=False)
         with gr.Column(scale=1):
-            prompt = gr.
+            prompt = gr.Text(
                 label="Prompt",
                 placeholder="Describe the image you want to generate...",
                 lines=3,
@@ -91,7 +86,7 @@ with gr.Blocks() as demo:
     with gr.Column("Advanced Options"):
         with gr.Row():
             realtime = gr.Checkbox(label="Realtime Toggler", info="If TRUE then uses more GPU but create image in realtime.", value=False)
-            latency = gr.
+            latency = gr.Text(label="Latency")
         with gr.Row():
            seed = gr.Number(label="Seed", value=42)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
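For context, the net effect of this change to app.py is to drop the module-level CUDA tuning (xformers attention, channels_last memory format, torch.compile) and instead request a GPU per call through the spaces package. Below is a minimal, self-contained sketch of that ZeroGPU pattern, not the Space's exact code: it loads the stock FLUX.1-schnell checkpoint with diffusers' DiffusionPipeline rather than the custom FluxWithCFGPipeline, and the call arguments (guidance_scale=0.0, a single inference step) are assumptions chosen to match typical schnell usage.

import spaces
import torch
from diffusers import DiffusionPipeline

# Load on CPU at import time; ZeroGPU only attaches a GPU inside decorated calls.
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float16
)
pipe.to("cuda")  # under ZeroGPU this move is deferred until a GPU is granted

@spaces.GPU(duration=25)  # request a GPU slice of at most ~25 s per call
def generate_image(prompt: str, seed: int = 42):
    generator = torch.Generator("cuda").manual_seed(seed)
    result = pipe(
        prompt,
        num_inference_steps=1,   # schnell is distilled for very few steps
        guidance_scale=0.0,      # assumption: schnell is normally run without CFG
        generator=generator,
    )
    return result.images[0]

In the actual Space, the decorated generate_image is presumably wired to the gr.Text prompt and gr.Image result components shown in the Blocks hunks above.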
custom_pipeline.py
CHANGED
@@ -3,29 +3,20 @@ import numpy as np
 from diffusers import FluxPipeline, FlowMatchEulerDiscreteScheduler
 from typing import Any, Dict, List, Optional, Union
 from PIL import Image
-from torch.cuda import graphs
 
-#
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
-torch.backends.cudnn.benchmark = True
-
-# Constants with optimized values
+# Constants for shift calculation
 BASE_SEQ_LEN = 256
 MAX_SEQ_LEN = 4096
 BASE_SHIFT = 0.5
 MAX_SHIFT = 1.2
-BATCH_SIZE = 4  # Optimal batch size for A100
 
-
+# Helper functions
 def calculate_timestep_shift(image_seq_len: int) -> float:
-
-    MAX_SEQ_LEN = 4096
-    BASE_SHIFT = 0.5
-    MAX_SHIFT = 1.2
+    """Calculates the timestep shift (mu) based on the image sequence length."""
     m = (MAX_SHIFT - BASE_SHIFT) / (MAX_SEQ_LEN - BASE_SEQ_LEN)
     b = BASE_SHIFT - m * BASE_SEQ_LEN
-
+    mu = image_seq_len * m + b
+    return mu
 
 def prepare_timesteps(
     scheduler: FlowMatchEulerDiscreteScheduler,
@@ -35,25 +26,19 @@ def prepare_timesteps(
     sigmas: Optional[List[float]] = None,
     mu: Optional[float] = None,
 ) -> (torch.Tensor, int):
-    """
-    if
-
-
-
-
-
-
-
-
-
-        scheduler.set_timesteps(sigmas=sigmas, device=device)
-    else:
-        scheduler.set_timesteps(num_inference_steps, device=device, mu=mu)
-
-    timesteps = scheduler.timesteps.to(memory_format=torch.channels_last)
+    """Prepares the timesteps for the diffusion process."""
+    if timesteps is not None and sigmas is not None:
+        raise ValueError("Only one of `timesteps` or `sigmas` can be passed.")
+
+    if timesteps is not None:
+        scheduler.set_timesteps(timesteps=timesteps, device=device)
+    elif sigmas is not None:
+        scheduler.set_timesteps(sigmas=sigmas, device=device)
+    else:
+        scheduler.set_timesteps(num_inference_steps, device=device, mu=mu)
+
+    timesteps = scheduler.timesteps
     num_inference_steps = len(timesteps)
-
     return timesteps, num_inference_steps
 
 # FLUX pipeline function
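To make the restored helper easier to sanity-check, here is a standalone copy of calculate_timestep_shift from the new custom_pipeline.py together with a few worked values; the numbers follow directly from the constants in the diff (a sequence length of 256 maps to BASE_SHIFT, 4096 to MAX_SHIFT, and intermediate lengths interpolate linearly).

# Constants as defined in the diff above.
BASE_SEQ_LEN = 256
MAX_SEQ_LEN = 4096
BASE_SHIFT = 0.5
MAX_SHIFT = 1.2

def calculate_timestep_shift(image_seq_len: int) -> float:
    """Linear interpolation from packed-latent sequence length to the shift mu."""
    m = (MAX_SHIFT - BASE_SHIFT) / (MAX_SEQ_LEN - BASE_SEQ_LEN)
    b = BASE_SHIFT - m * BASE_SEQ_LEN
    return image_seq_len * m + b

print(calculate_timestep_shift(256))   # ~0.5  -> BASE_SHIFT at the lower endpoint
print(calculate_timestep_shift(4096))  # ~1.2  -> MAX_SHIFT at the upper endpoint
print(calculate_timestep_shift(1024))  # ~0.64 -> roughly a 512x512 packed latent (assumption)

The returned mu is what prepare_timesteps forwards to scheduler.set_timesteps(num_inference_steps, device=device, mu=mu) when neither explicit timesteps nor sigmas are supplied.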