Update app.py
app.py CHANGED
@@ -20,7 +20,9 @@ DEFAULT_INFERENCE_STEPS = 1
 
 # Device and model setup
 dtype = torch.float16
-#
+device = "cuda"  # Explicitly set device to CUDA
+
+# Download the LoRA weights
 lora_weights_path = hf_hub_download(
     repo_id="hugovntr/flux-schnell-realism",
     filename="schnell-realism_v2.3.safetensors",
@@ -30,17 +32,17 @@ pipe = FluxWithCFGPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
 )
 pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
-pipe.to(
+pipe.to(device)  # Move the pipeline to CUDA
 
-# Load the LoRA weights
+# Load the LoRA weights
 pipe.load_lora_weights(lora_weights_path, adapter_name="better")
 pipe.set_adapters(["better"], adapter_weights=[1.0])
 pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
 pipe.unload_lora_weights()
 
 # Memory optimizations
-pipe.transformer.to(memory_format=torch.channels_last)
-pipe.enable_xformers_memory_efficient_attention()
+pipe.transformer.to(memory_format=torch.channels_last)
+pipe.enable_xformers_memory_efficient_attention()
 
 # CUDA Graph setup
 static_inputs = None
@@ -51,7 +53,6 @@ def setup_cuda_graph(prompt, height, width, num_inference_steps):
     global static_inputs, static_model, graph
 
     batch_size = 1 if isinstance(prompt, str) else len(prompt)
-    device = "cuda"
     num_images_per_prompt = 1
 
     prompt_embeds, pooled_prompt_embeds, text_ids = pipe.encode_prompt(
@@ -91,11 +92,11 @@ def setup_cuda_graph(prompt, height, width, num_inference_steps):
     guidance = torch.full([1], 3.5, device=device, dtype=torch.float16).expand(latents.shape[0]) if pipe.transformer.config.guidance_embeds else None
 
     static_inputs = {
-        "hidden_states": latents,
-        "timestep": timesteps,
-        "guidance": guidance,
-        "pooled_projections": pooled_prompt_embeds,
-        "encoder_hidden_states": prompt_embeds,
+        "hidden_states": latents.to(device),
+        "timestep": timesteps.to(device),
+        "guidance": guidance.to(device) if guidance is not None else None,
+        "pooled_projections": pooled_prompt_embeds.to(device),
+        "encoder_hidden_states": prompt_embeds.to(device),
         "txt_ids": text_ids,
         "img_ids": latent_image_ids,
         "joint_attention_kwargs": None,
@@ -108,7 +109,7 @@ def setup_cuda_graph(prompt, height, width, num_inference_steps):
     static_output = static_model(**static_inputs)
 
 # Inference function
-@spaces.GPU(duration=25)
+# @spaces.GPU(duration=25)  # Remove decorator
 def generate_image(prompt, seed=24, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT, randomize_seed=False, num_inference_steps=2, progress=gr.Progress(track_tqdm=True)):
     global static_inputs, graph
 
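For reference, the static-input bookkeeping in `setup_cuda_graph` (pre-allocated `static_inputs`, a captured `graph`, a fixed `static_output`) follows PyTorch's standard CUDA Graphs capture/replay recipe. Below is a minimal self-contained sketch of that pattern; the toy `Linear` model, tensor shapes, and warm-up count are illustrative assumptions, not the Space's actual code.

```python
import torch

device = "cuda"
model = torch.nn.Linear(64, 64, device=device, dtype=torch.float16)  # hypothetical stand-in model
static_input = torch.randn(8, 64, device=device, dtype=torch.float16)

# Warm up on a side stream so capture starts from a clean CUDA state.
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s), torch.no_grad():
    for _ in range(3):
        model(static_input)
torch.cuda.current_stream().wait_stream(s)

# Capture one forward pass; tensors allocated here get fixed addresses.
graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(graph), torch.no_grad():
    static_output = model(static_input)

# Replay: copy fresh data into the captured input tensor, then replay.
static_input.copy_(torch.randn(8, 64, device=device, dtype=torch.float16))
graph.replay()  # static_output is updated in place
result = static_output.clone()
```

Replay reuses the recorded kernels only if tensor shapes and addresses are unchanged, which is why the diff routes new data through the pre-allocated `static_inputs` tensors (now explicitly moved to `device`) rather than allocating fresh ones on each call.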