Spaces:

lulavc
/

Z-Image-Turbo

Running on Zero

App Files Files

lulavc committed 2 days ago

Commit

9cd69ee

verified ·

1 Parent(s): b9d8080

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +22 -64

app.py CHANGED Viewed

@@ -773,6 +773,9 @@ OUTPUT THE PROMPT NOW (nothing else):"""
 # ZEROGPU AOTI CONFIGURATION
 # =============================================================================
 # Inductor configuration optimized for diffusion transformers
 INDUCTOR_CONFIGS = {
     "conv_1x1_as_mm": True,
@@ -791,7 +794,9 @@ MIN_SEQ_LEN = 15360   # 1536x640 -> 192x80 -> 15,360
 MAX_SEQ_LEN = 65536   # 2048x2048 -> 256x256 -> 65,536
 # Environment variable to enable/disable AoTI compilation
-ENABLE_AOTI = os.environ.get("ENABLE_AOTI", "true").lower() == "true"
 logger.info("Loading Z-Image-Turbo pipeline...")
@@ -809,76 +814,26 @@ except Exception as e:
     logger.warning(f"FA3 not available, using default SDPA attention: {e}")
 # =============================================================================
-# AOTI COMPILATION FUNCTION
 # =============================================================================
-@spaces.GPU(duration=1500)
-def compile_transformer_aoti():
-    """
-    Compile transformer ahead-of-time for 1.3x-1.8x speedup.
-    Runs once at Space startup, takes ~5-10 minutes.
-    """
-    logger.info("Starting AoTI compilation for transformer...")
     try:
-        # Step 1: Capture example inputs
-        logger.info("Step 1/4: Capturing example inputs...")
-        with spaces.aoti_capture(pipe_t2i.transformer) as call:
-            pipe_t2i(
-                "example prompt for compilation",
                 height=1024,
                 width=1024,
                 num_inference_steps=1,
-                guidance_scale=0.0,
             )
-        # Step 2: Define dynamic shapes for multi-resolution support
-        logger.info("Step 2/4: Configuring dynamic shapes...")
-        logger.info(f"Captured kwargs keys: {list(call.kwargs.keys())}")
-        from torch.export import Dim
-        from torch.utils._pytree import tree_map
-        # Define dynamic dimensions
-        batch_dim = Dim("batch", min=1, max=4)
-        seq_len_dim = Dim("seq_len", min=MIN_SEQ_LEN, max=MAX_SEQ_LEN)
-        # Create dynamic shapes from captured kwargs
-        dynamic_shapes = tree_map(lambda v: None, call.kwargs)
-        # Apply dynamic dims to variable-size tensors
-        if "hidden_states" in call.kwargs:
-            dynamic_shapes["hidden_states"] = {0: batch_dim, 1: seq_len_dim}
-        if "img_ids" in call.kwargs:
-            dynamic_shapes["img_ids"] = {0: batch_dim, 1: seq_len_dim}
-        # Step 3: Export the model
-        logger.info("Step 3/4: Exporting model with torch.export...")
-        exported = torch.export.export(
-            pipe_t2i.transformer,
-            args=call.args,
-            kwargs=call.kwargs,
-            dynamic_shapes=dynamic_shapes,
-        )
-        # Step 4: Compile with inductor
-        logger.info("Step 4/4: Compiling with PyTorch Inductor (this takes several minutes)...")
-        compiled = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
-        logger.info("AoTI compilation completed successfully!")
-        return compiled
-    except Exception as e:
-        logger.error(f"AoTI compilation failed: {type(e).__name__}: {str(e)}")
-        logger.warning("Falling back to non-compiled transformer")
-        return None
-# =============================================================================
-# APPLY AOTI COMPILATION
-# =============================================================================
-if ENABLE_AOTI:
-    try:
-        compiled_transformer = compile_transformer_aoti()
         if compiled_transformer is not None:
             spaces.aoti_apply(compiled_transformer, pipe_t2i.transformer)
             logger.info("AoTI transformer applied successfully (1.3x-1.8x speedup expected)")
@@ -909,7 +864,10 @@ pipe_i2i = ZImageImg2ImgPipeline(
     scheduler=pipe_t2i.scheduler,
 )
-logger.info("Pipelines ready! (TF32 + FA3 + AoTI Transformer + VAE compile)")
 STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
           "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]

 # ZEROGPU AOTI CONFIGURATION
 # =============================================================================
+# Import the corrected AoTI compilation function
+from aoti import compile_transformer_aoti
 # Inductor configuration optimized for diffusion transformers
 INDUCTOR_CONFIGS = {
     "conv_1x1_as_mm": True,
 MAX_SEQ_LEN = 65536   # 2048x2048 -> 256x256 -> 65,536
 # Environment variable to enable/disable AoTI compilation
+# Disabled by default: the Z-Image-Turbo transformer takes positional args (x, t, cap_feats),
+# which require special handling in torch.export. Set ENABLE_AOTI=true to enable it once that is fixed.
+ENABLE_AOTI = os.environ.get("ENABLE_AOTI", "false").lower() == "true"
 logger.info("Loading Z-Image-Turbo pipeline...")
     logger.warning(f"FA3 not available, using default SDPA attention: {e}")
 # =============================================================================
+# APPLY AOTI COMPILATION
 # =============================================================================
+if ENABLE_AOTI:
     try:
+        # Use the corrected compile function that handles positional args properly
+        @spaces.GPU(duration=1500)
+        def _compile_wrapper():
+            return compile_transformer_aoti(
+                pipe=pipe_t2i,
+                example_prompt="example prompt for compilation",
                 height=1024,
                 width=1024,
                 num_inference_steps=1,
+                inductor_configs=INDUCTOR_CONFIGS,
+                min_seq_len=MIN_SEQ_LEN,
+                max_seq_len=MAX_SEQ_LEN,
             )
+        compiled_transformer = _compile_wrapper()
         if compiled_transformer is not None:
             spaces.aoti_apply(compiled_transformer, pipe_t2i.transformer)
             logger.info("AoTI transformer applied successfully (1.3x-1.8x speedup expected)")
     scheduler=pipe_t2i.scheduler,
 )
+if ENABLE_AOTI:
+    logger.info("Pipelines ready! (TF32 + FA3 + AoTI Transformer + VAE compile)")
+else:
+    logger.info("Pipelines ready! (TF32 + FA3 + VAE compile) - AoTI disabled")
 STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
           "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]