Spaces:

Alovestocode
/

router-router-zero

Running on Zero

App Files Community

Alovestocode committed on Nov 7

Commit

1910748

verified ·

1 Parent(s): c924012

Refactor: Use APIRouter with include_router, improve Gradio UI with status messages, remove HTML console

Browse files

Files changed (1) hide show

app.py +56 -38

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ from functools import lru_cache
 from typing import List, Optional, Tuple
 import torch
-from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 try:
@@ -289,14 +289,10 @@ def _generate_with_gpu(
     temperature: float = DEFAULT_TEMPERATURE,
     top_p: float = DEFAULT_TOP_P,
 ) -> str:
-    """Generate function wrapped with ZeroGPU decorator. Must be defined before FastAPI app for ZeroGPU detection."""
     return _generate(prompt, max_new_tokens, temperature, top_p)
-fastapi_app = FastAPI(title="Router Model API", version="1.0.0")
-@fastapi_app.get("/health")
 def healthcheck() -> dict[str, str]:
     return {
         "status": "ok",
@@ -305,16 +301,11 @@ def healthcheck() -> dict[str, str]:
     }
-@fastapi_app.on_event("startup")
 def warm_start() -> None:
     """Warm start is disabled for ZeroGPU - model loads on first request."""
-    # ZeroGPU functions decorated with @spaces.GPU cannot be called during startup.
-    # They must be called within request handlers. Skip warm start for ZeroGPU.
     print("Warm start skipped for ZeroGPU. Model will load on first request.")
-    return
-@fastapi_app.post("/v1/generate", response_model=GenerateResponse)
 def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
     try:
         text = _generate_with_gpu(
@@ -333,15 +324,41 @@ def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
 # Gradio interface for ZeroGPU detection - ZeroGPU requires Gradio SDK
 import gradio as gr
-@spaces.GPU(duration=300)
-def gradio_generate(
     prompt: str,
     max_new_tokens: int = MAX_NEW_TOKENS,
     temperature: float = DEFAULT_TEMPERATURE,
     top_p: float = DEFAULT_TOP_P,
-) -> str:
-    """Gradio interface function with GPU decorator for ZeroGPU detection."""
-    return _generate(prompt, max_new_tokens, temperature, top_p)
 # Create Gradio Blocks app to mount FastAPI routes properly
 with gr.Blocks(
@@ -428,10 +445,11 @@ with gr.Blocks(
             gr.Markdown("### 📤 Output")
             output = gr.Textbox(
                 label="Generated Response",
-                lines=20,
                 placeholder="Generated response will appear here...",
                 show_copy_button=True,
             )
             with gr.Accordion("📚 API Information", open=False):
                 gr.Markdown("""
@@ -453,39 +471,39 @@ with gr.Blocks(
     # Event handlers
     generate_btn.click(
-        fn=gradio_generate,
         inputs=[prompt_input, max_tokens_input, temp_input, top_p_input],
-        outputs=output,
     )
     clear_btn.click(
-        fn=lambda: ("", ""),
-        outputs=[prompt_input, output],
     )
     # Note: API routes will be added after Blocks context to avoid interfering with Gradio's static assets
 # Enable queued execution so ZeroGPU can schedule GPU work reliably
 gradio_app.queue(max_size=8)
-# Mount FastAPI routes onto Gradio's underlying FastAPI app
-# This allows API endpoints to work alongside Gradio UI
-# We mount FastAPI as a sub-application to avoid conflicts
-try:
-    from starlette.routing import Mount
-    # Mount FastAPI app at root - Starlette will check routes in order
-    # Gradio's routes (like /_app/*) will be checked first, then FastAPI routes
-    gradio_app.app.mount("/", fastapi_app)
-    print("FastAPI routes mounted onto Gradio app successfully")
-except Exception as e:
-    print(f"Warning: Could not mount FastAPI routes: {e}")
-    import traceback
-    traceback.print_exc()
-# Set app to Gradio for Spaces compatibility (sdk: gradio requires Gradio app)
-# Spaces will handle running the server automatically
 app = gradio_app
 if __name__ == "__main__":  # pragma: no cover
-    # For local testing only - Spaces handles server startup
     app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))

 from typing import List, Optional, Tuple
 import torch
+from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 try:
     temperature: float = DEFAULT_TEMPERATURE,
     top_p: float = DEFAULT_TOP_P,
 ) -> str:
+    """Generate function wrapped with ZeroGPU decorator."""
     return _generate(prompt, max_new_tokens, temperature, top_p)
 def healthcheck() -> dict[str, str]:
     return {
         "status": "ok",
     }
 def warm_start() -> None:
     """Warm start is disabled for ZeroGPU - model loads on first request."""
     print("Warm start skipped for ZeroGPU. Model will load on first request.")
 def generate_endpoint(payload: GeneratePayload) -> GenerateResponse:
     try:
         text = _generate_with_gpu(
 # Gradio interface for ZeroGPU detection - ZeroGPU requires Gradio SDK
 import gradio as gr
+STATUS_IDLE = "Status: awaiting prompt."
+def _format_status(message: str, *, success: bool) -> str:
+    prefix = "✅" if success else "❌"
+    return f"{prefix} {message}"
+def gradio_generate_handler(
     prompt: str,
     max_new_tokens: int = MAX_NEW_TOKENS,
     temperature: float = DEFAULT_TEMPERATURE,
     top_p: float = DEFAULT_TOP_P,
+) -> tuple[str, str]:
+    """Wrapper used by the Gradio UI with friendly status messages."""
+    if not prompt.strip():
+        return (
+            "ERROR: Prompt must not be empty.",
+            _format_status("Prompt required before generating.", success=False),
+        )
+    try:
+        # Reuse the same GPU-decorated generator as the API so behaviour matches.
+        text = _generate_with_gpu(
+            prompt=prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            top_p=top_p,
+        )
+    except Exception as exc:  # pragma: no cover - runtime/hardware dependent
+        print(f"UI generation failed: {exc}")
+        return (
+            f"ERROR: {exc}",
+            _format_status("Generation failed. Check logs for details.", success=False),
+        )
+    return text, _format_status("Plan generated successfully.", success=True)
 # Create Gradio Blocks app to mount FastAPI routes properly
 with gr.Blocks(
             gr.Markdown("### 📤 Output")
             output = gr.Textbox(
                 label="Generated Response",
+                lines=22,
                 placeholder="Generated response will appear here...",
                 show_copy_button=True,
             )
+            status_display = gr.Markdown(STATUS_IDLE)
             with gr.Accordion("📚 API Information", open=False):
                 gr.Markdown("""
     # Event handlers
     generate_btn.click(
+        fn=gradio_generate_handler,
         inputs=[prompt_input, max_tokens_input, temp_input, top_p_input],
+        outputs=[output, status_display],
     )
     clear_btn.click(
+        fn=lambda: ("", "", STATUS_IDLE),
+        outputs=[prompt_input, output, status_display],
     )
     # Note: API routes will be added after Blocks context to avoid interfering with Gradio's static assets
+# Attach API routes directly onto Gradio's FastAPI instance
+api_router = APIRouter()
+@api_router.get("/health")
+def api_health() -> dict[str, str]:
+    return healthcheck()
+@api_router.post("/v1/generate", response_model=GenerateResponse)
+def api_generate(payload: GeneratePayload) -> GenerateResponse:
+    return generate_endpoint(payload)
+gradio_app.app.include_router(api_router)
+warm_start()
 # Enable queued execution so ZeroGPU can schedule GPU work reliably
 gradio_app.queue(max_size=8)
 app = gradio_app
 if __name__ == "__main__":  # pragma: no cover
     app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))