Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import time
-from typing import List, Dict
+from typing import List, Dict, Tuple, Any
 
 import torch
 import gradio as gr
@@ -32,6 +32,54 @@ if HF_TOKEN:
         print(f"Warning: Could not login to Hugging Face: {e}")
 
 
+# =========================
+# Utilities
+# =========================
+def tuples_from_messages(messages: List[Dict[str, Any]]) -> List[List[str]]:
+    """
+    Convert a Chatbot(type='messages') style history into tuples format
+    [[user, assistant], ...]. If already tuples-like, return as-is.
+    """
+    if not messages:
+        return []
+    # If already tuples-like (list with elements of length 2), return as-is
+    if isinstance(messages[0], (list, tuple)) and len(messages[0]) == 2:
+        return [list(x) for x in messages]
+
+    # Otherwise, convert from [{"role": "...", "content": "..."}, ...]
+    pairs: List[List[str]] = []
+    last_user: str | None = None
+    for m in messages:
+        role = m.get("role")
+        content = m.get("content", "")
+        if role == "user":
+            last_user = content
+        elif role == "assistant":
+            if last_user is None:
+                # If assistant appears first (odd state), pair with an empty user turn
+                pairs.append(["", content])
+            else:
+                pairs.append([last_user, content])
+            last_user = None
+    # A dangling user message without an assistant reply is paired with an empty string
+    if last_user is not None:
+        pairs.append([last_user, ""])
+    return pairs
+
+
+def messages_from_tuples(history_tuples: List[List[str]]) -> List[Dict[str, str]]:
+    """
+    Convert tuples [[user, assistant], ...] into a list of role dictionaries:
+    [{"role": "user", ...}, {"role": "assistant", ...}, ...]
+    """
+    messages: List[Dict[str, str]] = []
+    for u, a in history_tuples:
+        messages.append({"role": "user", "content": u})
+        if a:
+            messages.append({"role": "assistant", "content": a})
+    return messages
+
+
 # =========================
 # Chat Model Wrapper
 # =========================
@@ -158,15 +206,14 @@ def clear_chat():
 
 def chat_fn(message, history, system_prompt, temperature):
     """Non-streaming chat handler (returns tuples)."""
+    # DEFENSIVE: ensure tuples format
+    history = tuples_from_messages(history)
+
     if not chat_model.model_loaded:
         return history + [[message, "Please wait for the model to load or reload the space."]]
 
-    # Convert tuples
-    formatted_history = []
-    for user_msg, assistant_msg in history:
-        formatted_history.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            formatted_history.append({"role": "assistant", "content": assistant_msg})
+    # Convert tuples -> role dicts for the model
+    formatted_history = messages_from_tuples(history)
 
     # Generate full response once
     response = chat_model.generate_response(message, formatted_history, system_prompt, temperature)
@@ -177,16 +224,15 @@ def chat_fn(message, history, system_prompt, temperature):
 
 def chat_stream_fn(message, history, system_prompt, temperature):
     """Streaming chat handler (tuples): generate once, then chunk out."""
+    # DEFENSIVE: ensure tuples format
+    history = tuples_from_messages(history)
+
     if not chat_model.model_loaded:
         yield history + [[message, "Please wait for the model to load or reload the space."]]
         return
 
-    # Convert tuples
-    formatted_history = []
-    for user_msg, assistant_msg in history:
-        formatted_history.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            formatted_history.append({"role": "assistant", "content": assistant_msg})
+    # Convert tuples -> role dicts for the model
+    formatted_history = messages_from_tuples(history)
 
     # Generate full response (GPU)
     full_response = chat_model.generate_response(
@@ -196,7 +242,6 @@ def chat_stream_fn(message, history, system_prompt, temperature):
     # Start new row and progressively fill assistant side
     base = history + [[message, ""]]
    if not isinstance(full_response, str):
-        # In case of an error string (already str), we still stream it
         full_response = str(full_response)
 
     step = max(8, len(full_response) // 40)  # ~40 chunks
@@ -274,8 +319,9 @@ with gr.Blocks(
             info="Show responses as they're being generated",
         )
 
-        # Chatbot in TUPLES mode
+        # Chatbot in TUPLES mode (explicit)
         chatbot = gr.Chatbot(
+            type="tuples",
             label="Chat History",
             height=500,
             show_copy_button=True,
@@ -291,17 +337,19 @@ with gr.Blocks(
         submit_btn = gr.Button("Send", variant="primary", scale=1)
         clear_btn = gr.Button("Clear", scale=0)
 
-    # Wire events
+    # Wire events (also clear the input box after send)
     msg.submit(
         handle_chat,
         inputs=[msg, chatbot, system_prompt, temperature, streaming],
         outputs=[chatbot],
-    )
+    ).then(lambda: "", None, msg)
+
     submit_btn.click(
         handle_chat,
         inputs=[msg, chatbot, system_prompt, temperature, streaming],
         outputs=[chatbot],
-    )
+    ).then(lambda: "", None, msg)
+
     clear_btn.click(
         clear_chat,
         outputs=[chatbot, msg],
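For reference, a quick round trip through the two converters added in this commit (a minimal sketch assuming `tuples_from_messages` and `messages_from_tuples` are in scope; the sample history is illustrative, not taken from the app):

    messages = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
        {"role": "user", "content": "How are you?"},  # dangling user turn
    ]

    pairs = tuples_from_messages(messages)
    # -> [["Hi", "Hello!"], ["How are you?", ""]]

    assert messages_from_tuples(pairs) == messages
    # Lossless here: the dangling turn's empty assistant slot is falsy,
    # so messages_from_tuples skips it rather than emitting an empty reply.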
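A note on the kept line `step = max(8, len(full_response) // 40)`: it sizes the pseudo-streaming chunks so any response is emitted in roughly 40 pieces, never fewer than 8 characters each. The loop that consumes it lies outside this diff's context; a minimal sketch of the usual prefix-yield pattern (with `stream_chunks` as a hypothetical name, not the app's actual code):

    def stream_chunks(full_response: str, step: int):
        # Yield progressively longer prefixes until the full text is out.
        for i in range(0, len(full_response), step):
            yield full_response[: i + step]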
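Design note on the event wiring: chaining `.then(lambda: "", None, msg)` onto both `msg.submit` and `submit_btn.click` clears the input Textbox only after `handle_chat` finishes, so the user's message stays visible while the response is generated; before this commit the box was never cleared after sending.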