monurcan committed
Commit d0df48e · 1 Parent(s): 959d8e4
Files changed (2)
  1. .gitignore +2 -1
  2. app.py +81 -41
.gitignore CHANGED
@@ -1 +1,2 @@
-/env/*
+/env/*
+__pycache__/
app.py CHANGED
@@ -29,20 +29,51 @@ def progress_bar_html(label: str) -> str:
 model_name = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
 
 
-def model_inference(input_dict, history, hf_token: gr.OAuthToken):
+def model_inference(input_dict, history, *additional_inputs):
     """
     Use Hugging Face InferenceClient (streaming) to perform the multimodal chat completion.
     Signature matches ChatInterface call pattern: (input_dict, history, *additional_inputs)
     The OAuth token (from gr.LoginButton) is passed as `hf_token`.
     """
+    # Extract hf_token from additional_inputs in a robust way (gradio sometimes passes extra args)
+    hf_token = None
+    for ai in additional_inputs:
+        if ai is None:
+            continue
+        # gradio may pass a small object with attribute `token`
+        if hasattr(ai, "token"):
+            hf_token = ai
+            break
+        # or a dict-like with a token key
+        if isinstance(ai, dict) and "token" in ai:
+
+            class _T:
+                pass
+
+            obj = _T()
+            obj.token = ai.get("token")
+            hf_token = obj
+            break
+        # or the token itself could be passed as a string
+        if isinstance(ai, str):
+
+            class _T2:
+                pass
+
+            obj = _T2()
+            obj.token = ai
+            hf_token = obj
+            break
+
     text = input_dict.get("text", "")
     files = input_dict.get("files", []) or []
 
     if text == "" and not files:
-        gr.Error("Please input a query and optionally image(s).")
+        # yield an error text so the streaming generator produces at least one value
+        yield "Please input a query and optionally image(s)."
         return
     if text == "" and files:
-        gr.Error("Please input a text query along with the image(s).")
+        yield "Please input a text query along with the image(s)."
        return
 
     # Build the content list: images (as URLs or data URLs) followed by the text
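Two fixes land together in this hunk. The old `gr.Error(...)` calls constructed the exception without raising it, so the user never saw the message; because the handler is a generator, the new code yields the text instead, which also guarantees the stream produces at least one value. The signature change to `*additional_inputs` then tolerates however Gradio delivers the OAuth token. The throwaway `_T`/`_T2` classes could be replaced with `types.SimpleNamespace`; a minimal sketch of the same normalization (the `extract_hf_token` helper is illustrative, not part of the commit):

```python
from types import SimpleNamespace

def extract_hf_token(*additional_inputs):
    """Return an object exposing `.token`, or None if nothing usable was passed."""
    for ai in additional_inputs:
        if ai is None:
            continue
        if hasattr(ai, "token"):                    # gr.OAuthToken-like object
            return ai
        if isinstance(ai, dict) and "token" in ai:  # dict-like payload
            return SimpleNamespace(token=ai["token"])
        if isinstance(ai, str):                     # bare token string
            return SimpleNamespace(token=ai)
    return None
```

Either way, the rest of the function only ever reads `hf_token.token`, so any object with that attribute works.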
@@ -71,50 +102,58 @@ def model_inference(input_dict, history, hf_token: gr.OAuthToken):
     messages = [{"role": "user", "content": content_list}]
 
     if hf_token is None or not getattr(hf_token, "token", None):
-        gr.Error(
-            "Please login with a Hugging Face account (use the Login button in the sidebar)."
-        )
+        yield "Please login with a Hugging Face account (use the Login button in the sidebar)."
         return
 
-    client = InferenceClient(token=hf_token.token, model=model_name)
+    try:
+        client = InferenceClient(token=hf_token.token, model=model_name)
 
-    response = ""
-    yield progress_bar_html("Processing...")
+        response = ""
+        yield progress_bar_html("Processing...")
 
-    # The API may stream tokens. Try to iterate the streaming generator and extract token deltas.
-    try:
-        stream = client.chat.completions.create(messages=messages, stream=True)
-    except TypeError:
-        # older/newer client variants: try the alternative method name
-        stream = client.chat_completion(messages=messages, stream=True)
-
-    for chunk in stream:
-        # chunk can be an object with attributes or a dict depending on client version
-        token = ""
+        # The API may stream tokens. Try to iterate the streaming generator and extract token deltas.
         try:
-            # attempt dict-style
-            if isinstance(chunk, dict):
-                choices = chunk.get("choices")
-                if choices and len(choices) > 0:
-                    delta = choices[0].get("delta", {})
-                    token = delta.get("content") or ""
-            else:
-                # attribute-style
-                choices = getattr(chunk, "choices", None)
-                if choices and len(choices) > 0:
-                    delta = getattr(choices[0], "delta", None)
-                    if isinstance(delta, dict):
-                        token = delta.get("content") or ""
-                    else:
-                        token = getattr(delta, "content", "")
-        except Exception:
-            token = ""
+            stream = client.chat.completions.create(messages=messages, stream=True)
+        except TypeError:
+            # older/newer client variants: try the alternative method name
+            stream = client.chat_completion(messages=messages, stream=True)
 
-        if token:
-            # escape incremental token to avoid raw HTML breaking the chat box
-            response += html.escape(token)
-            time.sleep(0.001)
+        for chunk in stream:
+            # chunk can be an object with attributes or a dict depending on client version
+            token = ""
+            try:
+                # attempt dict-style
+                if isinstance(chunk, dict):
+                    choices = chunk.get("choices")
+                    if choices and len(choices) > 0:
+                        delta = choices[0].get("delta", {})
+                        token = delta.get("content") or ""
+                else:
+                    # attribute-style
+                    choices = getattr(chunk, "choices", None)
+                    if choices and len(choices) > 0:
+                        delta = getattr(choices[0], "delta", None)
+                        if isinstance(delta, dict):
+                            token = delta.get("content") or ""
+                        else:
+                            token = getattr(delta, "content", "")
+            except Exception:
+                token = ""
+
+            if token:
+                # escape incremental token to avoid raw HTML breaking the chat box
+                response += html.escape(token)
+                time.sleep(0.001)
+                yield response
+
+        # ensure we yield at least one final message so the async iterator doesn't see StopIteration
+        if response:
             yield response
+        else:
+            yield "(no text was returned by the model)"
+    except Exception as e:
+        # don't let exceptions escape the generator; yield them so Gradio can display them
+        yield f"Error during inference: {e}"
 
 
 examples = [
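The whole body is now wrapped in a single `try`/`except` so an inference failure is yielded as chat text instead of escaping the generator, and the dict-versus-attribute probing covers `huggingface_hub` versions that stream plain dicts as well as those that stream `ChatCompletionStreamOutput` objects. On a recent client the loop reduces to a few lines; a minimal sketch, assuming `chat_completion(..., stream=True)` chunks expose `choices[0].delta.content` (which can be `None`, e.g. on the final chunk):

```python
import html

from huggingface_hub import InferenceClient

def stream_reply(token: str, messages: list) -> str:
    """Accumulate a streamed chat completion into HTML-escaped text."""
    client = InferenceClient(token=token, model="HuggingFaceTB/SmolVLM2-256M-Video-Instruct")
    response = ""
    for chunk in client.chat_completion(messages=messages, stream=True):
        if not chunk.choices:                         # some chunks carry no choices
            continue
        piece = chunk.choices[0].delta.content or ""  # None on content-less chunks
        response += html.escape(piece)                # escape so raw HTML can't break the chat box
    return response
```

`messages` takes the same shape the app builds above: `[{"role": "user", "content": content_list}]`.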
@@ -150,7 +189,8 @@ with gr.Blocks() as demo:
         additional_inputs=[login_btn],
     )
 
-    chatbot.render()
+    # ChatInterface is already created inside the Blocks context; calling render() can duplicate it
+    # so we avoid calling chatbot.render() here.
 
 
 if __name__ == "__main__":
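As the new comment explains, a `gr.ChatInterface` instantiated inside a `with gr.Blocks()` context renders itself at that point, so the extra `chatbot.render()` drew the interface a second time. `.render()` is the pattern for objects created outside the context; a hedged sketch of both patterns (the `echo` handler is a stand-in):

```python
import gradio as gr

def echo(message, history):
    # stand-in handler: just repeat the user's message
    return message

# Pattern A: created inside the Blocks context -- it renders itself,
# so calling .render() afterwards would draw a second copy.
with gr.Blocks() as demo_a:
    gr.ChatInterface(fn=echo)

# Pattern B: created outside the context -- .render() places it explicitly.
chat = gr.ChatInterface(fn=echo)
with gr.Blocks() as demo_b:
    chat.render()
```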
 