monurcan committed
Commit d0df48e · 1 Parent(s): 959d8e4
Files changed (2)
  1. .gitignore +2 -1
  2. app.py +81 -41
.gitignore CHANGED
@@ -1 +1,2 @@
-/env/*
+/env/*
+__pycache__/
app.py CHANGED
@@ -29,20 +29,51 @@ def progress_bar_html(label: str) -> str:
 model_name = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
 
 
-def model_inference(input_dict, history, hf_token: gr.OAuthToken):
+def model_inference(input_dict, history, *additional_inputs):
     """
     Use Hugging Face InferenceClient (streaming) to perform the multimodal chat completion.
     Signature matches ChatInterface call pattern: (input_dict, history, *additional_inputs)
     The OAuth token (from gr.LoginButton) is passed as `hf_token`.
     """
+    # Extract hf_token from additional_inputs in a robust way (gradio sometimes passes extra args)
+    hf_token = None
+    for ai in additional_inputs:
+        if ai is None:
+            continue
+        # gradio may pass a small object with attribute `token`
+        if hasattr(ai, "token"):
+            hf_token = ai
+            break
+        # or a dict-like with a token key
+        if isinstance(ai, dict) and "token" in ai:
+
+            class _T:
+                pass
+
+            obj = _T()
+            obj.token = ai.get("token")
+            hf_token = obj
+            break
+        # or the token itself could be passed as a string
+        if isinstance(ai, str):
+
+            class _T2:
+                pass
+
+            obj = _T2()
+            obj.token = ai
+            hf_token = obj
+            break
+
     text = input_dict.get("text", "")
     files = input_dict.get("files", []) or []
 
     if text == "" and not files:
-        gr.Error("Please input a query and optionally image(s).")
+        # yield an error text so the streaming generator produces at least one value
+        yield "Please input a query and optionally image(s)."
         return
     if text == "" and files:
-        gr.Error("Please input a text query along with the image(s).")
+        yield "Please input a text query along with the image(s)."
        return
 
     # Build the content list: images (as URLs or data URLs) followed by the text
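Two fixes land together in this hunk. The old `gr.Error(...)` calls constructed the exception without raising it, so the user never saw the message; because the handler is a generator, the new code yields the text instead, which also guarantees the stream produces at least one value. The signature change to `*additional_inputs` then tolerates however Gradio delivers the OAuth token. The throwaway `_T`/`_T2` classes could be replaced with `types.SimpleNamespace`; a minimal sketch of the same normalization (the `extract_hf_token` helper is illustrative, not part of the commit):

```python
from types import SimpleNamespace

def extract_hf_token(*additional_inputs):
    """Return an object exposing `.token`, or None if nothing usable was passed."""
    for ai in additional_inputs:
        if ai is None:
            continue
        if hasattr(ai, "token"):                    # gr.OAuthToken-like object
            return ai
        if isinstance(ai, dict) and "token" in ai:  # dict-like payload
            return SimpleNamespace(token=ai["token"])
        if isinstance(ai, str):                     # bare token string
            return SimpleNamespace(token=ai)
    return None
```

Either way, the rest of the function only ever reads `hf_token.token`, so any object with that attribute works.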
@@ -71,50 +102,58 @@ def model_inference(input_dict, history, hf_token: gr.OAuthToken):
     messages = [{"role": "user", "content": content_list}]
 
     if hf_token is None or not getattr(hf_token, "token", None):
-        gr.Error(
-            "Please login with a Hugging Face account (use the Login button in the sidebar)."
-        )
+        yield "Please login with a Hugging Face account (use the Login button in the sidebar)."
         return
 
-    client = InferenceClient(token=hf_token.token, model=model_name)
+    try:
+        client = InferenceClient(token=hf_token.token, model=model_name)
 
-    response = ""
-    yield progress_bar_html("Processing...")
+        response = ""
+        yield progress_bar_html("Processing...")
 
-    # The API may stream tokens. Try to iterate the streaming generator and extract token deltas.
-    try:
-        stream = client.chat.completions.create(messages=messages, stream=True)
-    except TypeError:
-        # older/newer client variants: try the alternative method name
-        stream = client.chat_completion(messages=messages, stream=True)
-
-    for chunk in stream:
-        # chunk can be an object with attributes or a dict depending on client version
-        token = ""
+        # The API may stream tokens. Try to iterate the streaming generator and extract token deltas.
         try:
-            # attempt dict-style
-            if isinstance(chunk, dict):
-                choices = chunk.get("choices")
-                if choices and len(choices) > 0:
-                    delta = choices[0].get("delta", {})
-                    token = delta.get("content") or ""
-            else:
-                # attribute-style
-                choices = getattr(chunk, "choices", None)
-                if choices and len(choices) > 0:
-                    delta = getattr(choices[0], "delta", None)
-                    if isinstance(delta, dict):
-                        token = delta.get("content") or ""
-                    else:
-                        token = getattr(delta, "content", "")
-        except Exception:
-            token = ""
+            stream = client.chat.completions.create(messages=messages, stream=True)
+        except TypeError:
+            # older/newer client variants: try the alternative method name
+            stream = client.chat_completion(messages=messages, stream=True)
 
-        if token:
-            # escape incremental token to avoid raw HTML breaking the chat box
-            response += html.escape(token)
-            time.sleep(0.001)
+        for chunk in stream:
+            # chunk can be an object with attributes or a dict depending on client version
+            token = ""
+            try:
+                # attempt dict-style
+                if isinstance(chunk, dict):
+                    choices = chunk.get("choices")
+                    if choices and len(choices) > 0:
+                        delta = choices[0].get("delta", {})
+                        token = delta.get("content") or ""
+                else:
+                    # attribute-style
+                    choices = getattr(chunk, "choices", None)
+                    if choices and len(choices) > 0:
+                        delta = getattr(choices[0], "delta", None)
+                        if isinstance(delta, dict):
+                            token = delta.get("content") or ""
+                        else:
+                            token = getattr(delta, "content", "")
+            except Exception:
+                token = ""
+
+            if token:
+                # escape incremental token to avoid raw HTML breaking the chat box
+                response += html.escape(token)
+                time.sleep(0.001)
+                yield response
+
+        # ensure we yield at least one final message so the async iterator doesn't see StopIteration
+        if response:
             yield response
+        else:
+            yield "(no text was returned by the model)"
+    except Exception as e:
+        # don't let exceptions escape the generator; yield them so Gradio can display them
+        yield f"Error during inference: {e}"
 
 
 examples = [
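The whole body is now wrapped in a single `try`/`except` so an inference failure is yielded as chat text instead of escaping the generator, and the dict-versus-attribute probing covers `huggingface_hub` versions that stream plain dicts as well as those that stream `ChatCompletionStreamOutput` objects. On a recent client the loop reduces to a few lines; a minimal sketch, assuming `chat_completion(..., stream=True)` chunks expose `choices[0].delta.content` (which can be `None`, e.g. on the final chunk):

```python
import html

from huggingface_hub import InferenceClient

def stream_reply(token: str, messages: list) -> str:
    """Accumulate a streamed chat completion into HTML-escaped text."""
    client = InferenceClient(token=token, model="HuggingFaceTB/SmolVLM2-256M-Video-Instruct")
    response = ""
    for chunk in client.chat_completion(messages=messages, stream=True):
        if not chunk.choices:                         # some chunks carry no choices
            continue
        piece = chunk.choices[0].delta.content or ""  # None on content-less chunks
        response += html.escape(piece)                # escape so raw HTML can't break the chat box
    return response
```

`messages` takes the same shape the app builds above: `[{"role": "user", "content": content_list}]`.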
@@ -150,7 +189,8 @@ with gr.Blocks() as demo:
         additional_inputs=[login_btn],
     )
 
-    chatbot.render()
+    # ChatInterface is already created inside the Blocks context; calling render() can duplicate it
+    # so we avoid calling chatbot.render() here.
 
 
 if __name__ == "__main__":
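As the new comment explains, a `gr.ChatInterface` instantiated inside a `with gr.Blocks()` context renders itself at that point, so the extra `chatbot.render()` drew the interface a second time. `.render()` is the pattern for objects created outside the context; a hedged sketch of both patterns (the `echo` handler is a stand-in):

```python
import gradio as gr

def echo(message, history):
    # stand-in handler: just repeat the user's message
    return message

# Pattern A: created inside the Blocks context -- it renders itself,
# so calling .render() afterwards would draw a second copy.
with gr.Blocks() as demo_a:
    gr.ChatInterface(fn=echo)

# Pattern B: created outside the context -- .render() places it explicitly.
chat = gr.ChatInterface(fn=echo)
with gr.Blocks() as demo_b:
    chat.render()
```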
 