monurcan committed
Commit 8178d81 · 1 Parent(s): d0df48e
Files changed (1):
  1. app.py +50 -48
app.py CHANGED
@@ -105,55 +105,57 @@ def model_inference(input_dict, history, *additional_inputs):
         yield "Please login with a Hugging Face account (use the Login button in the sidebar)."
         return
 
-    try:
-        client = InferenceClient(token=hf_token.token, model=model_name)
-
-        response = ""
-        yield progress_bar_html("Processing...")
-
-        # The API may stream tokens. Try to iterate the streaming generator and extract token deltas.
-        try:
-            stream = client.chat.completions.create(messages=messages, stream=True)
-        except TypeError:
-            # older/newer client variants: try the alternative method name
-            stream = client.chat_completion(messages=messages, stream=True)
-
-        for chunk in stream:
-            # chunk can be an object with attributes or a dict depending on client version
-            token = ""
-            try:
-                # attempt dict-style
-                if isinstance(chunk, dict):
-                    choices = chunk.get("choices")
-                    if choices and len(choices) > 0:
-                        delta = choices[0].get("delta", {})
-                        token = delta.get("content") or ""
-                else:
-                    # attribute-style
-                    choices = getattr(chunk, "choices", None)
-                    if choices and len(choices) > 0:
-                        delta = getattr(choices[0], "delta", None)
-                        if isinstance(delta, dict):
-                            token = delta.get("content") or ""
-                        else:
-                            token = getattr(delta, "content", "")
-            except Exception:
-                token = ""
-
-            if token:
-                # escape incremental token to avoid raw HTML breaking the chat box
-                response += html.escape(token)
-                time.sleep(0.001)
-                yield response
-
-        # ensure we yield at least one final message so the async iterator doesn't see StopIteration
-        if response:
-            yield response
-        else:
-            yield "(no text was returned by the model)"
-    except Exception as e:
-        # don't let exceptions escape the generator; yield them so Gradio can display them
-        yield f"Error during inference: {e}"
+    client = InferenceClient(
+        token=hf_token.token, model=model_name, provider="hf-inference"
+    )
+
+    response = ""
+    for message in client.chat_completion(
+        messages,
+        max_tokens=1024,
+        stream=True,
+    ):
+        choices = message.choices
+        token = ""
+        if len(choices) and choices[0].delta.content:
+            token = choices[0].delta.content
+
+        response += token
+        yield response
+
+    # for chunk in stream:
+    #     # chunk can be an object with attributes or a dict depending on client version
+    #     token = ""
+    #     try:
+    #         # attempt dict-style
+    #         if isinstance(chunk, dict):
+    #             choices = chunk.get("choices")
+    #             if choices and len(choices) > 0:
+    #                 delta = choices[0].get("delta", {})
+    #                 token = delta.get("content") or ""
+    #         else:
+    #             # attribute-style
+    #             choices = getattr(chunk, "choices", None)
+    #             if choices and len(choices) > 0:
+    #                 delta = getattr(choices[0], "delta", None)
+    #                 if isinstance(delta, dict):
+    #                     token = delta.get("content") or ""
+    #                 else:
+    #                     token = getattr(delta, "content", "")
+    #     except Exception:
+    #         token = ""
+
+    #     if token:
+    #         # escape incremental token to avoid raw HTML breaking the chat box
+    #         response += html.escape(token)
+    #         time.sleep(0.001)
+    #         yield response
+
+    # # ensure we yield at least one final message so the async iterator doesn't see StopIteration
+    # if response:
+    #     yield response
+    # else:
+    #     yield "(no text was returned by the model)"
 
 
 examples = [
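
For reference, the streaming pattern this commit switches to can be exercised on its own. The sketch below is a minimal, self-contained version of the new loop; the model id and the HF_TOKEN environment variable are placeholders for this example and are not part of the commit, which instead uses the app's own model_name and the token obtained from the Gradio login flow (hf_token.token).

import os

from huggingface_hub import InferenceClient

# Placeholder model id for this sketch; app.py supplies its own model_name.
model_name = "HuggingFaceH4/zephyr-7b-beta"

# Assumes a user access token in the HF_TOKEN environment variable.
client = InferenceClient(
    token=os.environ["HF_TOKEN"], model=model_name, provider="hf-inference"
)

messages = [{"role": "user", "content": "Say hello in one sentence."}]

response = ""
for message in client.chat_completion(messages, max_tokens=1024, stream=True):
    # Each streamed chunk carries a delta; guard against empty choices and
    # empty content, exactly as the loop in app.py does.
    choices = message.choices
    if len(choices) and choices[0].delta.content:
        response += choices[0].delta.content

print(response)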