Spaces:
Running
Running
feat(core): default to auto model provider
Browse files- [docs] Update "Custom Model with Provider" heading to "Model Examples" (README.md:102)
- [docs] Add "Auto provider" example and modify "Specific provider" example (README.md:105-108)
- [docs] Add "Auto" option description under "Chat Providers" (README.md:141)
- [docs] Update "Chat Examples" for auto and specific providers (README.md:178-183)
- [feat] Change default "Model Name" value to "openai/gpt-oss-20b" in Chat Assistant (app.py:198-199)
- [feat] Update "Model Name" placeholder text for clarity (app.py:198-199)
- [docs] Add "auto" provider description to "Popular Providers" markdown (app.py:246)
- [docs] Update model examples under "Popular Providers" markdown (app.py:250-251)
README.md
CHANGED
|
@@ -102,8 +102,12 @@ The app requires:
|
|
| 102 |
3. Adjust parameters if needed (temperature, model, etc.)
|
| 103 |
4. Watch the AI respond with streaming text
|
| 104 |
|
| 105 |
-
####
|
| 106 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
Model Name: openai/gpt-oss-20b:fireworks-ai
|
| 108 |
System Message: You are a helpful coding assistant specializing in Python.
|
| 109 |
```
|
|
@@ -141,6 +145,7 @@ System Message: You are a helpful coding assistant specializing in Python.
|
|
| 141 |
## 🎯 Provider-Specific Features
|
| 142 |
|
| 143 |
### Chat Providers
|
|
|
|
| 144 |
- **Fireworks AI**: Fast and reliable inference service
|
| 145 |
- **Cerebras**: High-performance inference with low latency
|
| 146 |
- **Cohere**: Advanced language models with multilingual support
|
|
@@ -175,8 +180,15 @@ System Message: You are a helpful coding assistant specializing in Python.
|
|
| 175 |
|
| 176 |
#### Chat Examples
|
| 177 |
```
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
"Write a creative story about a time-traveling cat"
|
| 181 |
"What are the pros and cons of renewable energy?"
|
| 182 |
```
|
|
|
|
| 102 |
3. Adjust parameters if needed (temperature, model, etc.)
|
| 103 |
4. Watch the AI respond with streaming text
|
| 104 |
|
| 105 |
+
#### Model Examples
|
| 106 |
```
|
| 107 |
+
# Auto provider (default - let HF choose best)
|
| 108 |
+
Model Name: openai/gpt-oss-20b
|
| 109 |
+
|
| 110 |
+
# Specific provider
|
| 111 |
Model Name: openai/gpt-oss-20b:fireworks-ai
|
| 112 |
System Message: You are a helpful coding assistant specializing in Python.
|
| 113 |
```
|
|
|
|
| 145 |
## 🎯 Provider-Specific Features
|
| 146 |
|
| 147 |
### Chat Providers
|
| 148 |
+
- **Auto**: Let HuggingFace choose the best provider (default)
|
| 149 |
- **Fireworks AI**: Fast and reliable inference service
|
| 150 |
- **Cerebras**: High-performance inference with low latency
|
| 151 |
- **Cohere**: Advanced language models with multilingual support
|
|
|
|
| 180 |
|
| 181 |
#### Chat Examples
|
| 182 |
```
|
| 183 |
+
# Using auto provider (default)
|
| 184 |
+
Model: openai/gpt-oss-20b
|
| 185 |
+
Prompt: "Explain quantum computing in simple terms"
|
| 186 |
+
|
| 187 |
+
# Using specific provider
|
| 188 |
+
Model: openai/gpt-oss-20b:fireworks-ai
|
| 189 |
+
Prompt: "Help me debug this Python code: [paste code]"
|
| 190 |
+
|
| 191 |
+
# Other example prompts:
|
| 192 |
"Write a creative story about a time-traveling cat"
|
| 193 |
"What are the pros and cons of renewable energy?"
|
| 194 |
```
|
app.py
CHANGED
|
@@ -116,7 +116,11 @@ def generate_image(
|
|
| 116 |
|
| 117 |
try:
|
| 118 |
# Get token from HF-Inferoxy proxy server
|
|
|
|
| 119 |
token, token_id = get_proxy_token(api_key=proxy_api_key)
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Create client with specified provider
|
| 122 |
client = InferenceClient(
|
|
@@ -124,6 +128,8 @@ def generate_image(
|
|
| 124 |
api_key=token
|
| 125 |
)
|
| 126 |
|
|
|
|
|
|
|
| 127 |
# Prepare generation parameters
|
| 128 |
generation_params = {
|
| 129 |
"model": model_name,
|
|
@@ -140,9 +146,14 @@ def generate_image(
|
|
| 140 |
if seed != -1:
|
| 141 |
generation_params["seed"] = seed
|
| 142 |
|
|
|
|
|
|
|
|
|
|
| 143 |
# Generate image
|
| 144 |
image = client.text_to_image(**generation_params)
|
| 145 |
|
|
|
|
|
|
|
| 146 |
# Report successful token usage
|
| 147 |
report_token_status(token_id, "success", api_key=proxy_api_key)
|
| 148 |
|
|
@@ -188,64 +199,70 @@ with gr.Blocks(title="HF-Inferoxy AI Hub", theme=gr.themes.Soft()) as demo:
|
|
| 188 |
|
| 189 |
# ==================== CHAT TAB ====================
|
| 190 |
with gr.Tab("π¬ Chat Assistant", id="chat"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
with gr.Row():
|
| 192 |
-
with gr.Column(
|
| 193 |
-
# Create chat interface
|
| 194 |
-
chatbot = gr.ChatInterface(
|
| 195 |
-
chat_respond,
|
| 196 |
-
type="messages",
|
| 197 |
-
title="",
|
| 198 |
-
description="",
|
| 199 |
-
additional_inputs=[
|
| 200 |
-
gr.Textbox(
|
| 201 |
-
value="You are a helpful and friendly AI assistant. Provide clear, accurate, and helpful responses.",
|
| 202 |
-
label="System Message",
|
| 203 |
-
lines=2,
|
| 204 |
-
placeholder="Define the assistant's personality and behavior..."
|
| 205 |
-
),
|
| 206 |
-
gr.Textbox(
|
| 207 |
-
value="openai/gpt-oss-20b:fireworks-ai",
|
| 208 |
-
label="Model Name",
|
| 209 |
-
placeholder="e.g., openai/gpt-oss-20b:fireworks-ai or mistralai/Mistral-7B-Instruct-v0.2:groq"
|
| 210 |
-
),
|
| 211 |
-
gr.Slider(
|
| 212 |
-
minimum=1, maximum=4096, value=1024, step=1,
|
| 213 |
-
label="Max New Tokens"
|
| 214 |
-
),
|
| 215 |
-
gr.Slider(
|
| 216 |
-
minimum=0.1, maximum=2.0, value=0.7, step=0.1,
|
| 217 |
-
label="Temperature"
|
| 218 |
-
),
|
| 219 |
-
gr.Slider(
|
| 220 |
-
minimum=0.1, maximum=1.0, value=0.95, step=0.05,
|
| 221 |
-
label="Top-p (nucleus sampling)"
|
| 222 |
-
),
|
| 223 |
-
],
|
| 224 |
-
)
|
| 225 |
-
|
| 226 |
-
with gr.Column(scale=1):
|
| 227 |
gr.Markdown("""
|
| 228 |
### π‘ Chat Tips
|
| 229 |
|
| 230 |
**Model Format:**
|
| 231 |
-
- Single model: `openai/gpt-oss-20b`
|
| 232 |
-
- With provider: `
|
| 233 |
|
| 234 |
**Popular Models:**
|
| 235 |
- `openai/gpt-oss-20b` - Fast general purpose
|
| 236 |
- `meta-llama/Llama-2-7b-chat-hf` - Chat optimized
|
| 237 |
- `microsoft/DialoGPT-medium` - Conversation
|
| 238 |
- `google/flan-t5-base` - Instruction following
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
-
**
|
| 241 |
-
-
|
| 242 |
-
-
|
| 243 |
-
-
|
| 244 |
-
-
|
| 245 |
-
-
|
| 246 |
|
| 247 |
-
**
|
| 248 |
-
`openai/gpt-oss-20b
|
|
|
|
| 249 |
""")
|
| 250 |
|
| 251 |
# ==================== IMAGE GENERATION TAB ====================
|
|
|
|
| 116 |
|
| 117 |
try:
|
| 118 |
# Get token from HF-Inferoxy proxy server
|
| 119 |
+
print(f"π Image: Requesting token from proxy...")
|
| 120 |
token, token_id = get_proxy_token(api_key=proxy_api_key)
|
| 121 |
+
print(f"β
Image: Got token: {token_id}")
|
| 122 |
+
|
| 123 |
+
print(f"π¨ Image: Using model='{model_name}', provider='{provider}'")
|
| 124 |
|
| 125 |
# Create client with specified provider
|
| 126 |
client = InferenceClient(
|
|
|
|
| 128 |
api_key=token
|
| 129 |
)
|
| 130 |
|
| 131 |
+
print(f"π Image: Client created, preparing generation params...")
|
| 132 |
+
|
| 133 |
# Prepare generation parameters
|
| 134 |
generation_params = {
|
| 135 |
"model": model_name,
|
|
|
|
| 146 |
if seed != -1:
|
| 147 |
generation_params["seed"] = seed
|
| 148 |
|
| 149 |
+
print(f"π Image: Dimensions: {width}x{height}, steps: {num_inference_steps}, guidance: {guidance_scale}")
|
| 150 |
+
print(f"π‘ Image: Making generation request...")
|
| 151 |
+
|
| 152 |
# Generate image
|
| 153 |
image = client.text_to_image(**generation_params)
|
| 154 |
|
| 155 |
+
print(f"πΌοΈ Image: Generation completed! Image type: {type(image)}")
|
| 156 |
+
|
| 157 |
# Report successful token usage
|
| 158 |
report_token_status(token_id, "success", api_key=proxy_api_key)
|
| 159 |
|
|
|
|
| 199 |
|
| 200 |
# ==================== CHAT TAB ====================
|
| 201 |
with gr.Tab("π¬ Chat Assistant", id="chat"):
|
| 202 |
+
# Main chat interface - full width and prominent
|
| 203 |
+
chatbot = gr.ChatInterface(
|
| 204 |
+
chat_respond,
|
| 205 |
+
type="messages",
|
| 206 |
+
title="",
|
| 207 |
+
description="",
|
| 208 |
+
additional_inputs=[
|
| 209 |
+
gr.Textbox(
|
| 210 |
+
value="openai/gpt-oss-20b",
|
| 211 |
+
label="Model Name",
|
| 212 |
+
placeholder="e.g., openai/gpt-oss-20b or openai/gpt-oss-20b:fireworks-ai"
|
| 213 |
+
),
|
| 214 |
+
gr.Textbox(
|
| 215 |
+
value="You are a helpful and friendly AI assistant. Provide clear, accurate, and helpful responses.",
|
| 216 |
+
label="System Message",
|
| 217 |
+
lines=2,
|
| 218 |
+
placeholder="Define the assistant's personality and behavior..."
|
| 219 |
+
),
|
| 220 |
+
gr.Slider(
|
| 221 |
+
minimum=1, maximum=4096, value=1024, step=1,
|
| 222 |
+
label="Max New Tokens"
|
| 223 |
+
),
|
| 224 |
+
gr.Slider(
|
| 225 |
+
minimum=0.1, maximum=2.0, value=0.7, step=0.1,
|
| 226 |
+
label="Temperature"
|
| 227 |
+
),
|
| 228 |
+
gr.Slider(
|
| 229 |
+
minimum=0.1, maximum=1.0, value=0.95, step=0.05,
|
| 230 |
+
label="Top-p (nucleus sampling)"
|
| 231 |
+
),
|
| 232 |
+
],
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
# Configuration tips below the chat
|
| 236 |
with gr.Row():
|
| 237 |
+
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
gr.Markdown("""
|
| 239 |
### π‘ Chat Tips
|
| 240 |
|
| 241 |
**Model Format:**
|
| 242 |
+
- Single model: `openai/gpt-oss-20b` (uses auto provider)
|
| 243 |
+
- With provider: `openai/gpt-oss-20b:fireworks-ai`
|
| 244 |
|
| 245 |
**Popular Models:**
|
| 246 |
- `openai/gpt-oss-20b` - Fast general purpose
|
| 247 |
- `meta-llama/Llama-2-7b-chat-hf` - Chat optimized
|
| 248 |
- `microsoft/DialoGPT-medium` - Conversation
|
| 249 |
- `google/flan-t5-base` - Instruction following
|
| 250 |
+
""")
|
| 251 |
+
|
| 252 |
+
with gr.Column():
|
| 253 |
+
gr.Markdown("""
|
| 254 |
+
### π Popular Providers
|
| 255 |
|
| 256 |
+
- **auto** - Let HF choose best provider (default)
|
| 257 |
+
- **fireworks-ai** - Fast and reliable
|
| 258 |
+
- **cerebras** - High performance
|
| 259 |
+
- **groq** - Ultra-fast inference
|
| 260 |
+
- **together** - Wide model support
|
| 261 |
+
- **cohere** - Advanced language models
|
| 262 |
|
| 263 |
+
**Examples:**
|
| 264 |
+
- `openai/gpt-oss-20b` (auto provider)
|
| 265 |
+
- `openai/gpt-oss-20b:fireworks-ai` (specific provider)
|
| 266 |
""")
|
| 267 |
|
| 268 |
# ==================== IMAGE GENERATION TAB ====================
|