Commit
·
5d37f3d
1
Parent(s):
9d31b94
Update token allocation for DeepSeek R1 128K context window - Set NOVITA_MODEL_CONTEXT_WINDOW to 128000 (128K tokens) - Increase USER_INPUT_MAX_TOKENS to 32000 - Increase CONTEXT_PREPARATION_BUDGET to 115000 - Increase CONTEXT_PRUNING_THRESHOLD to 115000 - Update validators to support larger token limits
Browse files
- ENV_EXAMPLE_CONTENT.txt +8 -7
- src/config.py +17 -11
ENV_EXAMPLE_CONTENT.txt
CHANGED
|
@@ -31,23 +31,24 @@ DEEPSEEK_R1_FORCE_REASONING=True
|
|
| 31 |
# Token Allocation Configuration
|
| 32 |
# =============================================================================
|
| 33 |
# Maximum tokens dedicated for user input (prioritized over context)
|
| 34 |
-
# Recommended:
|
| 35 |
-
USER_INPUT_MAX_TOKENS=
|
| 36 |
|
| 37 |
# Maximum tokens for context preparation (includes user input + context)
|
| 38 |
-
# Recommended:
|
| 39 |
-
CONTEXT_PREPARATION_BUDGET=
|
| 40 |
|
| 41 |
# Context pruning threshold (should match context_preparation_budget)
|
| 42 |
-
CONTEXT_PRUNING_THRESHOLD=
|
| 43 |
|
| 44 |
# Always prioritize user input over historical context
|
| 45 |
PRIORITIZE_USER_INPUT=True
|
| 46 |
|
| 47 |
# Model context window (actual limit for your deployed model)
|
| 48 |
-
# Default:
|
| 49 |
# This is the maximum total tokens (input + output) the model can handle
|
| 50 |
-
|
|
|
|
| 51 |
|
| 52 |
# =============================================================================
|
| 53 |
# Database Configuration
|
|
|
|
| 31 |
# Token Allocation Configuration
|
| 32 |
# =============================================================================
|
| 33 |
# Maximum tokens dedicated for user input (prioritized over context)
|
| 34 |
+
# Recommended: 32000 tokens for DeepSeek R1 (128K context window)
|
| 35 |
+
USER_INPUT_MAX_TOKENS=32000
|
| 36 |
|
| 37 |
# Maximum tokens for context preparation (includes user input + context)
|
| 38 |
+
# Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output)
|
| 39 |
+
CONTEXT_PREPARATION_BUDGET=115000
|
| 40 |
|
| 41 |
# Context pruning threshold (should match context_preparation_budget)
|
| 42 |
+
CONTEXT_PRUNING_THRESHOLD=115000
|
| 43 |
|
| 44 |
# Always prioritize user input over historical context
|
| 45 |
PRIORITIZE_USER_INPUT=True
|
| 46 |
|
| 47 |
# Model context window (actual limit for your deployed model)
|
| 48 |
+
# Default: 128000 tokens for DeepSeek R1 (128K context window)
|
| 49 |
# This is the maximum total tokens (input + output) the model can handle
|
| 50 |
+
# Take full advantage of DeepSeek R1's 128K capability
|
| 51 |
+
NOVITA_MODEL_CONTEXT_WINDOW=128000
|
| 52 |
|
| 53 |
# =============================================================================
|
| 54 |
# Database Configuration
|
src/config.py
CHANGED
|
@@ -209,19 +209,19 @@ class Settings(BaseSettings):
|
|
| 209 |
|
| 210 |
# Token Allocation Configuration
|
| 211 |
user_input_max_tokens: int = Field(
|
| 212 |
-
default=
|
| 213 |
description="Maximum tokens dedicated for user input (prioritized over context)",
|
| 214 |
env="USER_INPUT_MAX_TOKENS"
|
| 215 |
)
|
| 216 |
|
| 217 |
context_preparation_budget: int = Field(
|
| 218 |
-
default=
|
| 219 |
description="Maximum tokens for context preparation (includes user input + context)",
|
| 220 |
env="CONTEXT_PREPARATION_BUDGET"
|
| 221 |
)
|
| 222 |
|
| 223 |
context_pruning_threshold: int = Field(
|
| 224 |
-
default=
|
| 225 |
description="Context pruning threshold (should match context_preparation_budget)",
|
| 226 |
env="CONTEXT_PRUNING_THRESHOLD"
|
| 227 |
)
|
|
@@ -234,8 +234,8 @@ class Settings(BaseSettings):
|
|
| 234 |
|
| 235 |
# Model Context Window Configuration
|
| 236 |
novita_model_context_window: int = Field(
|
| 237 |
-
default=
|
| 238 |
-
description="Maximum context window for Novita AI model (input + output tokens)",
|
| 239 |
env="NOVITA_MODEL_CONTEXT_WINDOW"
|
| 240 |
)
|
| 241 |
|
|
@@ -264,20 +264,26 @@ class Settings(BaseSettings):
|
|
| 264 |
@validator("user_input_max_tokens", pre=True)
|
| 265 |
def validate_user_input_tokens(cls, v):
|
| 266 |
"""Validate user input token limit"""
|
| 267 |
-
val = int(v) if v else
|
| 268 |
-
return max(1000, min(
|
| 269 |
|
| 270 |
@validator("context_preparation_budget", pre=True)
|
| 271 |
def validate_context_budget(cls, v):
|
| 272 |
"""Validate context preparation budget"""
|
| 273 |
-
val = int(v) if v else
|
| 274 |
-
return max(4000, min(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
@validator("novita_model_context_window", pre=True)
|
| 277 |
def validate_context_window(cls, v):
|
| 278 |
"""Validate context window size"""
|
| 279 |
-
val = int(v) if v else
|
| 280 |
-
return max(1000, min(200000, val)) #
|
| 281 |
|
| 282 |
# ==================== Model Configuration ====================
|
| 283 |
|
|
|
|
| 209 |
|
| 210 |
# Token Allocation Configuration
|
| 211 |
user_input_max_tokens: int = Field(
|
| 212 |
+
default=32000,
|
| 213 |
description="Maximum tokens dedicated for user input (prioritized over context)",
|
| 214 |
env="USER_INPUT_MAX_TOKENS"
|
| 215 |
)
|
| 216 |
|
| 217 |
context_preparation_budget: int = Field(
|
| 218 |
+
default=115000,
|
| 219 |
description="Maximum tokens for context preparation (includes user input + context)",
|
| 220 |
env="CONTEXT_PREPARATION_BUDGET"
|
| 221 |
)
|
| 222 |
|
| 223 |
context_pruning_threshold: int = Field(
|
| 224 |
+
default=115000,
|
| 225 |
description="Context pruning threshold (should match context_preparation_budget)",
|
| 226 |
env="CONTEXT_PRUNING_THRESHOLD"
|
| 227 |
)
|
|
|
|
| 234 |
|
| 235 |
# Model Context Window Configuration
|
| 236 |
novita_model_context_window: int = Field(
|
| 237 |
+
default=128000,
|
| 238 |
+
description="Maximum context window for Novita AI model (input + output tokens). DeepSeek R1 supports 128K tokens.",
|
| 239 |
env="NOVITA_MODEL_CONTEXT_WINDOW"
|
| 240 |
)
|
| 241 |
|
|
|
|
| 264 |
@validator("user_input_max_tokens", pre=True)
|
| 265 |
def validate_user_input_tokens(cls, v):
|
| 266 |
"""Validate user input token limit"""
|
| 267 |
+
val = int(v) if v else 32000
|
| 268 |
+
return max(1000, min(50000, val)) # Allow up to 50K for large inputs
|
| 269 |
|
| 270 |
@validator("context_preparation_budget", pre=True)
|
| 271 |
def validate_context_budget(cls, v):
|
| 272 |
"""Validate context preparation budget"""
|
| 273 |
+
val = int(v) if v else 115000
|
| 274 |
+
return max(4000, min(125000, val)) # Allow up to 125K for 128K context window
|
| 275 |
+
|
| 276 |
+
@validator("context_pruning_threshold", pre=True)
|
| 277 |
+
def validate_pruning_threshold(cls, v):
|
| 278 |
+
"""Validate context pruning threshold"""
|
| 279 |
+
val = int(v) if v else 115000
|
| 280 |
+
return max(4000, min(125000, val)) # Match context_preparation_budget limits
|
| 281 |
|
| 282 |
@validator("novita_model_context_window", pre=True)
|
| 283 |
def validate_context_window(cls, v):
|
| 284 |
"""Validate context window size"""
|
| 285 |
+
val = int(v) if v else 128000
|
| 286 |
+
return max(1000, min(200000, val)) # Support up to 200K for future models
|
| 287 |
|
| 288 |
# ==================== Model Configuration ====================
|
| 289 |
|