JatsTheAIGen committed
Commit 5d37f3d · 1 Parent(s): 9d31b94

Update token allocation for DeepSeek R1 128K context window

- Set NOVITA_MODEL_CONTEXT_WINDOW to 128000 (128K tokens)
- Increase USER_INPUT_MAX_TOKENS to 32000
- Increase CONTEXT_PREPARATION_BUDGET to 115000
- Increase CONTEXT_PRUNING_THRESHOLD to 115000
- Update validators to support larger token limits

Files changed (2):
  1. ENV_EXAMPLE_CONTENT.txt +8 -7
  2. src/config.py +17 -11
ENV_EXAMPLE_CONTENT.txt CHANGED
@@ -31,23 +31,24 @@ DEEPSEEK_R1_FORCE_REASONING=True
 # Token Allocation Configuration
 # =============================================================================
 # Maximum tokens dedicated for user input (prioritized over context)
-# Recommended: 8000 tokens for large queries
-USER_INPUT_MAX_TOKENS=8000
+# Recommended: 32000 tokens for DeepSeek R1 (128K context window)
+USER_INPUT_MAX_TOKENS=32000
 
 # Maximum tokens for context preparation (includes user input + context)
-# Recommended: 28000 tokens for 32K context window models
-CONTEXT_PREPARATION_BUDGET=28000
+# Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output)
+CONTEXT_PREPARATION_BUDGET=115000
 
 # Context pruning threshold (should match context_preparation_budget)
-CONTEXT_PRUNING_THRESHOLD=28000
+CONTEXT_PRUNING_THRESHOLD=115000
 
 # Always prioritize user input over historical context
 PRIORITIZE_USER_INPUT=True
 
 # Model context window (actual limit for your deployed model)
-# Default: 5000 tokens (adjust based on your Novita AI deployment)
+# Default: 128000 tokens for DeepSeek R1 (128K context window)
 # This is the maximum total tokens (input + output) the model can handle
-NOVITA_MODEL_CONTEXT_WINDOW=5000
+# Take full advantage of DeepSeek R1's 128K capability
+NOVITA_MODEL_CONTEXT_WINDOW=128000
 
 # =============================================================================
 # Database Configuration
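
A quick sanity check of the numbers in the example above (values copied from the new .env settings; the variable names are just illustrative Python, not part of the repo):

context_window = 128_000   # NOVITA_MODEL_CONTEXT_WINDOW
context_budget = 115_000   # CONTEXT_PREPARATION_BUDGET (user input + retrieved context)
user_input_max = 32_000    # USER_INPUT_MAX_TOKENS, counted inside the budget

output_reserve = context_window - context_budget
print(output_reserve)      # 13000 -> the "~13K for output" mentioned in the comment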
src/config.py CHANGED
@@ -209,19 +209,19 @@ class Settings(BaseSettings):
 
     # Token Allocation Configuration
     user_input_max_tokens: int = Field(
-        default=8000,
+        default=32000,
         description="Maximum tokens dedicated for user input (prioritized over context)",
         env="USER_INPUT_MAX_TOKENS"
     )
 
     context_preparation_budget: int = Field(
-        default=28000,
+        default=115000,
         description="Maximum tokens for context preparation (includes user input + context)",
         env="CONTEXT_PREPARATION_BUDGET"
     )
 
     context_pruning_threshold: int = Field(
-        default=28000,
+        default=115000,
         description="Context pruning threshold (should match context_preparation_budget)",
         env="CONTEXT_PRUNING_THRESHOLD"
     )
@@ -234,8 +234,8 @@ class Settings(BaseSettings):
 
     # Model Context Window Configuration
     novita_model_context_window: int = Field(
-        default=5000,
-        description="Maximum context window for Novita AI model (input + output tokens)",
+        default=128000,
+        description="Maximum context window for Novita AI model (input + output tokens). DeepSeek R1 supports 128K tokens.",
         env="NOVITA_MODEL_CONTEXT_WINDOW"
     )
 
@@ -264,20 +264,26 @@ class Settings(BaseSettings):
     @validator("user_input_max_tokens", pre=True)
     def validate_user_input_tokens(cls, v):
         """Validate user input token limit"""
-        val = int(v) if v else 8000
-        return max(1000, min(20000, val))
+        val = int(v) if v else 32000
+        return max(1000, min(50000, val))  # Allow up to 50K for large inputs
 
     @validator("context_preparation_budget", pre=True)
     def validate_context_budget(cls, v):
         """Validate context preparation budget"""
-        val = int(v) if v else 28000
-        return max(4000, min(120000, val))
+        val = int(v) if v else 115000
+        return max(4000, min(125000, val))  # Allow up to 125K for 128K context window
+
+    @validator("context_pruning_threshold", pre=True)
+    def validate_pruning_threshold(cls, v):
+        """Validate context pruning threshold"""
+        val = int(v) if v else 115000
+        return max(4000, min(125000, val))  # Match context_preparation_budget limits
 
     @validator("novita_model_context_window", pre=True)
     def validate_context_window(cls, v):
         """Validate context window size"""
-        val = int(v) if v else 5000
-        return max(1000, min(200000, val))  # Reasonable bounds
+        val = int(v) if v else 128000
+        return max(1000, min(200000, val))  # Support up to 200K for future models
 
     # ==================== Model Configuration ====================
 
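
A minimal sketch of how these pre-validators behave, assuming pydantic v1-style BaseSettings (matching the @validator(..., pre=True) usage above); TokenSettings is a stripped-down stand-in for illustration, not the actual Settings class from src/config.py:

import os
from pydantic import BaseSettings, validator

class TokenSettings(BaseSettings):
    user_input_max_tokens: int = 32000
    novita_model_context_window: int = 128000

    @validator("user_input_max_tokens", pre=True)
    def clamp_user_input(cls, v):
        # Same clamping logic as validate_user_input_tokens above
        val = int(v) if v else 32000
        return max(1000, min(50000, val))

    @validator("novita_model_context_window", pre=True)
    def clamp_context_window(cls, v):
        # Same clamping logic as validate_context_window above
        val = int(v) if v else 128000
        return max(1000, min(200000, val))

os.environ["USER_INPUT_MAX_TOKENS"] = "999999"   # above the 50K cap
print(TokenSettings().user_input_max_tokens)     # -> 50000, clamped rather than rejected

The point of the clamping is that an out-of-range environment value is pulled back into bounds instead of raising a validation error.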