JatsTheAIGen committed
Commit ca77f38 · 1 Parent(s): fd88fa8

Add logging for context window configuration and improve max_tokens calculation debugging

Files changed (1):
  1. src/llm_router.py +17 -10
src/llm_router.py CHANGED
@@ -54,7 +54,8 @@ class LLMRouter:
             logger.info("Novita AI API client initialized")
             logger.info(f"Base URL: {self.settings.novita_base_url}")
             logger.info(f"Model: {self.settings.novita_model}")
-        except Exception as e:
+            logger.info(f"Context Window: {self.settings.novita_model_context_window} tokens")
+        except Exception as e:
             logger.error(f"Failed to initialize Novita AI client: {e}")
             raise RuntimeError(f"Could not initialize Novita AI API client: {e}") from e
 
@@ -149,7 +150,7 @@ class LLMRouter:
                 response_text = self._clean_reasoning_tags(response_text)
                 logger.info(f"Novita AI API generated response (length: {len(response_text)})")
                 return response_text
-            else:
+            else:
                 # Handle non-streaming response
                 response = self.novita_client.chat.completions.create(**request_params)
 
@@ -159,14 +160,14 @@ class LLMRouter:
                     result = self._clean_reasoning_tags(result)
                     logger.info(f"Novita AI API generated response (length: {len(result)})")
                     return result
-                else:
+                else:
                     logger.error("Novita AI API returned empty response")
                     return None
 
         except Exception as e:
             logger.error(f"Error calling Novita AI API: {e}", exc_info=True)
-            raise
-
+            raise
+
     def _calculate_safe_max_tokens(self, prompt: str, requested_max_tokens: int) -> int:
         """
         Calculate safe max_tokens based on input token count and model context window.
@@ -182,9 +183,14 @@ class LLMRouter:
         # For more accuracy, you could use tiktoken if available
         input_tokens = len(prompt) // 4
 
-        # Get model context window
+        # Get model context window from settings
         context_window = self.settings.novita_model_context_window
 
+        logger.debug(
+            f"Calculating safe max_tokens: input ~{input_tokens} tokens, "
+            f"context_window={context_window}, requested={requested_max_tokens}"
+        )
+
         # Reserve minimum 100 tokens for safety margin
         available_tokens = context_window - input_tokens - 100
 
@@ -197,7 +203,8 @@ class LLMRouter:
         if safe_max_tokens < requested_max_tokens:
             logger.warning(
                 f"Reduced max_tokens from {requested_max_tokens} to {safe_max_tokens} "
-                f"(input: ~{input_tokens} tokens, context window: {context_window} tokens)"
+                f"(input: ~{input_tokens} tokens, context window: {context_window} tokens, "
+                f"available: {available_tokens} tokens)"
             )
 
         return safe_max_tokens
@@ -375,7 +382,7 @@ class LLMRouter:
     def _truncate_to_tokens(self, content: str, max_tokens: int) -> str:
        """Truncate content to fit within token limit"""
         # Simple character-based truncation (1 token ≈ 4 chars)
-        max_chars = max_tokens * 4
-        if len(content) <= max_chars:
-            return content
+        max_chars = max_tokens * 4
+        if len(content) <= max_chars:
+            return content
         return content[:max_chars - 3] + "..."
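
For reference, the heuristic the new debug log traces can be exercised on its own. Below is a minimal standalone sketch of the same calculation (1 token ≈ 4 characters, 100-token safety margin). The 32,768-token context window, the example prompt, and the min()/max() clamp are illustrative assumptions: the diff does not show how safe_max_tokens is derived from available_tokens, and the real code reads the window from self.settings.novita_model_context_window.

```python
# Minimal sketch of the max_tokens calculation shown in the diff.
# Assumptions: the 32,768-token window is illustrative, and the
# min()/max() clamp is one plausible reading of the code between
# the hunks, which the diff does not include.

def calculate_safe_max_tokens(prompt: str, requested_max_tokens: int,
                              context_window: int = 32_768) -> int:
    input_tokens = len(prompt) // 4  # rough estimate: 1 token ~= 4 chars
    available_tokens = context_window - input_tokens - 100  # 100-token safety margin
    safe_max_tokens = max(0, min(requested_max_tokens, available_tokens))
    if safe_max_tokens < requested_max_tokens:
        print(
            f"Reduced max_tokens from {requested_max_tokens} to {safe_max_tokens} "
            f"(input: ~{input_tokens} tokens, context window: {context_window} tokens, "
            f"available: {available_tokens} tokens)"
        )
    return safe_max_tokens

# A 100,000-character prompt is estimated at ~25,000 tokens, so a request
# for 16,384 completion tokens is clamped to the 7,668 that remain:
print(calculate_safe_max_tokens("x" * 100_000, 16_384))  # -> 7668
```

The `_truncate_to_tokens` helper in the last hunk applies the same 4-characters-per-token heuristic in the other direction, trimming content to `max_tokens * 4` characters before appending an ellipsis.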
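The in-code comment notes that tiktoken would give a more accurate count than len(prompt) // 4. A hedged sketch of that substitution, assuming tiktoken is installed and that cl100k_base is an acceptable stand-in encoding (the diff does not pin one for the Novita-hosted model):

```python
# Optional, more accurate token counting via tiktoken, as the comment in
# _calculate_safe_max_tokens suggests. cl100k_base is an assumption here;
# the right encoding depends on the model actually being served.
import tiktoken

def count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
    try:
        encoding = tiktoken.get_encoding(encoding_name)
        return len(encoding.encode(text))
    except Exception:
        # Fall back to the same rough heuristic the diff uses.
        return len(text) // 4

print(count_tokens("Hello, world!"))  # 4 under cl100k_base
```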