Commit 8d4bf4a (1 parent: e2eb926)
Fix infinite fallback loop in local model loading
- Add _is_fallback flag to prevent infinite recursion
- Remove fallback from fallback config to prevent loops
- Check if fallback model is different from primary before attempting
- Better error handling when both primary and fallback models are gated
- Prevent recursive fallback attempts when fallback also fails
- src/llm_router.py +25 -6
src/llm_router.py
CHANGED
|
@@ -86,6 +86,9 @@ class LLMRouter:
|
|
| 86 |
if not self.local_loader:
|
| 87 |
return None
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
model_id = model_config["model_id"]
|
| 90 |
max_tokens = kwargs.get('max_tokens', 512)
|
| 91 |
temperature = kwargs.get('temperature', 0.7)
|
|
@@ -113,23 +116,39 @@ class LLMRouter:
|
|
| 113 |
logger.error(f"❌ Cannot access gated repository {model_id}")
|
| 114 |
logger.error(f" Visit https://huggingface.co/{model_id.split(':')[0] if ':' in model_id else model_id} to request access.")
|
| 115 |
|
| 116 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
fallback_model_id = model_config.get("fallback")
|
| 118 |
-
if fallback_model_id:
|
| 119 |
logger.warning(f"Attempting fallback model: {fallback_model_id}")
|
| 120 |
try:
|
| 121 |
-
# Create fallback config
|
| 122 |
fallback_config = model_config.copy()
|
| 123 |
fallback_config["model_id"] = fallback_model_id
|
|
|
|
| 124 |
|
| 125 |
-
# Retry with fallback model
|
| 126 |
-
return await self._call_local_model(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
except Exception as fallback_error:
|
| 128 |
logger.error(f"Fallback model also failed: {fallback_error}")
|
| 129 |
logger.warning("Falling back to HF Inference API")
|
| 130 |
return None
|
| 131 |
else:
|
| 132 |
-
logger.warning("No fallback model configured, falling back to HF Inference API")
|
| 133 |
return None
|
| 134 |
|
| 135 |
# Format as chat messages if needed
|
|
|
|
| 86 |
if not self.local_loader:
|
| 87 |
return None
|
| 88 |
|
| 89 |
+
# Check if this is already a fallback attempt (prevent infinite loops)
|
| 90 |
+
is_fallback_attempt = kwargs.get('_is_fallback', False)
|
| 91 |
+
|
| 92 |
model_id = model_config["model_id"]
|
| 93 |
max_tokens = kwargs.get('max_tokens', 512)
|
| 94 |
temperature = kwargs.get('temperature', 0.7)
|
|
|
|
| 116 |
logger.error(f"❌ Cannot access gated repository {model_id}")
|
| 117 |
logger.error(f" Visit https://huggingface.co/{model_id.split(':')[0] if ':' in model_id else model_id} to request access.")
|
| 118 |
|
| 119 |
+
# Prevent infinite loops: if this is already a fallback attempt, don't try another fallback
|
| 120 |
+
if is_fallback_attempt:
|
| 121 |
+
logger.error("❌ Fallback model also failed with gated repository error")
|
| 122 |
+
logger.warning("Both primary and fallback models are gated. Falling back to HF Inference API.")
|
| 123 |
+
return None
|
| 124 |
+
|
| 125 |
+
# Try fallback model if available and this is not already a fallback attempt
|
| 126 |
fallback_model_id = model_config.get("fallback")
|
| 127 |
+
if fallback_model_id and fallback_model_id != model_id: # Ensure fallback is different
|
| 128 |
logger.warning(f"Attempting fallback model: {fallback_model_id}")
|
| 129 |
try:
|
| 130 |
+
# Create fallback config without fallback to prevent loops
|
| 131 |
fallback_config = model_config.copy()
|
| 132 |
fallback_config["model_id"] = fallback_model_id
|
| 133 |
+
fallback_config.pop("fallback", None) # Remove fallback to prevent infinite recursion
|
| 134 |
|
| 135 |
+
# Retry with fallback model (mark as fallback attempt)
|
| 136 |
+
return await self._call_local_model(
|
| 137 |
+
fallback_config,
|
| 138 |
+
prompt,
|
| 139 |
+
task_type,
|
| 140 |
+
**{**kwargs, '_is_fallback': True}
|
| 141 |
+
)
|
| 142 |
+
except GatedRepoError as fallback_gated_error:
|
| 143 |
+
logger.error(f"❌ Fallback model {fallback_model_id} is also gated")
|
| 144 |
+
logger.warning("Both primary and fallback models are gated. Falling back to HF Inference API.")
|
| 145 |
+
return None
|
| 146 |
except Exception as fallback_error:
|
| 147 |
logger.error(f"Fallback model also failed: {fallback_error}")
|
| 148 |
logger.warning("Falling back to HF Inference API")
|
| 149 |
return None
|
| 150 |
else:
|
| 151 |
+
logger.warning("No fallback model configured or fallback same as primary, falling back to HF Inference API")
|
| 152 |
return None
|
| 153 |
|
| 154 |
# Format as chat messages if needed
|