# models_config.py
#
# Central LLM configuration: which Hugging Face models serve each task,
# how requests are routed across them, and how application task names map
# to ZeroGPU workload categories.
#
# NOTE(review): the original file had been collapsed onto a single line
# behind the leading "# models_config.py" comment, turning the entire
# module into a comment and leaving LLM_CONFIG undefined. Reformatted so
# the assignment actually executes; all keys, values, and original inline
# comments are preserved byte-for-byte.

LLM_CONFIG = {
    "primary_provider": "huggingface",

    # Per-task model registry: each entry names a Hugging Face model id plus
    # task-specific parameters (generation limits, thresholds, etc.).
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # High-quality instruct model
            "task": "general_reasoning",
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            "fallback": "gpt2",  # Simple but guaranteed working model
            "is_chat_model": True,
        },
        "embedding_specialist": {
            "model_id": "sentence-transformers/all-MiniLM-L6-v2",
            "task": "embeddings",
            "vector_dimensions": 384,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False,
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for classification
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True,
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for safety
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True,
        },
    },

    # Dispatch policy across the models above.
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check",
    },

    # Application task name -> ZeroGPU workload category.
    "zero_gpu_task_mapping": {
        "intent_classification": "classification",
        "embedding_generation": "embedding",
        "safety_check": "general",
        "general_reasoning": "reasoning",
        "response_synthesis": "general",
    },
}