# models_config.py
#
# Central LLM configuration: which Hugging Face models serve each task,
# how requests are routed across them, and how application task names map
# to ZeroGPU workload categories.
#
# NOTE(review): the original file had been collapsed onto a single line
# behind the leading "# models_config.py" comment, turning the entire
# module into a comment and leaving LLM_CONFIG undefined. Reformatted so
# the assignment actually executes; all keys, values, and original inline
# comments are preserved byte-for-byte.

LLM_CONFIG = {
    "primary_provider": "huggingface",

    # Per-task model registry: each entry names a Hugging Face model id plus
    # task-specific parameters (generation limits, thresholds, etc.).
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # High-quality instruct model
            "task": "general_reasoning",
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            "fallback": "gpt2",  # Simple but guaranteed working model
            "is_chat_model": True,
        },
        "embedding_specialist": {
            "model_id": "sentence-transformers/all-MiniLM-L6-v2",
            "task": "embeddings",
            "vector_dimensions": 384,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False,
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for classification
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True,
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for safety
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True,
        },
    },

    # Dispatch policy across the models above.
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check",
    },

    # Application task name -> ZeroGPU workload category.
    "zero_gpu_task_mapping": {
        "intent_classification": "classification",
        "embedding_generation": "embedding",
        "safety_check": "general",
        "general_reasoning": "reasoning",
        "response_synthesis": "general",
    },
}