# models_config.py
LLM_CONFIG = {
    "primary_provider": "huggingface",
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # High-quality instruct model
            "task": "general_reasoning",
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            "fallback": "gpt2",  # Simple but guaranteed working model
            "is_chat_model": True
        },
        "embedding_specialist": {
            "model_id": "sentence-transformers/all-MiniLM-L6-v2",
            "task": "embeddings",
            "vector_dimensions": 384,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for classification
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Use chat model for safety
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True
        }
    },
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check"
    },
    "zero_gpu_task_mapping": {
        "intent_classification": "classification",
        "embedding_generation": "embedding",
        "safety_check": "general",
        "general_reasoning": "reasoning",
        "response_synthesis": "general"
    }
}
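

# --- Example usage (illustrative sketch, not part of the original config) ---
# A hypothetical helper showing one way a router could consume LLM_CONFIG:
# pick the model entry whose "task" matches the request, otherwise fall back
# to the primary reasoning model. The function name and fallback behavior are
# assumptions for illustration only.
def resolve_model_for_task(task: str, config: dict = LLM_CONFIG) -> str:
    """Return the model_id configured for `task`, or the primary model as a default."""
    for spec in config["models"].values():
        if spec["task"] == task:
            return spec["model_id"]
    # No dedicated entry for this task: use the primary reasoning model
    return config["models"]["reasoning_primary"]["model_id"]


if __name__ == "__main__":
    # Expected: "Qwen/Qwen2.5-7B-Instruct"
    print(resolve_model_for_task("intent_classification"))
    # Expected: "sentence-transformers/all-MiniLM-L6-v2"
    print(resolve_model_for_task("embeddings"))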