Upload 60 files
Initial upload of project
This view is limited to 50 files because it contains too many changes.
- .gitattributes +36 -0
- .gitignore +26 -0
- .python-version +1 -0
- README.md +180 -0
- app.py +0 -0
- mcp_api_call.py +39 -0
- mcp_hub/__init__.py +20 -0
- mcp_hub/__pycache__/__init__.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/cache_utils.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/config.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/exceptions.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/health_monitoring.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/logging_config.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/package_utils.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/performance_monitoring.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/reliability_utils.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/sandbox_pool.cpython-312.pyc +0 -0
- mcp_hub/__pycache__/utils.cpython-312.pyc +0 -0
- mcp_hub/advanced_config.py +272 -0
- mcp_hub/async_utils.py +95 -0
- mcp_hub/cache_utils.py +211 -0
- mcp_hub/config.py +120 -0
- mcp_hub/exceptions.py +28 -0
- mcp_hub/health_monitoring.py +261 -0
- mcp_hub/logging_config.py +51 -0
- mcp_hub/package_utils.py +192 -0
- mcp_hub/performance_monitoring.py +232 -0
- mcp_hub/reliability_utils.py +254 -0
- mcp_hub/sandbox_pool.py +701 -0
- mcp_hub/utils.py +439 -0
- pyproject.toml +28 -0
- pytest.ini +11 -0
- requirements.txt +11 -0
- tests/__init__.py +1 -0
- tests/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/__pycache__/conftest.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/conftest.py +142 -0
- tests/integration/__init__.py +1 -0
- tests/integration/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/integration/__pycache__/test_async_sync_error_handling.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/integration/__pycache__/test_end_to_end_workflow.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/integration/__pycache__/test_performance_resources.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/integration/__pycache__/test_ui_endpoints.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__init__.py +1 -0
- tests/unit/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/unit/__pycache__/test_citation_formatter_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_code_generator_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_code_runner_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_llm_processor_agent.cpython-312-pytest-8.4.0.pyc +0 -0
- tests/unit/__pycache__/test_orchestrator_agent.cpython-312-pytest-8.4.0.pyc +0 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,26 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+.cadence
+.env
+.idea
+.mypy_cache/
+archive/
+cache
+logs
+
+# Test-generated files
+test_cache/
+.coverage
+htmlcov/
+.pytest_cache/
+.ruff_cache
+assets
+static
.python-version
ADDED
@@ -0,0 +1 @@
+3.12
README.md
ADDED
@@ -0,0 +1,180 @@
+---
+title: ShallowCodeResearch
+emoji: 📉
+colorFrom: blue
+colorTo: pink
+sdk: gradio
+sdk_version: 5.33.0
+app_file: app.py
+pinned: false
+short_description: Coding research assistant that generates code and tests it
+tags:
+- mcp
+- multi-agent
+- research
+- code-generation
+- ai-assistant
+- gradio
+- python
+- web-search
+- llm
+- modal
+python_version: "3.12"
+---
+---
+
+# MCP Hub - Multi-Agent AI Research & Code Assistant
+
+🚀 **Advanced multi-agent system for AI-powered research and code generation**
+
+## What is MCP Hub?
+
+MCP Hub is a sophisticated multi-agent research and code assistant built using Gradio's Model Context Protocol (MCP) server functionality. It orchestrates specialized AI agents to provide comprehensive research capabilities and generate executable Python code.
+
+## ✨ Key Features
+
+- 🧠 **Multi-Agent Architecture**: Specialized agents working in orchestrated workflows
+- 🔍 **Intelligent Research**: Web search with automatic summarization and citation formatting
+- 💻 **Code Generation**: Context-aware Python code creation with secure execution
+- 🔗 **MCP Server**: Built-in MCP server for seamless agent communication
+- 🎯 **Multiple LLM Support**: Compatible with Nebius, OpenAI, Anthropic, and HuggingFace
+- 🛡️ **Secure Execution**: Modal sandbox environment for safe code execution
+- 📊 **Performance Monitoring**: Advanced metrics collection and health monitoring
+
+## 🚀 Quick Start
+
+1. **Configure your environment** by setting up API keys in the Settings tab
+2. **Choose your LLM provider** (Nebius recommended for best performance)
+3. **Input your research query** in the Orchestrator Flow tab
+4. **Watch the magic happen** as agents collaborate to research and generate code
+
+## 🏗️ Architecture
+
+### Core Agents
+
+- **Question Enhancer**: Breaks down complex queries into focused sub-questions
+- **Web Search Agent**: Performs targeted searches using Tavily API
+- **LLM Processor**: Handles text processing, summarization, and analysis
+- **Citation Formatter**: Manages academic citation formatting (APA style)
+- **Code Generator**: Creates contextually-aware Python code
+- **Code Runner**: Executes code in secure Modal sandboxes
+- **Orchestrator**: Coordinates the complete workflow
+
+### Workflow Example
+
+```
+User Query: "Create Python code to analyze Twitter sentiment"
+↓
+Question Enhancement: Split into focused sub-questions
+↓
+Web Research: Search for Twitter APIs, sentiment libraries, examples
+↓
+Context Integration: Combine research into comprehensive context
+↓
+Code Generation: Create executable Python script
+↓
+Secure Execution: Run code in Modal sandbox
+↓
+Results: Code + output + research summary + citations
+```
+
+## 🛠️ Setup Requirements
+
+### Required API Keys
+
+- **LLM Provider** (choose one):
+  - Nebius API (recommended)
+  - OpenAI API
+  - Anthropic API
+  - HuggingFace Inference API
+- **Tavily API** (for web search)
+- **Modal Account** (for code execution)
+
+### Environment Configuration
+
+Set these environment variables or configure in the app:
+
+```bash
+LLM_PROVIDER=nebius  # Your chosen provider
+NEBIUS_API_KEY=your_key_here
+TAVILY_API_KEY=your_key_here
+# Modal setup handled automatically
+```
+
+## 🎯 Use Cases
+
+### Research & Development
+- **Academic Research**: Automated literature review and citation management
+- **Technical Documentation**: Generate comprehensive guides with current information
+- **Market Analysis**: Research trends and generate analytical reports
+
+### Code Generation
+- **Prototype Development**: Rapidly create functional code based on requirements
+- **API Integration**: Generate code for working with various APIs and services
+- **Data Analysis**: Create scripts for data processing and visualization
+
+### Learning & Education
+- **Code Examples**: Generate educational code samples with explanations
+- **Concept Exploration**: Research and understand complex programming concepts
+- **Best Practices**: Learn current industry standards and methodologies
+
+## 🔧 Advanced Features
+
+### Performance Monitoring
+- Real-time metrics collection
+- Response time tracking
+- Success rate monitoring
+- Resource usage analytics
+
+### Intelligent Caching
+- Reduces redundant API calls
+- Improves response times
+- Configurable TTL settings
+
+### Fault Tolerance
+- Circuit breaker protection
+- Rate limiting management
+- Graceful error handling
+- Automatic retry mechanisms
+
+### Sandbox Pool Management
+- Pre-warmed execution environments
+- Optimized performance
+- Resource pooling
+- Automatic scaling
+
+## 📱 Interface Tabs
+
+1. **Orchestrator Flow**: Complete end-to-end workflow
+2. **Individual Agents**: Access each agent separately for specific tasks
+3. **Advanced Features**: System monitoring and performance analytics
+
+## 🤝 MCP Integration
+
+This application demonstrates advanced MCP (Model Context Protocol) implementation:
+
+- **Server Architecture**: Full MCP server with schema generation
+- **Function Registry**: Proper MCP function definitions with typing
+- **Multi-Agent Communication**: Structured data flow between agents
+- **Error Handling**: Robust error management across agent interactions
+
+## 📊 Performance
+
+- **Response Times**: Optimized for sub-second agent responses
+- **Scalability**: Handles concurrent requests efficiently
+- **Reliability**: Built-in fault tolerance and monitoring
+- **Resource Management**: Intelligent caching and pooling
+
+## 🔍 Technical Details
+
+- **Python**: 3.12+ required
+- **Framework**: Gradio with MCP server capabilities
+- **Execution**: Modal for secure sandboxed code execution
+- **Search**: Tavily API for real-time web research
+- **Monitoring**: Comprehensive performance and health tracking
+
+---
+
+**Ready to experience the future of AI-assisted research and development?**
+
+Start by configuring your API keys and dive into the world of multi-agent AI collaboration! 🚀
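The README advertises a built-in MCP server on top of Gradio. As a hedged illustration (not code from this commit), recent Gradio 5.x releases with the `gradio[mcp]` extra expose that capability through a single `launch` flag; the `echo` tool below is a placeholder, not one of the repo's agents:

```python
import gradio as gr

def echo(text: str) -> str:
    """Placeholder tool; the real app wires the agent functions in instead."""
    return text

demo = gr.Interface(fn=echo, inputs="text", outputs="text")

if __name__ == "__main__":
    # In recent Gradio 5.x, this flag also serves the app's functions
    # as MCP tools alongside the normal web UI.
    demo.launch(mcp_server=True)
```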
app.py
ADDED
The diff for this file is too large to render.
mcp_api_call.py
ADDED
@@ -0,0 +1,39 @@
+from gradio_client import Client
+
+def print_human_readable_result(result):
+    # Print main request and status
+    if isinstance(result, tuple):
+        result = next((item for item in result if isinstance(item, dict)), result[0])
+    print("Status:", result.get('status', 'N/A'))
+    print("Status:", result.get('status', 'N/A'))
+    print("User Request:", result.get('user_request', 'N/A'))
+    print("\nSub-Questions:")
+    for i, sub_q in enumerate(result.get('sub_questions', []), 1):
+        print(f"  {i}. {sub_q}")
+
+    print("\nSearch Summaries:")
+    for i, summary in enumerate(result.get('search_summaries', []), 1):
+        print(f"  {i}. {summary}")
+
+    print("\nSearch Results:")
+    for i, res in enumerate(result.get('search_results', []), 1):
+        print(f"  {i}. {res['title']}\n     URL: {res['url']}\n     Content: {res['content'][:100]}{'...' if len(res['content']) > 100 else ''}\n     Score: {res['score']:.3f}")
+
+    print("\nGenerated Code:\n" + result.get('code_string', 'N/A'))
+
+    print("\nExecution Output:\n" + result.get('execution_output', 'N/A'))
+
+    print("\nCitations:")
+    for i, cit in enumerate(result.get('citations', []), 1):
+        print(f"  {i}. {cit}")
+
+    print("\nFinal Summary:\n" + result.get('final_summary', 'N/A'))
+
+    print("\nOrchestration Message:", result.get('message', 'N/A'))
+
+client = Client("http://127.0.0.1:7860/")
+result = client.predict(
+    user_request="How do I calculate the sum of an array in Python?",
+    api_name="/process_orchestrator_request"
+)
+print_human_readable_result(result)
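mcp_api_call.py hard-codes the `/process_orchestrator_request` endpoint and assumes the app is already serving on port 7860. A small sketch of how the other endpoints a running instance exposes can be discovered with `gradio_client`'s built-in introspection:

```python
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")
# view_api() prints every exposed endpoint with its parameters and return
# types; this is how names like /process_orchestrator_request are found.
client.view_api()
```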
mcp_hub/__init__.py
ADDED
@@ -0,0 +1,20 @@
+"""MCP Hub - Multi-Agent Communication Protocol Hub for Research and Code Generation."""
+
+__version__ = "1.0.0"
+__author__ = "Your Name"
+__description__ = "Advanced MCP Hub with intelligent agent orchestration"
+
+# Core imports that should be available at package level
+try:
+    from .config import api_config, model_config, app_config
+    from .exceptions import APIError, ValidationError, CodeGenerationError, CodeExecutionError
+    from .logging_config import logger
+
+    __all__ = [
+        "api_config", "model_config", "app_config",
+        "APIError", "ValidationError", "CodeGenerationError", "CodeExecutionError",
+        "logger"
+    ]
+except ImportError:
+    # Graceful degradation for missing dependencies
+    __all__ = []
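A brief sketch of what the guarded re-exports above enable once the package's dependencies are installed; note the `except` only catches `ImportError`, so missing API keys (which `config.py` turns into a `RuntimeError`) still fail the import:

```python
import mcp_hub

# __all__ is empty when the optional imports above failed.
if mcp_hub.__all__:
    mcp_hub.logger.info(f"MCP Hub {mcp_hub.__version__} loaded")
```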
mcp_hub/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (818 Bytes).

mcp_hub/__pycache__/cache_utils.cpython-312.pyc
ADDED
Binary file (11 kB).

mcp_hub/__pycache__/config.cpython-312.pyc
ADDED
Binary file (5.42 kB).

mcp_hub/__pycache__/exceptions.cpython-312.pyc
ADDED
Binary file (1.83 kB).

mcp_hub/__pycache__/health_monitoring.cpython-312.pyc
ADDED
Binary file (11.2 kB).

mcp_hub/__pycache__/logging_config.cpython-312.pyc
ADDED
Binary file (2.1 kB).

mcp_hub/__pycache__/package_utils.cpython-312.pyc
ADDED
Binary file (6.79 kB).

mcp_hub/__pycache__/performance_monitoring.cpython-312.pyc
ADDED
Binary file (13.3 kB).

mcp_hub/__pycache__/reliability_utils.cpython-312.pyc
ADDED
Binary file (11.6 kB).

mcp_hub/__pycache__/sandbox_pool.cpython-312.pyc
ADDED
Binary file (37.9 kB).

mcp_hub/__pycache__/utils.cpython-312.pyc
ADDED
Binary file (19.1 kB).
mcp_hub/advanced_config.py
ADDED
@@ -0,0 +1,272 @@
+"""Advanced configuration management with validation and environment-specific settings."""
+
+import os
+import json
+from pathlib import Path
+from typing import Dict, Any, Optional
+from dataclasses import dataclass, field
+from .logging_config import logger
+
+@dataclass
+class APIConfig:
+    """API configuration with validation."""
+    nebius_api_key: str = ""
+    nebius_base_url: str = "https://api.studio.nebius.ai/v1/"
+    tavily_api_key: str = ""
+
+    # API-specific settings
+    nebius_model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+    nebius_max_tokens: int = 1000
+    nebius_temperature: float = 0.7
+
+    tavily_search_depth: str = "basic"
+    tavily_max_results: int = 5
+
+    def __post_init__(self):
+        """Validate configuration after initialization."""
+        if not self.nebius_api_key:
+            raise ValueError("NEBIUS_API_KEY is required")
+        if not self.tavily_api_key:
+            raise ValueError("TAVILY_API_KEY is required")
+
+        # Validate numeric ranges
+        if not 0.0 <= self.nebius_temperature <= 2.0:
+            raise ValueError("nebius_temperature must be between 0.0 and 2.0")
+        if self.nebius_max_tokens <= 0:
+            raise ValueError("nebius_max_tokens must be positive")
+        if self.tavily_max_results <= 0:
+            raise ValueError("tavily_max_results must be positive")
+
+@dataclass
+class AppConfig:
+    """Application configuration."""
+    environment: str = "development"  # development, staging, production
+    debug: bool = True
+    log_level: str = "INFO"
+
+    # Gradio settings
+    gradio_server_name: str = "0.0.0.0"
+    gradio_server_port: int = 7860
+    gradio_share: bool = False
+    gradio_auth: Optional[tuple] = None
+
+    # Performance settings
+    max_search_results: int = 10
+    max_sub_questions: int = 5
+    cache_ttl_seconds: int = 3600
+    request_timeout_seconds: int = 30
+
+    # Rate limiting
+    api_calls_per_second: float = 2.0
+    api_burst_size: int = 5
+
+    # Circuit breaker settings
+    circuit_breaker_failure_threshold: int = 5
+    circuit_breaker_timeout_seconds: int = 60
+
+    # Monitoring settings
+    metrics_retention_hours: int = 24
+    health_check_interval_seconds: int = 300  # 5 minutes
+
+    def __post_init__(self):
+        """Validate application configuration."""
+        valid_environments = ["development", "staging", "production"]
+        if self.environment not in valid_environments:
+            raise ValueError(f"environment must be one of: {valid_environments}")
+
+        valid_log_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
+        if self.log_level not in valid_log_levels:
+            raise ValueError(f"log_level must be one of: {valid_log_levels}")
+
+        if self.gradio_server_port <= 0 or self.gradio_server_port > 65535:
+            raise ValueError("gradio_server_port must be between 1 and 65535")
+
+@dataclass
+class SecurityConfig:
+    """Security configuration."""
+    enable_authentication: bool = False
+    allowed_origins: list = field(default_factory=lambda: ["*"])
+    api_key_header: str = "X-API-Key"
+    rate_limit_per_ip: int = 100  # requests per hour
+    max_request_size_mb: int = 10
+
+    # Content filtering
+    enable_content_filtering: bool = True
+    blocked_patterns: list = field(default_factory=list)
+
+    def __post_init__(self):
+        """Validate security configuration."""
+        if self.rate_limit_per_ip <= 0:
+            raise ValueError("rate_limit_per_ip must be positive")
+        if self.max_request_size_mb <= 0:
+            raise ValueError("max_request_size_mb must be positive")
+
+class ConfigManager:
+    """Centralized configuration management with environment-specific overrides."""
+
+    def __init__(self, config_dir: str = "config"):
+        """
+        Initialize configuration manager.
+
+        Args:
+            config_dir: Directory containing configuration files
+        """
+        self.config_dir = Path(config_dir)
+        self.config_dir.mkdir(exist_ok=True)
+
+        # Load environment variables
+        self._load_environment_variables()
+
+        # Initialize configurations
+        self.api_config = self._load_api_config()
+        self.app_config = self._load_app_config()
+        self.security_config = self._load_security_config()
+
+        logger.info(f"Configuration loaded for environment: {self.app_config.environment}")
+
+    def _load_environment_variables(self):
+        """Load environment variables from .env file if it exists."""
+        env_file = Path(".env")
+        if env_file.exists():
+            from dotenv import load_dotenv
+            load_dotenv()
+            logger.info("Loaded environment variables from .env file")
+
+    def _load_api_config(self) -> APIConfig:
+        """Load API configuration from environment and config files."""
+        # Start with environment variables
+        config_data = {
+            "nebius_api_key": os.getenv("NEBIUS_API_KEY", ""),
+            "nebius_base_url": os.getenv("NEBIUS_BASE_URL", "https://api.studio.nebius.ai/v1/"),
+            "tavily_api_key": os.getenv("TAVILY_API_KEY", ""),
+            "nebius_model": os.getenv("NEBIUS_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
+            "nebius_max_tokens": int(os.getenv("NEBIUS_MAX_TOKENS", "1000")),
+            "nebius_temperature": float(os.getenv("NEBIUS_TEMPERATURE", "0.7")),
+            "tavily_search_depth": os.getenv("TAVILY_SEARCH_DEPTH", "basic"),
+            "tavily_max_results": int(os.getenv("TAVILY_MAX_RESULTS", "5"))
+        }
+
+        # Override with config file if it exists
+        config_file = self.config_dir / "api_config.json"
+        if config_file.exists():
+            try:
+                with open(config_file, 'r') as f:
+                    file_config = json.load(f)
+                config_data.update(file_config)
+                logger.info("Loaded API configuration from config file")
+            except Exception as e:
+                logger.warning(f"Failed to load API config file: {e}")
+
+        return APIConfig(**config_data)
+
+    def _load_app_config(self) -> AppConfig:
+        """Load application configuration."""
+        environment = os.getenv("ENVIRONMENT", "development")
+
+        # Base configuration
+        config_data = {
+            "environment": environment,
+            "debug": environment == "development",
+            "log_level": os.getenv("LOG_LEVEL", "INFO"),
+            "gradio_server_name": os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
+            "gradio_server_port": int(os.getenv("GRADIO_SERVER_PORT", "7860")),
+            "gradio_share": os.getenv("GRADIO_SHARE", "false").lower() == "true",
+            "max_search_results": int(os.getenv("MAX_SEARCH_RESULTS", "10")),
+            "max_sub_questions": int(os.getenv("MAX_SUB_QUESTIONS", "5")),
+            "cache_ttl_seconds": int(os.getenv("CACHE_TTL_SECONDS", "3600")),
+            "request_timeout_seconds": int(os.getenv("REQUEST_TIMEOUT_SECONDS", "30"))
+        }
+
+        # Environment-specific overrides
+        env_config_file = self.config_dir / f"app_config_{environment}.json"
+        if env_config_file.exists():
+            try:
+                with open(env_config_file, 'r') as f:
+                    env_config = json.load(f)
+                config_data.update(env_config)
+                logger.info(f"Loaded environment-specific config: {environment}")
+            except Exception as e:
+                logger.warning(f"Failed to load environment config: {e}")
+
+        return AppConfig(**config_data)
+
+    def _load_security_config(self) -> SecurityConfig:
+        """Load security configuration."""
+        config_data = {
+            "enable_authentication": os.getenv("ENABLE_AUTH", "false").lower() == "true",
+            "rate_limit_per_ip": int(os.getenv("RATE_LIMIT_PER_IP", "100")),
+            "max_request_size_mb": int(os.getenv("MAX_REQUEST_SIZE_MB", "10")),
+            "enable_content_filtering": os.getenv("ENABLE_CONTENT_FILTERING", "true").lower() == "true"
+        }
+
+        # Load from config file
+        config_file = self.config_dir / "security_config.json"
+        if config_file.exists():
+            try:
+                with open(config_file, 'r') as f:
+                    file_config = json.load(f)
+                config_data.update(file_config)
+                logger.info("Loaded security configuration from config file")
+            except Exception as e:
+                logger.warning(f"Failed to load security config: {e}")
+
+        return SecurityConfig(**config_data)
+
+    def save_config_template(self):
+        """Save configuration templates for easy editing."""
+        templates = {
+            "api_config.json": {
+                "nebius_model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+                "nebius_max_tokens": 1000,
+                "nebius_temperature": 0.7,
+                "tavily_search_depth": "basic",
+                "tavily_max_results": 5
+            },
+            "app_config_development.json": {
+                "debug": True,
+                "log_level": "DEBUG",
+                "gradio_share": False,
+                "max_search_results": 5
+            },
+            "app_config_production.json": {
+                "debug": False,
+                "log_level": "INFO",
+                "gradio_share": False,
+                "max_search_results": 10,
+                "cache_ttl_seconds": 7200
+            },
+            "security_config.json": {
+                "enable_authentication": False,
+                "allowed_origins": ["*"],
+                "rate_limit_per_ip": 100,
+                "enable_content_filtering": True,
+                "blocked_patterns": []
+            }
+        }
+
+        for filename, template in templates.items():
+            config_file = self.config_dir / filename
+            if not config_file.exists():
+                try:
+                    with open(config_file, 'w') as f:
+                        json.dump(template, f, indent=2)
+                    logger.info(f"Created config template: {filename}")
+                except Exception as e:
+                    logger.error(f"Failed to create config template {filename}: {e}")
+
+    def get_config_summary(self) -> Dict[str, Any]:
+        """Get a summary of current configuration (without sensitive data)."""
+        return {
+            "environment": self.app_config.environment,
+            "debug_mode": self.app_config.debug,
+            "log_level": self.app_config.log_level,
+            "gradio_port": self.app_config.gradio_server_port,
+            "cache_ttl": self.app_config.cache_ttl_seconds,
+            "max_search_results": self.app_config.max_search_results,
+            "authentication_enabled": self.security_config.enable_authentication,
+            "content_filtering_enabled": self.security_config.enable_content_filtering,
+            "api_endpoints": {
+                "nebius": bool(self.api_config.nebius_api_key),
+                "tavily": bool(self.api_config.tavily_api_key)
+            }
+        }
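A minimal usage sketch for `ConfigManager` above, assuming `NEBIUS_API_KEY` and `TAVILY_API_KEY` are set in the environment (its `APIConfig.__post_init__` raises `ValueError` otherwise):

```python
from mcp_hub.advanced_config import ConfigManager

manager = ConfigManager(config_dir="config")
manager.save_config_template()        # write editable JSON templates once
print(manager.get_config_summary())   # sanitized summary, no raw API keys
```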
mcp_hub/async_utils.py
ADDED
@@ -0,0 +1,95 @@
+"""Async utilities for improved performance in concurrent operations."""
+
+import asyncio
+import aiohttp
+from typing import Dict, Any, List
+from concurrent.futures import ThreadPoolExecutor
+from .config import api_config, app_config
+from .exceptions import APIError
+from .logging_config import logger
+
+class AsyncWebSearchAgent:
+    """Async version of web search for concurrent operations."""
+
+    def __init__(self):
+        self.session = None
+
+    async def __aenter__(self):
+        """Async context manager entry."""
+        self.session = aiohttp.ClientSession()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        if self.session:
+            await self.session.close()
+
+    async def search_multiple_queries(self, queries: List[str]) -> List[Dict[str, Any]]:
+        """Search multiple queries concurrently."""
+        if not self.session:
+            raise APIError("AsyncWebSearch", "Session not initialized. Use as async context manager.")
+
+        logger.info(f"Starting concurrent search for {len(queries)} queries")
+
+        # Create tasks for concurrent execution
+        tasks = [self._search_single_query(query) for query in queries]
+
+        # Execute all searches concurrently
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Process results and handle any exceptions
+        processed_results = []
+        for i, result in enumerate(results):
+            if isinstance(result, Exception):
+                logger.error(f"Search failed for query {i}: {str(result)}")
+                processed_results.append({
+                    "error": str(result),
+                    "query": queries[i],
+                    "results": []
+                })
+            else:
+                processed_results.append(result)
+
+        logger.info(f"Completed concurrent searches: {len([r for r in processed_results if not r.get('error')])} successful")
+        return processed_results
+
+    async def _search_single_query(self, query: str) -> Dict[str, Any]:
+        """Search a single query using Tavily API."""
+        try:
+            # In a real implementation, you'd make async HTTP calls to Tavily
+            # For now, we'll use the sync version in a thread pool
+            from tavily import TavilyClient
+            client = TavilyClient(api_key=api_config.tavily_api_key)
+
+            # Run sync operation in thread pool
+            loop = asyncio.get_event_loop()
+            with ThreadPoolExecutor() as executor:
+                response = await loop.run_in_executor(
+                    executor,
+                    lambda: client.search(
+                        query=query,
+                        search_depth="basic",
+                        max_results=app_config.max_search_results,
+                        include_answer=True
+                    )
+                )
+
+            return {
+                "query": response.get("query", query),
+                "tavily_answer": response.get("answer"),
+                "results": response.get("results", []),
+                "data_source": "Tavily Search API (Async)",
+            }
+
+        except Exception as e:
+            raise APIError("Tavily", f"Async search failed: {str(e)}")
+
+async def process_subquestions_concurrently(sub_questions: List[str]) -> List[Dict[str, Any]]:
+    """Process multiple sub-questions concurrently for better performance."""
+    logger.info(f"Processing {len(sub_questions)} sub-questions concurrently")
+
+    async with AsyncWebSearchAgent() as async_searcher:
+        # Execute all searches concurrently
+        search_results = await async_searcher.search_multiple_queries(sub_questions)
+
+        return search_results
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Caching system for improved performance and reduced API calls."""
|
| 2 |
+
|
| 3 |
+
import hashlib
|
| 4 |
+
import json
|
| 5 |
+
import pickle
|
| 6 |
+
from datetime import datetime, timedelta
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any, Dict, Optional, Callable
|
| 9 |
+
from functools import wraps
|
| 10 |
+
from .logging_config import logger
|
| 11 |
+
|
| 12 |
+
class CacheManager:
|
| 13 |
+
"""Simple file-based cache manager for API responses and computations."""
|
| 14 |
+
|
| 15 |
+
def __init__(self, cache_dir: str = "cache", default_ttl: int = 3600):
|
| 16 |
+
"""
|
| 17 |
+
Initialize cache manager.
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
cache_dir: Directory to store cache files
|
| 21 |
+
default_ttl: Default time-to-live in seconds (1 hour default)
|
| 22 |
+
"""
|
| 23 |
+
self.cache_dir = Path(cache_dir)
|
| 24 |
+
self.cache_dir.mkdir(exist_ok=True)
|
| 25 |
+
self.default_ttl = default_ttl
|
| 26 |
+
logger.info(f"Cache manager initialized with directory: {self.cache_dir}")
|
| 27 |
+
|
| 28 |
+
def _get_cache_key(self, func_name: str, args: tuple, kwargs: dict) -> str:
|
| 29 |
+
"""Generate a unique cache key based on function name and arguments."""
|
| 30 |
+
# Create a string representation of arguments
|
| 31 |
+
key_data = {
|
| 32 |
+
"func": func_name,
|
| 33 |
+
"args": args,
|
| 34 |
+
"kwargs": kwargs
|
| 35 |
+
}
|
| 36 |
+
key_string = json.dumps(key_data, sort_keys=True, default=str)
|
| 37 |
+
return hashlib.md5(key_string.encode()).hexdigest()
|
| 38 |
+
|
| 39 |
+
def _get_cache_path(self, cache_key: str) -> Path:
|
| 40 |
+
"""Get the file path for a cache key."""
|
| 41 |
+
return self.cache_dir / f"{cache_key}.cache"
|
| 42 |
+
|
| 43 |
+
def get(self, cache_key: str) -> Optional[Any]:
|
| 44 |
+
"""Retrieve a value from cache if it exists and is not expired."""
|
| 45 |
+
cache_path = self._get_cache_path(cache_key)
|
| 46 |
+
|
| 47 |
+
if not cache_path.exists():
|
| 48 |
+
return None
|
| 49 |
+
|
| 50 |
+
try:
|
| 51 |
+
with open(cache_path, 'rb') as f:
|
| 52 |
+
cache_data = pickle.load(f)
|
| 53 |
+
|
| 54 |
+
# Check if cache has expired
|
| 55 |
+
if datetime.now() > cache_data['expires_at']:
|
| 56 |
+
logger.debug(f"Cache expired for key: {cache_key}")
|
| 57 |
+
cache_path.unlink() # Delete expired cache
|
| 58 |
+
return None
|
| 59 |
+
|
| 60 |
+
logger.debug(f"Cache hit for key: {cache_key}")
|
| 61 |
+
return cache_data['value']
|
| 62 |
+
|
| 63 |
+
except (EOFError, pickle.PickleError, KeyError) as e:
|
| 64 |
+
logger.warning(f"Cache corruption for key {cache_key}: {e}")
|
| 65 |
+
cache_path.unlink() # Delete corrupted cache
|
| 66 |
+
return None
|
| 67 |
+
|
| 68 |
+
def set(self, cache_key: str, value: Any, ttl: Optional[int] = None) -> None:
|
| 69 |
+
"""Store a value in cache with optional TTL."""
|
| 70 |
+
if ttl is None:
|
| 71 |
+
ttl = self.default_ttl
|
| 72 |
+
|
| 73 |
+
cache_data = {
|
| 74 |
+
'value': value,
|
| 75 |
+
'created_at': datetime.now(),
|
| 76 |
+
'expires_at': datetime.now() + timedelta(seconds=ttl)
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
cache_path = self._get_cache_path(cache_key)
|
| 80 |
+
|
| 81 |
+
try:
|
| 82 |
+
with open(cache_path, 'wb') as f:
|
| 83 |
+
pickle.dump(cache_data, f)
|
| 84 |
+
logger.debug(f"Cached value for key: {cache_key} (TTL: {ttl}s)")
|
| 85 |
+
except Exception as e:
|
| 86 |
+
logger.error(f"Failed to cache value for key {cache_key}: {e}")
|
| 87 |
+
|
| 88 |
+
def cached_call(self, func: Callable, args: tuple, kwargs: dict, ttl: Optional[int] = None) -> Any:
|
| 89 |
+
"""Make a cached function call."""
|
| 90 |
+
cache_key = self._get_cache_key(func.__name__, args, kwargs)
|
| 91 |
+
|
| 92 |
+
# Try to get from cache first
|
| 93 |
+
cached_result = self.get(cache_key)
|
| 94 |
+
if cached_result is not None:
|
| 95 |
+
return cached_result
|
| 96 |
+
|
| 97 |
+
# Execute function and cache result
|
| 98 |
+
logger.debug(f"Cache miss for {func.__name__}, executing function")
|
| 99 |
+
result = func(*args, **kwargs)
|
| 100 |
+
self.set(cache_key, result, ttl)
|
| 101 |
+
|
| 102 |
+
return result
|
| 103 |
+
|
| 104 |
+
def clear_expired(self) -> int:
|
| 105 |
+
"""Remove all expired cache files and return count of removed files."""
|
| 106 |
+
removed_count = 0
|
| 107 |
+
current_time = datetime.now()
|
| 108 |
+
|
| 109 |
+
for cache_file in self.cache_dir.glob("*.cache"):
|
| 110 |
+
try:
|
| 111 |
+
with open(cache_file, 'rb') as f:
|
| 112 |
+
cache_data = pickle.load(f)
|
| 113 |
+
|
| 114 |
+
if current_time > cache_data['expires_at']:
|
| 115 |
+
cache_file.unlink()
|
| 116 |
+
removed_count += 1
|
| 117 |
+
|
| 118 |
+
except Exception as e:
|
| 119 |
+
logger.warning(f"Error checking cache file {cache_file}: {e}")
|
| 120 |
+
cache_file.unlink() # Remove corrupted files
|
| 121 |
+
removed_count += 1
|
| 122 |
+
|
| 123 |
+
if removed_count > 0:
|
| 124 |
+
logger.info(f"Removed {removed_count} expired cache files")
|
| 125 |
+
|
| 126 |
+
return removed_count
|
| 127 |
+
|
| 128 |
+
def clear_all(self) -> int:
|
| 129 |
+
"""Remove all cache files and return count of removed files."""
|
| 130 |
+
removed_count = 0
|
| 131 |
+
for cache_file in self.cache_dir.glob("*.cache"):
|
| 132 |
+
cache_file.unlink()
|
| 133 |
+
removed_count += 1
|
| 134 |
+
|
| 135 |
+
logger.info(f"Cleared all cache: removed {removed_count} files")
|
| 136 |
+
return removed_count
|
| 137 |
+
|
| 138 |
+
def get_cache_status(self) -> Dict[str, Any]:
|
| 139 |
+
"""Get detailed status information about the cache system."""
|
| 140 |
+
try:
|
| 141 |
+
# Count cache files
|
| 142 |
+
cache_files = list(self.cache_dir.glob("*.cache"))
|
| 143 |
+
cache_count = len(cache_files)
|
| 144 |
+
|
| 145 |
+
# Calculate cache directory size
|
| 146 |
+
total_size = sum(f.stat().st_size for f in cache_files)
|
| 147 |
+
|
| 148 |
+
# Count expired files
|
| 149 |
+
expired_count = 0
|
| 150 |
+
current_time = datetime.now()
|
| 151 |
+
for cache_file in cache_files:
|
| 152 |
+
try:
|
| 153 |
+
with open(cache_file, 'rb') as f:
|
| 154 |
+
cache_data = pickle.load(f)
|
| 155 |
+
|
| 156 |
+
if current_time > cache_data['expires_at']:
|
| 157 |
+
expired_count += 1
|
| 158 |
+
except Exception:
|
| 159 |
+
expired_count += 1 # Count corrupted files as expired
|
| 160 |
+
|
| 161 |
+
# Get cache stats
|
| 162 |
+
return {
|
| 163 |
+
"status": "healthy",
|
| 164 |
+
"cache_dir": str(self.cache_dir),
|
| 165 |
+
"total_files": cache_count,
|
| 166 |
+
"expired_files": expired_count,
|
| 167 |
+
"total_size_bytes": total_size,
|
| 168 |
+
"total_size_mb": round(total_size / (1024 * 1024), 2),
|
| 169 |
+
"default_ttl_seconds": self.default_ttl,
|
| 170 |
+
"timestamp": datetime.now().isoformat()
|
| 171 |
+
}
|
| 172 |
+
except Exception as e:
|
| 173 |
+
logger.error(f"Failed to get cache status: {str(e)}")
|
| 174 |
+
return {
|
| 175 |
+
"status": "error",
|
| 176 |
+
"error": str(e),
|
| 177 |
+
"timestamp": datetime.now().isoformat()
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
# Global cache manager instance
|
| 181 |
+
cache_manager = CacheManager()
|
| 182 |
+
|
| 183 |
+
def cached(ttl: int = 3600):
|
| 184 |
+
"""
|
| 185 |
+
Decorator to cache function results.
|
| 186 |
+
|
| 187 |
+
Args:
|
| 188 |
+
ttl: Time-to-live in seconds
|
| 189 |
+
"""
|
| 190 |
+
def decorator(func: Callable):
|
| 191 |
+
@wraps(func)
|
| 192 |
+
def wrapper(*args, **kwargs):
|
| 193 |
+
return cache_manager.cached_call(func, args, kwargs, ttl)
|
| 194 |
+
return wrapper
|
| 195 |
+
return decorator
|
| 196 |
+
|
| 197 |
+
# Specialized caching functions for common operations
|
| 198 |
+
@cached(ttl=1800) # 30 minutes
|
| 199 |
+
def cached_web_search(query: str) -> Dict[str, Any]:
|
| 200 |
+
"""Cached version of web search - import happens at runtime."""
|
| 201 |
+
# Import at runtime to avoid circular imports
|
| 202 |
+
from tavily import TavilyClient
|
| 203 |
+
client = TavilyClient(api_key="placeholder") # Will be replaced at runtime
|
| 204 |
+
# This is a placeholder - actual implementation would use the real agent
|
| 205 |
+
return {"query": query, "results": [], "cached": True}
|
| 206 |
+
|
| 207 |
+
@cached(ttl=3600) # 1 hour
|
| 208 |
+
def cached_llm_processing(text_input: str, task: str, context: Optional[str] = None) -> Dict[str, Any]:
|
| 209 |
+
"""Cached version of LLM processing - import happens at runtime."""
|
| 210 |
+
# This is a placeholder for the caching pattern
|
| 211 |
+
return {"input_text": text_input, "task": task, "cached": True}
|
mcp_hub/config.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration management for the MCP Hub project."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
# Load environment variables
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
@dataclass
|
| 11 |
+
class APIConfig:
|
| 12 |
+
"""API configuration settings."""
|
| 13 |
+
# Provider selection
|
| 14 |
+
llm_provider: str = "nebius" # Options: "nebius", "openai", "anthropic", "huggingface"
|
| 15 |
+
|
| 16 |
+
# Provider API keys
|
| 17 |
+
nebius_api_key: str = ""
|
| 18 |
+
openai_api_key: str = ""
|
| 19 |
+
anthropic_api_key: str = ""
|
| 20 |
+
huggingface_api_key: str = ""
|
| 21 |
+
|
| 22 |
+
# Other APIs
|
| 23 |
+
tavily_api_key: str = ""
|
| 24 |
+
|
| 25 |
+
# Provider URLs
|
| 26 |
+
nebius_base_url: str = "https://api.studio.nebius.com/v1/"
|
| 27 |
+
huggingface_base_url: str = "https://api-inference.huggingface.co"
|
| 28 |
+
|
| 29 |
+
# Other settings
|
| 30 |
+
current_year: str = "2025"
|
| 31 |
+
|
| 32 |
+
def __post_init__(self):
|
| 33 |
+
"""Validate required API keys based on selected provider."""
|
| 34 |
+
# Always require Tavily for search functionality
|
| 35 |
+
if not self.tavily_api_key or not self.tavily_api_key.startswith("tvly-"):
|
| 36 |
+
raise RuntimeError("A valid TAVILY_API_KEY is required in your .env file.")
|
| 37 |
+
|
| 38 |
+
# Validate LLM provider selection
|
| 39 |
+
valid_providers = ["nebius", "openai", "anthropic", "huggingface"]
|
| 40 |
+
if self.llm_provider not in valid_providers:
|
| 41 |
+
raise RuntimeError(f"LLM_PROVIDER must be one of: {', '.join(valid_providers)}")
|
| 42 |
+
|
| 43 |
+
# Validate required API key for selected provider
|
| 44 |
+
if self.llm_provider == "nebius" and not self.nebius_api_key:
|
| 45 |
+
raise RuntimeError("NEBIUS_API_KEY is required when using nebius provider.")
|
| 46 |
+
elif self.llm_provider == "openai" and not self.openai_api_key:
|
| 47 |
+
raise RuntimeError("OPENAI_API_KEY is required when using openai provider.")
|
| 48 |
+
elif self.llm_provider == "anthropic" and not self.anthropic_api_key:
|
| 49 |
+
raise RuntimeError("ANTHROPIC_API_KEY is required when using anthropic provider.")
|
| 50 |
+
elif self.llm_provider == "huggingface" and not self.huggingface_api_key:
|
| 51 |
+
raise RuntimeError("HUGGINGFACE_API_KEY is required when using huggingface provider.")
|
| 52 |
+
|
| 53 |
+
@dataclass
|
| 54 |
+
class ModelConfig:
|
| 55 |
+
"""Model configuration settings."""
|
| 56 |
+
# Default models (Nebius/HuggingFace compatible)
|
| 57 |
+
question_enhancer_model: str = "Qwen/Qwen3-4B-fast"
|
| 58 |
+
llm_processor_model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
| 59 |
+
code_generator_model: str = "Qwen/Qwen2.5-Coder-32B-Instruct-fast"
|
| 60 |
+
orchestrator_model: str = "Qwen/Qwen3-32B-fast"
|
| 61 |
+
|
| 62 |
+
def get_model_for_provider(self, task: str, provider: str) -> str:
|
| 63 |
+
"""Get appropriate model for the given task and provider."""
|
| 64 |
+
|
| 65 |
+
# Model mappings by provider
|
| 66 |
+
provider_models = {
|
| 67 |
+
"nebius": {
|
| 68 |
+
"question_enhancer": self.question_enhancer_model,
|
| 69 |
+
"llm_processor": self.llm_processor_model,
|
| 70 |
+
"code_generator": self.code_generator_model,
|
| 71 |
+
"orchestrator": self.orchestrator_model,
|
| 72 |
+
},
|
| 73 |
+
"openai": {
|
| 74 |
+
"question_enhancer": "gpt-4.1-nano",
|
| 75 |
+
"llm_processor": "gpt-4.1-nano",
|
| 76 |
+
"code_generator": "gpt-4.1",
|
| 77 |
+
"orchestrator": "gpt-4.1",
|
| 78 |
+
},
|
| 79 |
+
"anthropic": {
|
| 80 |
+
"question_enhancer": "claude-3-5-haiku-latest",#
|
| 81 |
+
"llm_processor": "claude-3-5-sonnet-latest",
|
| 82 |
+
"code_generator": "claude-sonnet-4-0",
|
| 83 |
+
"orchestrator": "claude-sonnet-4-0",
|
| 84 |
+
},
|
| 85 |
+
"huggingface": {
|
| 86 |
+
"question_enhancer": "microsoft/phi-4",
|
| 87 |
+
"llm_processor": "microsoft/phi-4",
|
| 88 |
+
"code_generator": "Qwen/Qwen2.5-Coder-32B-Instruct",
|
| 89 |
+
"orchestrator": "microsoft/phi-4",
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
if provider not in provider_models:
|
| 94 |
+
# Fall back to default models
|
| 95 |
+
return getattr(self, f"{task}_model", self.llm_processor_model)
|
| 96 |
+
|
| 97 |
+
return provider_models[provider].get(task, provider_models[provider]["llm_processor"])
|
| 98 |
+
|
| 99 |
+
@dataclass
|
| 100 |
+
class AppConfig:
|
| 101 |
+
"""Application configuration settings."""
|
| 102 |
+
modal_app_name: str = "my-sandbox-app"
|
| 103 |
+
max_search_results: int = 2
|
| 104 |
+
max_code_generation_attempts: int = 3
|
| 105 |
+
llm_temperature: float = 0.6
|
| 106 |
+
code_gen_temperature: float = 0.1
|
| 107 |
+
|
| 108 |
+
# Create global configuration instances
|
| 109 |
+
api_config = APIConfig(
|
| 110 |
+
llm_provider=os.environ.get("LLM_PROVIDER", "nebius"),
|
| 111 |
+
nebius_api_key=os.environ.get("NEBIUS_API_KEY", ""),
|
| 112 |
+
openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
|
| 113 |
+
anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY", ""),
|
| 114 |
+
huggingface_api_key=os.environ.get("HUGGINGFACE_API_KEY", ""),
|
| 115 |
+
tavily_api_key=os.environ.get("TAVILY_API_KEY", ""),
|
| 116 |
+
current_year=os.environ.get("CURRENT_YEAR", "2025")
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
model_config = ModelConfig()
|
| 120 |
+
app_config = AppConfig()
|
mcp_hub/exceptions.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Custom exception classes for the MCP Hub project."""
|
| 2 |
+
|
| 3 |
+
class MCPHubError(Exception):
|
| 4 |
+
"""Base exception class for MCP Hub errors."""
|
| 5 |
+
pass
|
| 6 |
+
|
| 7 |
+
class APIError(MCPHubError):
|
| 8 |
+
"""Raised when API calls fail."""
|
| 9 |
+
def __init__(self, service: str, message: str):
|
| 10 |
+
self.service = service
|
| 11 |
+
self.message = message
|
| 12 |
+
super().__init__(f"{service} API Error: {message}")
|
| 13 |
+
|
| 14 |
+
class ConfigurationError(MCPHubError):
|
| 15 |
+
"""Raised when there are configuration issues."""
|
| 16 |
+
pass
|
| 17 |
+
|
| 18 |
+
class ValidationError(MCPHubError):
|
| 19 |
+
"""Raised when input validation fails."""
|
| 20 |
+
pass
|
| 21 |
+
|
| 22 |
+
class CodeGenerationError(MCPHubError):
|
| 23 |
+
"""Raised when code generation fails."""
|
| 24 |
+
pass
|
| 25 |
+
|
| 26 |
+
class CodeExecutionError(MCPHubError):
|
| 27 |
+
"""Raised when code execution fails."""
|
| 28 |
+
pass
|
mcp_hub/health_monitoring.py
ADDED
@@ -0,0 +1,261 @@

"""System health monitoring and status dashboard functionality."""

import time
import psutil
from datetime import datetime
from typing import Dict, Any
from .config import api_config
from .logging_config import logger
from .reliability_utils import health_monitor
from .performance_monitoring import metrics_collector

class SystemHealthChecker:
    """Comprehensive system health checking."""

    def __init__(self):
        self.last_check = None
        self.health_status = {}

    def check_api_connectivity(self) -> Dict[str, Any]:
        """Check connectivity to external APIs."""
        results = {}

        # Check Nebius API
        try:
            from openai import OpenAI
            client = OpenAI(
                api_key=api_config.nebius_api_key,
                base_url=api_config.nebius_base_url
            )

            start_time = time.time()
            # Make a minimal test call
            response = client.chat.completions.create(
                model="meta-llama/Meta-Llama-3.1-8B-Instruct",
                messages=[{"role": "user", "content": "test"}],
                max_tokens=1
            )
            response_time = time.time() - start_time

            results["nebius"] = {
                "status": "healthy",
                "response_time_ms": response_time * 1000,
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            results["nebius"] = {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

        # Check Tavily API
        try:
            from tavily import TavilyClient
            client = TavilyClient(api_key=api_config.tavily_api_key)

            start_time = time.time()
            # Make a minimal test search
            response = client.search(query="test", max_results=1)
            response_time = time.time() - start_time

            results["tavily"] = {
                "status": "healthy",
                "response_time_ms": response_time * 1000,
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            results["tavily"] = {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

        return results

    def check_system_resources(self) -> Dict[str, Any]:
        """Check system resource usage."""
        try:
            # CPU usage
            cpu_percent = psutil.cpu_percent(interval=1)

            # Memory usage
            memory = psutil.virtual_memory()

            # Disk usage
            disk = psutil.disk_usage('/')

            # Process-specific metrics
            process = psutil.Process()
            process_memory = process.memory_info()

            return {
                "cpu_percent": cpu_percent,
                "memory": {
                    "total_gb": memory.total / (1024**3),
                    "available_gb": memory.available / (1024**3),
                    "percent_used": memory.percent
                },
                "disk": {
                    "total_gb": disk.total / (1024**3),
                    "free_gb": disk.free / (1024**3),
                    "percent_used": (disk.used / disk.total) * 100
                },
                "process": {
                    "memory_mb": process_memory.rss / (1024**2),
                    "cpu_percent": process.cpu_percent()
                },
                "status": "healthy",
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            return {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

    def check_cache_health(self) -> Dict[str, Any]:
        """Check cache system health."""
        try:
            # Relative import so the sibling module resolves inside the package
            from .cache_utils import cache_manager

            # Count cache files
            cache_files = list(cache_manager.cache_dir.glob("*.cache"))

            # Calculate cache directory size
            total_size = sum(f.stat().st_size for f in cache_files)

            return {
                "cache_files_count": len(cache_files),
                "total_size_mb": total_size / (1024**2),
                "cache_directory": str(cache_manager.cache_dir),
                "status": "healthy",
                "last_checked": datetime.now().isoformat()
            }

        except Exception as e:
            return {
                "status": "unhealthy",
                "error": str(e),
                "last_checked": datetime.now().isoformat()
            }

    def get_comprehensive_health_report(self) -> Dict[str, Any]:
        """Get a comprehensive health report of the entire system."""
        logger.info("Generating comprehensive health report")

        report = {
            "timestamp": datetime.now().isoformat(),
            "overall_status": "healthy"  # Will be updated based on checks
        }

        # Check API connectivity
        api_health = self.check_api_connectivity()
        report["api_connectivity"] = api_health

        # Check system resources
        system_health = self.check_system_resources()
        report["system_resources"] = system_health

        # Check cache health
        cache_health = self.check_cache_health()
        report["cache_system"] = cache_health

        # Get API health stats from monitor
        try:
            nebius_stats = health_monitor.get_health_stats("nebius")
            tavily_stats = health_monitor.get_health_stats("tavily")

            report["api_performance"] = {
                "nebius": nebius_stats,
                "tavily": tavily_stats
            }
        except Exception as e:
            report["api_performance"] = {"error": str(e)}

        # Get performance metrics
        try:
            performance_summary = metrics_collector.get_metrics_summary()
            report["performance_metrics"] = performance_summary
        except Exception as e:
            report["performance_metrics"] = {"error": str(e)}

        # Determine overall status
        unhealthy_components = []

        for service, status in api_health.items():
            if status.get("status") == "unhealthy":
                unhealthy_components.append(f"API:{service}")

        if system_health.get("status") == "unhealthy":
            unhealthy_components.append("system_resources")

        if cache_health.get("status") == "unhealthy":
            unhealthy_components.append("cache_system")

        if unhealthy_components:
            report["overall_status"] = "degraded"
            report["unhealthy_components"] = unhealthy_components

        self.last_check = datetime.now()
        self.health_status = report

        logger.info(f"Health report generated: {report['overall_status']}")
        return report

# Global health checker instance
health_checker = SystemHealthChecker()

def create_health_dashboard() -> str:
    """Create a formatted health dashboard for display."""
    report = health_checker.get_comprehensive_health_report()

    dashboard = f"""
# 🏥 System Health Dashboard
**Last Updated:** {report['timestamp']}
**Overall Status:** {'🟢' if report['overall_status'] == 'healthy' else '🟡' if report['overall_status'] == 'degraded' else '🔴'} {report['overall_status'].upper()}

## 🌐 API Connectivity
"""

    for service, status in report.get("api_connectivity", {}).items():
        status_icon = "🟢" if status.get("status") == "healthy" else "🔴"
        response_time = status.get("response_time_ms", 0)
        dashboard += f"- **{service.title()}:** {status_icon} {status.get('status', 'unknown')} ({response_time:.1f}ms)\n"

    dashboard += "\n## 💻 System Resources\n"
    sys_resources = report.get("system_resources", {})
    if "memory" in sys_resources:
        memory = sys_resources["memory"]
        dashboard += f"- **Memory:** {memory['percent_used']:.1f}% used ({memory['available_gb']:.1f}GB available)\n"

    if "cpu_percent" in sys_resources:
        dashboard += f"- **CPU:** {sys_resources['cpu_percent']:.1f}% usage\n"

    if "process" in sys_resources:
        process = sys_resources["process"]
        dashboard += f"- **Process Memory:** {process['memory_mb']:.1f}MB\n"

    dashboard += "\n## 📊 Performance Metrics\n"
    perf_metrics = report.get("performance_metrics", {})
    if perf_metrics and not perf_metrics.get("error"):
        for metric_name, metric_data in perf_metrics.items():
            if isinstance(metric_data, dict) and "average" in metric_data:
                dashboard += f"- **{metric_name}:** Avg: {metric_data['average']:.3f}, Count: {metric_data['count']}\n"

    dashboard += "\n## 🔧 Cache System\n"
    cache_info = report.get("cache_system", {})
    if cache_info.get("status") == "healthy":
        dashboard += f"- **Cache Files:** {cache_info.get('cache_files_count', 0)} files\n"
        dashboard += f"- **Cache Size:** {cache_info.get('total_size_mb', 0):.1f}MB\n"

    if report.get("unhealthy_components"):
        dashboard += "\n## ⚠️ Issues Detected\n"
        for component in report["unhealthy_components"]:
            dashboard += f"- {component}\n"

    return dashboard

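Assuming valid Nebius and Tavily keys are configured through api_config, the checker can be driven directly; note that create_health_dashboard() issues real (if minimal) API calls, so it consumes quota. A short sketch:

    from mcp_hub.health_monitoring import create_health_dashboard, health_checker

    # Render the Markdown dashboard (e.g. into a Gradio Markdown component)
    print(create_health_dashboard())

    # Or inspect the raw report programmatically
    report = health_checker.get_comprehensive_health_report()
    if report["overall_status"] != "healthy":
        print("Degraded components:", report.get("unhealthy_components", []))
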
mcp_hub/logging_config.py
ADDED
@@ -0,0 +1,51 @@

"""Logging configuration for the MCP Hub project."""

import logging
import sys
from datetime import datetime
from pathlib import Path

def setup_logging(
    log_level: str = "INFO",
    log_to_file: bool = True,
    log_dir: str = "logs"
) -> logging.Logger:
    """Set up logging configuration."""

    # Create logs directory if it doesn't exist
    if log_to_file:
        log_path = Path(log_dir)
        log_path.mkdir(exist_ok=True)

    # Create logger
    logger = logging.getLogger("mcp_hub")
    logger.setLevel(getattr(logging, log_level.upper()))

    # Clear any existing handlers
    logger.handlers = []

    # Create formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s"
    )

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(getattr(logging, log_level.upper()))
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler
    if log_to_file:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        file_handler = logging.FileHandler(
            log_path / f"mcp_hub_{timestamp}.log"
        )
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger

# Create global logger instance
logger = setup_logging()

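Because the module builds a global logger at import time, downstream modules only need the import. Re-running setup_logging clears existing handlers first, so reconfiguring does not stack duplicate outputs; a short sketch:

    from mcp_hub.logging_config import logger, setup_logging

    logger.info("Default INFO logger: console plus a timestamped file in ./logs")

    # Reconfigure for console-only debugging; handlers are cleared, not stacked
    logger = setup_logging(log_level="DEBUG", log_to_file=False)
    logger.debug("Console-only debug output")
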
mcp_hub/package_utils.py
ADDED
@@ -0,0 +1,192 @@

"""
Package management utilities for dynamic package installation in Modal sandboxes.
This module provides functions to analyze code for imports and manage package installation.
"""
import ast
import re
from typing import Set, List

try:
    from mcp_hub.logging_config import logger
except ImportError:
    # Fallback logger for testing/standalone use
    import logging
    logger = logging.getLogger(__name__)


# Core packages that should be preinstalled in the base image
CORE_PREINSTALLED_PACKAGES = {
    "numpy", "pandas", "matplotlib", "requests", "json", "os", "sys",
    "time", "datetime", "math", "random", "collections", "itertools",
    "functools", "re", "urllib", "csv", "sqlite3", "pathlib", "typing",
    "asyncio", "threading", "multiprocessing", "subprocess", "shutil",
    "tempfile", "io", "gzip", "zipfile", "tarfile", "base64", "hashlib",
    "secrets", "uuid", "pickle", "copy", "operator", "bisect", "heapq",
    "contextlib", "weakref", "gc", "inspect", "types", "enum", "dataclasses",
    "decimal", "fractions", "statistics", "string", "textwrap", "locale",
    "calendar", "timeit", "argparse", "getopt", "logging", "warnings",
    "platform", "signal", "errno", "ctypes", "struct", "array", "queue",
    "socketserver", "http", "urllib2", "html", "xml", "email", "mailbox"
}

# Extended packages that can be dynamically installed
COMMON_PACKAGES = {
    "scikit-learn": "sklearn",
    "beautifulsoup4": "bs4",
    "pillow": "PIL",
    "opencv-python-headless": "cv2",
    "python-dateutil": "dateutil",
    "plotly": "plotly",
    "seaborn": "seaborn",
    "polars": "polars",
    "lightgbm": "lightgbm",
    "xgboost": "xgboost",
    "flask": "flask",
    "fastapi": "fastapi",
    "httpx": "httpx",
    "networkx": "networkx",
    "wordcloud": "wordcloud",
    "textblob": "textblob",
    "spacy": "spacy",
    "nltk": "nltk"
}

# Map import names to package names
IMPORT_TO_PACKAGE = {v: k for k, v in COMMON_PACKAGES.items()}
IMPORT_TO_PACKAGE.update({k: k for k in COMMON_PACKAGES.keys()})


def extract_imports_from_code(code_str: str) -> Set[str]:
    """
    Extract all import statements from Python code using AST parsing.

    Args:
        code_str: The Python code to analyze

    Returns:
        Set of imported module names (top-level only)
    """
    imports = set()

    try:
        tree = ast.parse(code_str)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    # Get top-level module name
                    module_name = alias.name.split('.')[0]
                    imports.add(module_name)
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    # Get top-level module name
                    module_name = node.module.split('.')[0]
                    imports.add(module_name)
    except Exception as e:
        logger.warning(f"Failed to parse code with AST, falling back to regex: {e}")
        # Fallback to regex-based extraction
        imports.update(extract_imports_with_regex(code_str))

    return imports


def extract_imports_with_regex(code_str: str) -> Set[str]:
    """
    Fallback method to extract imports using regex patterns.

    Args:
        code_str: The Python code to analyze

    Returns:
        Set of imported module names
    """
    imports = set()

    # Pattern for "import module" statements
    import_pattern = r'^import\s+([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)'

    # Pattern for "from module import ..." statements
    from_pattern = r'^from\s+([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\s+import'

    for line in code_str.split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        # Check for import statements
        import_match = re.match(import_pattern, line)
        if import_match:
            module_name = import_match.group(1).split('.')[0]
            imports.add(module_name)
            continue

        # Check for from...import statements
        from_match = re.match(from_pattern, line)
        if from_match:
            module_name = from_match.group(1).split('.')[0]
            imports.add(module_name)

    return imports


def get_packages_to_install(detected_imports: Set[str]) -> List[str]:
    """
    Determine which packages need to be installed based on detected imports.

    Args:
        detected_imports: Set of module names found in the code

    Returns:
        List of package names that need to be pip installed
    """
    packages_to_install = []

    for import_name in detected_imports:
        # Skip if it's a core preinstalled package
        if import_name in CORE_PREINSTALLED_PACKAGES:
            continue

        # Check if we have a known package mapping
        if import_name in IMPORT_TO_PACKAGE:
            package_name = IMPORT_TO_PACKAGE[import_name]
            packages_to_install.append(package_name)
        # For unknown packages, assume the package name matches the import name
        # (the continue above already excluded core packages, so else suffices)
        else:
            packages_to_install.append(import_name)

    return packages_to_install


def get_warmup_import_commands() -> List[str]:
    """
    Get list of import commands to run during sandbox warmup.

    Returns:
        List of Python import statements for core packages
    """
    core_imports = [
        "import numpy",
        "import pandas",
        "import matplotlib.pyplot",
        "import requests",
        "print('Core packages warmed up successfully')"
    ]

    return core_imports


def create_package_install_command(packages: List[str]) -> str:
    """
    Create a pip install command for the given packages.

    Args:
        packages: List of package names to install

    Returns:
        Pip install command string
    """
    if not packages:
        return ""

    # Remove duplicates and sort
    unique_packages = sorted(set(packages))
    return f"pip install {' '.join(unique_packages)}"

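End to end, these helpers turn a code snippet into a pip command: AST extraction finds the top-level imports, the core set filters out what the base image already has, and the mapping translates import names to PyPI names. A sketch:

    from mcp_hub.package_utils import (
        extract_imports_from_code,
        get_packages_to_install,
        create_package_install_command,
    )

    code = """
    import numpy as np
    from sklearn.linear_model import LinearRegression
    import bs4
    """

    imports = extract_imports_from_code(code)        # {'numpy', 'sklearn', 'bs4'}
    packages = get_packages_to_install(imports)      # numpy is preinstalled, so skipped
    print(create_package_install_command(packages))  # pip install beautifulsoup4 scikit-learn
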
mcp_hub/performance_monitoring.py
ADDED
@@ -0,0 +1,232 @@

"""Performance monitoring and metrics collection for the MCP Hub."""

import time
import psutil
import threading
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from collections import defaultdict, deque
from dataclasses import dataclass
from contextlib import contextmanager
from functools import wraps
from .logging_config import logger

@dataclass
class MetricPoint:
    """Single metric measurement."""
    timestamp: datetime
    metric_name: str
    value: float
    tags: Dict[str, str]

class MetricsCollector:
    """Collects and stores application metrics."""

    def __init__(self, max_points: int = 10000):
        """
        Initialize metrics collector.

        Args:
            max_points: Maximum number of metric points to store
        """
        self.max_points = max_points
        self.metrics = defaultdict(lambda: deque(maxlen=max_points))
        self.lock = threading.Lock()
        self.counters = defaultdict(int)
        self.timers = {}

        # Start system metrics collection thread
        self.system_thread = threading.Thread(target=self._collect_system_metrics, daemon=True)
        self.system_thread.start()
        logger.info("Metrics collector initialized")

    def record_metric(self, name: str, value: float, tags: Optional[Dict[str, str]] = None):
        """Record a metric value."""
        if tags is None:
            tags = {}

        point = MetricPoint(
            timestamp=datetime.now(),
            metric_name=name,
            value=value,
            tags=tags
        )

        with self.lock:
            self.metrics[name].append(point)

    def increment_counter(self, name: str, amount: int = 1, tags: Optional[Dict[str, str]] = None):
        """Increment a counter metric."""
        with self.lock:
            self.counters[name] += amount

        # Must stay outside the lock: record_metric re-acquires it
        self.record_metric(f"{name}_count", self.counters[name], tags)

    @contextmanager
    def timer(self, name: str, tags: Optional[Dict[str, str]] = None):
        """Context manager for timing operations."""
        start_time = time.time()
        try:
            yield
        finally:
            duration = time.time() - start_time
            self.record_metric(f"{name}_duration_seconds", duration, tags)

    def get_metrics_summary(self,
                            metric_name: Optional[str] = None,
                            last_minutes: int = 5) -> Dict[str, Any]:
        """Get summary statistics for metrics."""
        cutoff_time = datetime.now() - timedelta(minutes=last_minutes)

        with self.lock:
            if metric_name:
                metrics_to_analyze = {metric_name: self.metrics[metric_name]}
            else:
                metrics_to_analyze = dict(self.metrics)

            summary = {}

            for name, points in metrics_to_analyze.items():
                recent_points = [p for p in points if p.timestamp >= cutoff_time]

                if not recent_points:
                    continue

                values = [p.value for p in recent_points]
                summary[name] = {
                    "count": len(values),
                    "average": sum(values) / len(values),
                    "min": min(values),
                    "max": max(values),
                    "latest": values[-1] if values else 0,
                    "last_updated": recent_points[-1].timestamp.isoformat() if recent_points else None
                }

            return summary

    def _collect_system_metrics(self):
        """Background thread to collect system metrics."""
        while True:
            try:
                # CPU and memory metrics
                cpu_percent = psutil.cpu_percent(interval=1)
                memory = psutil.virtual_memory()

                self.record_metric("system_cpu_percent", cpu_percent)
                self.record_metric("system_memory_percent", memory.percent)
                self.record_metric("system_memory_available_mb", memory.available / 1024 / 1024)

                # Process-specific metrics
                process = psutil.Process()
                process_memory = process.memory_info()

                self.record_metric("process_memory_rss_mb", process_memory.rss / 1024 / 1024)
                self.record_metric("process_cpu_percent", process.cpu_percent())

                time.sleep(30)  # Collect every 30 seconds

            except Exception as e:
                logger.error(f"Error collecting system metrics: {e}")
                time.sleep(60)  # Wait longer if there's an error

class PerformanceProfiler:
    """Profile performance of agent operations."""

    def __init__(self, metrics_collector: MetricsCollector):
        self.metrics = metrics_collector
        self.operation_stats = defaultdict(list)

    @contextmanager
    def profile_operation(self, operation_name: str, **tags):
        """Context manager to profile an operation."""
        start_time = time.time()
        start_memory = psutil.Process().memory_info().rss

        try:
            yield
            success = True
        except Exception as e:
            success = False
            logger.error(f"Operation {operation_name} failed: {e}")
            raise
        finally:
            end_time = time.time()
            end_memory = psutil.Process().memory_info().rss

            duration = end_time - start_time
            memory_delta = (end_memory - start_memory) / 1024 / 1024  # MB

            # Record metrics
            operation_tags = {"operation": operation_name, "success": str(success), **tags}
            self.metrics.record_metric("operation_duration_seconds", duration, operation_tags)
            self.metrics.record_metric("operation_memory_delta_mb", memory_delta, operation_tags)

            # Update operation stats
            self.operation_stats[operation_name].append({
                "duration": duration,
                "memory_delta": memory_delta,
                "success": success,
                "timestamp": datetime.now()
            })

    def get_operation_summary(self, operation_name: Optional[str] = None) -> Dict[str, Any]:
        """Get summary of operation performance."""
        if operation_name:
            operations_to_analyze = {operation_name: self.operation_stats[operation_name]}
        else:
            operations_to_analyze = dict(self.operation_stats)

        summary = {}

        for op_name, stats in operations_to_analyze.items():
            if not stats:
                continue

            durations = [s["duration"] for s in stats]
            memory_deltas = [s["memory_delta"] for s in stats]
            success_rate = sum(1 for s in stats if s["success"]) / len(stats)

            summary[op_name] = {
                "total_calls": len(stats),
                "success_rate": success_rate,
                "avg_duration_seconds": sum(durations) / len(durations),
                "avg_memory_delta_mb": sum(memory_deltas) / len(memory_deltas),
                "min_duration": min(durations),
                "max_duration": max(durations)
            }

        return summary

# Global instances
metrics_collector = MetricsCollector()
performance_profiler = PerformanceProfiler(metrics_collector)

# Convenience decorators
def track_performance(operation_name: Optional[str] = None):
    """Decorator to automatically track function performance."""
    def decorator(func):
        nonlocal operation_name
        if operation_name is None:
            operation_name = f"{func.__module__}.{func.__name__}"

        @wraps(func)  # preserve the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            with performance_profiler.profile_operation(operation_name):
                result = func(*args, **kwargs)
                metrics_collector.increment_counter(f"{operation_name}_calls")
                return result
        return wrapper
    return decorator

def track_api_call(service_name: str):
    """Decorator specifically for tracking API calls."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            with performance_profiler.profile_operation("api_call", service=service_name):
                try:
                    result = func(*args, **kwargs)
                    metrics_collector.increment_counter("api_calls_success", tags={"service": service_name})
                    return result
                except Exception:
                    metrics_collector.increment_counter("api_calls_failed", tags={"service": service_name})
                    raise
        return wrapper
    return decorator

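The module supports two usage styles: the decorator for whole functions and the timer context manager for inner blocks. A sketch (parse_response is a hypothetical name, not part of this upload):

    from mcp_hub.performance_monitoring import (
        metrics_collector, performance_profiler, track_performance
    )

    @track_performance()  # operation name defaults to module.function
    def parse_response(payload: str) -> int:
        with metrics_collector.timer("json_decode"):  # times just this block
            return len(payload)

    parse_response('{"ok": true}')
    print(metrics_collector.get_metrics_summary(last_minutes=1))
    print(performance_profiler.get_operation_summary())
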
mcp_hub/reliability_utils.py
ADDED
@@ -0,0 +1,254 @@

"""Rate limiting and circuit breaker patterns for robust API interactions."""

import time
from datetime import datetime
from typing import Callable, Any, Dict, Optional
from functools import wraps
from threading import Lock
from collections import deque
from .exceptions import APIError
from .logging_config import logger

class RateLimiter:
    """Token bucket rate limiter for API calls."""

    def __init__(self, calls_per_second: float = 1.0, burst_size: int = 5):
        """
        Initialize rate limiter.

        Args:
            calls_per_second: Maximum calls per second
            burst_size: Maximum burst of calls allowed
        """
        self.calls_per_second = calls_per_second
        self.burst_size = float(burst_size)
        self.tokens = float(burst_size)
        self.last_update = time.time()
        self.lock = Lock()

    def acquire(self, timeout: Optional[float] = None) -> bool:
        """
        Acquire a token for making an API call.

        Args:
            timeout: Maximum time to wait for a token

        Returns:
            True if token acquired, False if timeout
        """
        start_time = time.time()

        while True:
            with self.lock:
                now = time.time()
                # Add tokens based on elapsed time
                time_passed = now - self.last_update
                self.tokens = min(
                    self.burst_size,
                    self.tokens + time_passed * self.calls_per_second
                )
                self.last_update = now

                if self.tokens >= 1:
                    self.tokens -= 1
                    return True

            # Check timeout
            if timeout and (time.time() - start_time) >= timeout:
                return False

            # Wait before retrying
            time.sleep(0.1)

class CircuitBreaker:
    """Circuit breaker pattern for handling API failures gracefully."""

    def __init__(
        self,
        failure_threshold: int = 5,
        timeout: int = 60,
        expected_exception: type = Exception
    ):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            timeout: Seconds to wait before trying again
            expected_exception: Exception type that triggers circuit breaker
        """
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.expected_exception = expected_exception

        self.failure_count = 0
        self.last_failure_time = None
        self.state = "CLOSED"  # CLOSED, OPEN, HALF_OPEN
        self.lock = Lock()

    def _can_attempt(self) -> bool:
        """Check if we can attempt the operation."""
        if self.state == "CLOSED":
            return True
        elif self.state == "OPEN":
            # total_seconds() rather than .seconds, so elapsed times past a
            # day boundary still compare correctly against the timeout
            if (datetime.now() - self.last_failure_time).total_seconds() >= self.timeout:
                self.state = "HALF_OPEN"
                return True
            return False
        else:  # HALF_OPEN
            return True

    def _record_success(self):
        """Record a successful operation."""
        self.failure_count = 0
        self.state = "CLOSED"

    def _record_failure(self):
        """Record a failed operation."""
        self.failure_count += 1
        self.last_failure_time = datetime.now()

        if self.failure_count >= self.failure_threshold:
            self.state = "OPEN"
            logger.warning(f"Circuit breaker opened after {self.failure_count} failures")

    def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute function with circuit breaker protection.

        Args:
            func: Function to execute
            *args, **kwargs: Arguments for the function

        Returns:
            Function result

        Raises:
            APIError: If circuit is open or function fails
        """
        with self.lock:
            if not self._can_attempt():
                raise APIError(
                    "CircuitBreaker",
                    f"Circuit breaker is OPEN. Last failure: {self.last_failure_time}"
                )

        try:
            result = func(*args, **kwargs)
            with self.lock:
                self._record_success()
            return result

        except self.expected_exception as e:
            with self.lock:
                self._record_failure()
            logger.error(f"Circuit breaker recorded failure: {str(e)}")
            raise APIError("CircuitBreaker", f"Protected function failed: {str(e)}")

# Global instances for different services
nebius_rate_limiter = RateLimiter(calls_per_second=2.0, burst_size=5)
tavily_rate_limiter = RateLimiter(calls_per_second=1.0, burst_size=3)

nebius_circuit_breaker = CircuitBreaker(failure_threshold=3, timeout=30)
tavily_circuit_breaker = CircuitBreaker(failure_threshold=3, timeout=30)

def rate_limited(service: str = "default", timeout: float = 10.0):
    """
    Decorator to rate limit function calls.

    Args:
        service: Service name (nebius, tavily, or default)
        timeout: Maximum time to wait for rate limit token
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Select appropriate rate limiter
            if service == "nebius":
                limiter = nebius_rate_limiter
            elif service == "tavily":
                limiter = tavily_rate_limiter
            else:
                # Default limiter: a fresh instance per call, so it does not
                # actually throttle across invocations
                limiter = RateLimiter()

            if not limiter.acquire(timeout=timeout):
                raise APIError(service, f"Rate limit timeout after {timeout}s")

            return func(*args, **kwargs)
        return wrapper
    return decorator

def circuit_protected(service: str = "default"):
    """
    Decorator to protect function calls with circuit breaker.

    Args:
        service: Service name (nebius, tavily, or default)
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Select appropriate circuit breaker
            if service == "nebius":
                breaker = nebius_circuit_breaker
            elif service == "tavily":
                breaker = tavily_circuit_breaker
            else:
                # Default breaker: a fresh instance per call, so failures do
                # not accumulate across invocations
                breaker = CircuitBreaker()

            return breaker.call(func, *args, **kwargs)
        return wrapper
    return decorator

class APIHealthMonitor:
    """Monitor API health and performance metrics."""

    def __init__(self, window_size: int = 100):
        """
        Initialize health monitor.

        Args:
            window_size: Number of recent calls to track
        """
        self.window_size = window_size
        self.call_history = deque(maxlen=window_size)
        self.lock = Lock()

    def record_call(self, service: str, success: bool, response_time: float):
        """Record an API call result."""
        with self.lock:
            self.call_history.append({
                "service": service,
                "success": success,
                "response_time": response_time,
                "timestamp": datetime.now()
            })

    def get_health_stats(self, service: Optional[str] = None) -> Dict[str, Any]:
        """Get health statistics for a service or all services."""
        with self.lock:
            if service:
                calls = [call for call in self.call_history if call["service"] == service]
            else:
                calls = list(self.call_history)

            if not calls:
                return {"error": "No call history available"}

            total_calls = len(calls)
            successful_calls = sum(1 for call in calls if call["success"])
            success_rate = successful_calls / total_calls

            response_times = [call["response_time"] for call in calls]
            avg_response_time = sum(response_times) / len(response_times)

            return {
                "service": service or "all",
                "total_calls": total_calls,
                "success_rate": success_rate,
                "avg_response_time_ms": avg_response_time * 1000,
                "recent_failures": total_calls - successful_calls
            }

# Global health monitor
health_monitor = APIHealthMonitor()

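With calls_per_second=2.0 and burst_size=5, the Nebius bucket refills at 2 tokens per second up to a cap of 5, so an idle client can fire 5 calls back to back before being throttled to the steady rate. Stacking both decorators gives throttling on the way in and failure counting on the way out; a sketch (query_nebius is a hypothetical wrapper, not part of this upload):

    from mcp_hub.reliability_utils import rate_limited, circuit_protected

    @circuit_protected(service="nebius")           # outer: opens after 3 failures
    @rate_limited(service="nebius", timeout=10.0)  # inner: token-bucket throttle
    def query_nebius(prompt: str) -> str:
        # A real implementation would call the Nebius client here and
        # let HTTP errors propagate so the breaker can count them.
        return f"echo: {prompt}"

    print(query_nebius("hello"))

With the breaker outermost, a rate-limit timeout (raised as APIError) also counts as a failure toward opening the circuit.
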
mcp_hub/sandbox_pool.py
ADDED
|
@@ -0,0 +1,701 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Warm Sandbox Pool for Modal - Async Queue-Based Implementation
|
| 3 |
+
This module provides a pre-warmed pool of Modal sandboxes to reduce cold-start latency.
|
| 4 |
+
"""
|
| 5 |
+
import asyncio
|
| 6 |
+
import time
|
| 7 |
+
from typing import Optional, Dict, Any
|
| 8 |
+
from contextlib import asynccontextmanager
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from enum import Enum
|
| 11 |
+
|
| 12 |
+
import modal
|
| 13 |
+
|
| 14 |
+
from mcp_hub.logging_config import logger
|
| 15 |
+
from mcp_hub.exceptions import CodeExecutionError
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class SandboxHealth(Enum):
|
| 19 |
+
"""Sandbox health status."""
|
| 20 |
+
HEALTHY = "healthy"
|
| 21 |
+
UNHEALTHY = "unhealthy"
|
| 22 |
+
UNKNOWN = "unknown"
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
|
| 26 |
+
class PooledSandbox:
|
| 27 |
+
"""Container for a pooled sandbox with metadata."""
|
| 28 |
+
sandbox: modal.Sandbox
|
| 29 |
+
created_at: float
|
| 30 |
+
last_used: float
|
| 31 |
+
health: SandboxHealth = SandboxHealth.UNKNOWN
|
| 32 |
+
use_count: int = 0
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class WarmSandboxPool:
|
| 36 |
+
"""Async queue-based warm sandbox pool with health checking."""
|
| 37 |
+
|
| 38 |
+
def __init__(
|
| 39 |
+
self,
|
| 40 |
+
app: modal.App,
|
| 41 |
+
image: modal.Image,
|
| 42 |
+
pool_size: int = 3,
|
| 43 |
+
max_age_seconds: int = 300, # 5 minutes
|
| 44 |
+
max_uses_per_sandbox: int = 10,
|
| 45 |
+
health_check_interval: int = 60, # 1 minute
|
| 46 |
+
):
|
| 47 |
+
self.app = app
|
| 48 |
+
self.image = image
|
| 49 |
+
self.pool_size = pool_size
|
| 50 |
+
self.max_age_seconds = max_age_seconds
|
| 51 |
+
self.max_uses_per_sandbox = max_uses_per_sandbox
|
| 52 |
+
self.health_check_interval = health_check_interval
|
| 53 |
+
|
| 54 |
+
# Queue to hold available sandboxes
|
| 55 |
+
self._sandbox_queue: asyncio.Queue[PooledSandbox] = asyncio.Queue(maxsize=pool_size)
|
| 56 |
+
|
| 57 |
+
# Background tasks
|
| 58 |
+
self._warmup_task: Optional[asyncio.Task] = None
|
| 59 |
+
self._health_check_task: Optional[asyncio.Task] = None
|
| 60 |
+
self._cleanup_task: Optional[asyncio.Task] = None
|
| 61 |
+
|
| 62 |
+
# Pool statistics
|
| 63 |
+
self._stats = {
|
| 64 |
+
"created": 0,
|
| 65 |
+
"reused": 0,
|
| 66 |
+
"recycled": 0,
|
| 67 |
+
"health_checks": 0,
|
| 68 |
+
"failures": 0
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
# Health tracking for better error recovery
|
| 72 |
+
self._consecutive_failures = 0
|
| 73 |
+
self._last_successful_creation = time.time()
|
| 74 |
+
self._pool_reset_threshold = 5 # Reset pool after 5 consecutive failures
|
| 75 |
+
|
| 76 |
+
self._running = False
|
| 77 |
+
|
| 78 |
+
async def start(self):
|
| 79 |
+
"""Start the pool and background tasks."""
|
| 80 |
+
if self._running:
|
| 81 |
+
return
|
| 82 |
+
|
| 83 |
+
self._running = True
|
| 84 |
+
logger.info(f"Starting warm sandbox pool with {self.pool_size} sandboxes")
|
| 85 |
+
|
| 86 |
+
# Start background tasks
|
| 87 |
+
self._warmup_task = asyncio.create_task(self._warmup_pool())
|
| 88 |
+
self._health_check_task = asyncio.create_task(self._health_check_loop())
|
| 89 |
+
self._cleanup_task = asyncio.create_task(self._cleanup_loop())
|
| 90 |
+
|
| 91 |
+
# Wait for initial warmup
|
| 92 |
+
await asyncio.sleep(1) # Give warmup a moment to start
|
| 93 |
+
|
| 94 |
+
async def stop(self):
|
| 95 |
+
"""Stop the pool and cleanup resources."""
|
| 96 |
+
if not self._running:
|
| 97 |
+
return
|
| 98 |
+
|
| 99 |
+
self._running = False
|
| 100 |
+
logger.info("Stopping warm sandbox pool")
|
| 101 |
+
|
| 102 |
+
# Cancel background tasks
|
| 103 |
+
for task in [self._warmup_task, self._health_check_task, self._cleanup_task]:
|
| 104 |
+
if task and not task.done():
|
| 105 |
+
task.cancel()
|
| 106 |
+
try:
|
| 107 |
+
await task
|
| 108 |
+
except asyncio.CancelledError:
|
| 109 |
+
pass
|
| 110 |
+
# Cleanup remaining sandboxes
|
| 111 |
+
while not self._sandbox_queue.empty():
|
| 112 |
+
try:
|
| 113 |
+
pooled_sb = self._sandbox_queue.get_nowait()
|
| 114 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
| 115 |
+
except asyncio.QueueEmpty:
|
| 116 |
+
break
|
| 117 |
+
|
| 118 |
+
@asynccontextmanager
|
| 119 |
+
async def get_sandbox(self, timeout: float = 5.0):
|
| 120 |
+
pooled_sb = None
|
| 121 |
+
created_new = False
|
| 122 |
+
try:
|
| 123 |
+
# Check if we need to reset the pool due to consecutive failures
|
| 124 |
+
if self._consecutive_failures >= self._pool_reset_threshold:
|
| 125 |
+
logger.warning(f"Pool has {self._consecutive_failures} consecutive failures, attempting reset")
|
| 126 |
+
await self._emergency_pool_reset()
|
| 127 |
+
|
| 128 |
+
# Try to get a warm sandbox from the pool, retry if not alive
|
| 129 |
+
max_retries = 3 # Increased retries for better reliability
|
| 130 |
+
for attempt in range(max_retries):
|
| 131 |
+
try:
|
| 132 |
+
# Try to get from pool first
|
| 133 |
+
pooled_sb = await asyncio.wait_for(self._sandbox_queue.get(), timeout=timeout)
|
| 134 |
+
# Check if the sandbox is alive
|
| 135 |
+
alive = await self._is_sandbox_alive(pooled_sb.sandbox)
|
| 136 |
+
if not alive:
|
| 137 |
+
logger.info(f"Got dead sandbox from pool on attempt {attempt + 1}, terminating and trying next.")
|
| 138 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
| 139 |
+
pooled_sb = None
|
| 140 |
+
continue # Try again
|
| 141 |
+
|
| 142 |
+
# Sandbox is alive, use it
|
| 143 |
+
pooled_sb.last_used = time.time()
|
| 144 |
+
pooled_sb.use_count += 1
|
| 145 |
+
self._stats["reused"] += 1
|
| 146 |
+
self._consecutive_failures = 0 # Reset failure counter on success
|
| 147 |
+
break
|
| 148 |
+
|
| 149 |
+
except asyncio.TimeoutError:
|
| 150 |
+
# Pool empty or taking too long, create a new one
|
| 151 |
+
logger.info(f"Pool timeout on attempt {attempt + 1}, creating new sandbox")
|
| 152 |
+
try:
|
| 153 |
+
sandbox = await self._create_sandbox()
|
| 154 |
+
pooled_sb = PooledSandbox(
|
| 155 |
+
sandbox=sandbox,
|
| 156 |
+
created_at=time.time(),
|
| 157 |
+
last_used=time.time(),
|
| 158 |
+
use_count=1
|
| 159 |
+
)
|
| 160 |
+
created_new = True
|
| 161 |
+
self._stats["created"] += 1
|
| 162 |
+
self._consecutive_failures = 0 # Reset failure counter on success
|
| 163 |
+
self._last_successful_creation = time.time()
|
| 164 |
+
break
|
| 165 |
+
except Exception as create_error:
|
| 166 |
+
logger.error(f"Failed to create sandbox on attempt {attempt + 1}: {create_error}")
|
| 167 |
+
self._consecutive_failures += 1
|
| 168 |
+
if attempt == max_retries - 1: # Last attempt
|
| 169 |
+
raise CodeExecutionError(f"Failed to create sandbox after {max_retries} attempts: {create_error}")
|
| 170 |
+
await asyncio.sleep(2 ** attempt) # Exponential backoff
|
| 171 |
+
else:
|
| 172 |
+
self._consecutive_failures += 1
|
| 173 |
+
raise CodeExecutionError("Could not obtain a live sandbox from the pool after all retry attempts.")
|
| 174 |
+
|
| 175 |
+
logger.info(f"Yielding sandbox of type from sandbox_pool: {type(pooled_sb.sandbox)}")
|
| 176 |
+
yield pooled_sb.sandbox
|
| 177 |
+
|
| 178 |
+
except Exception as e:
|
| 179 |
+
logger.error(f"Error getting sandbox: {e}")
|
| 180 |
+
self._stats["failures"] += 1
|
| 181 |
+
self._consecutive_failures += 1
|
| 182 |
+
raise CodeExecutionError(f"Failed to get sandbox: {e}")
|
| 183 |
+
finally:
|
| 184 |
+
if pooled_sb:
|
| 185 |
+
should_recycle = (
|
| 186 |
+
not created_new and
|
| 187 |
+
self._should_recycle_sandbox(pooled_sb) and
|
| 188 |
+
self._running
|
| 189 |
+
)
|
| 190 |
+
if should_recycle:
|
| 191 |
+
# Double-check sandbox is alive and functional before returning to pool
|
| 192 |
+
if await self._is_sandbox_alive(pooled_sb.sandbox):
|
| 193 |
+
# Additional check: try a quick execution to ensure sandbox is fully functional
|
| 194 |
+
try:
|
| 195 |
+
await asyncio.wait_for(
|
| 196 |
+
asyncio.get_event_loop().run_in_executor(
|
| 197 |
+
None,
|
| 198 |
+
lambda: pooled_sb.sandbox.exec("python", "-c", "import sys; print('ready')", timeout=2)
|
| 199 |
+
),
|
| 200 |
+
timeout=3.0
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
# Sandbox is healthy and functional - return to pool
|
| 204 |
+
try:
|
| 205 |
+
self._sandbox_queue.put_nowait(pooled_sb)
|
| 206 |
+
logger.debug("Returned healthy sandbox to pool")
|
| 207 |
+
except asyncio.QueueFull:
|
| 208 |
+
# Pool is full - terminate excess sandbox
|
| 209 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
| 210 |
+
logger.debug("Pool full, terminated excess sandbox")
|
| 211 |
+
except Exception as e:
|
| 212 |
+
# Sandbox failed functional test - terminate it
|
| 213 |
+
logger.debug(f"Sandbox failed functional test, terminating: {e}")
|
| 214 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
| 215 |
+
else:
|
| 216 |
+
# Sandbox is dead - terminate it
|
| 217 |
+
logger.debug("Sandbox is dead, terminating instead of recycling")
|
| 218 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
| 219 |
+
else:
|
| 220 |
+
# Should not recycle - terminate sandbox
|
| 221 |
+
await self._terminate_sandbox(pooled_sb.sandbox)
|
| 222 |
+
if not created_new:
|
| 223 |
+
self._stats["recycled"] += 1
|
| 224 |
+
logger.debug("Terminated sandbox (exceeded recycle criteria)")
|
| 225 |
+
|
| 226 |
+
async def _create_sandbox(self) -> modal.Sandbox:
|
| 227 |
+
"""Create a new Modal sandbox with timeout protection."""
|
| 228 |
+
try:
|
| 229 |
+
# Add timeout protection for sandbox creation
|
| 230 |
+
sandbox_creation = asyncio.get_event_loop().run_in_executor(
|
| 231 |
+
None,
|
| 232 |
+
lambda: modal.Sandbox.create(
|
| 233 |
+
app=self.app,
|
| 234 |
+
image=self.image,
|
| 235 |
+
cpu=2.0,
|
| 236 |
+
memory=1024,
|
| 237 |
+
timeout=35
|
| 238 |
+
)
|
| 239 |
+
)
|
| 240 |
+
# Wait for sandbox creation with timeout
|
| 241 |
+
sandbox = await asyncio.wait_for(sandbox_creation, timeout=120) # 2 minute timeout
|
| 242 |
+
logger.debug(f"Created new sandbox of type: {type(sandbox)}")
|
| 243 |
+
return sandbox
|
| 244 |
+
except asyncio.TimeoutError:
|
| 245 |
+
logger.error("Sandbox creation timed out after 2 minutes")
|
| 246 |
+
raise Exception("Sandbox creation timed out - Modal may be experiencing issues")
|
| 247 |
+
except Exception as e:
|
| 248 |
+
logger.error(f"Failed to create sandbox: {e}")
|
| 249 |
+
raise
|
| 250 |
+
|
| 251 |
+
    async def _terminate_sandbox(self, sandbox: modal.Sandbox):
        """Safely terminate a sandbox with better error handling."""
        try:
            # Check if sandbox is still responsive before termination
            if hasattr(sandbox, '_terminated') and sandbox._terminated:
                logger.debug("Sandbox already terminated")
                return

            # Use asyncio timeout for termination
            await asyncio.wait_for(
                asyncio.get_event_loop().run_in_executor(None, sandbox.terminate),
                timeout=10.0  # 10 second timeout for termination
            )
            logger.debug("Terminated sandbox successfully")
        except asyncio.TimeoutError:
            logger.warning("Sandbox termination timed out - may be unresponsive")
        except Exception as e:
            # Log the error but don't fail - sandbox may already be dead
            logger.warning(f"Failed to terminate sandbox (may already be dead): {e}")
            # Mark sandbox as terminated to avoid repeated attempts
            if hasattr(sandbox, '_terminated'):
                sandbox._terminated = True

    def _should_recycle_sandbox(self, pooled_sb: PooledSandbox) -> bool:
        """Determine if a sandbox should be recycled back to the pool."""
        now = time.time()

        # Check age
        if now - pooled_sb.created_at > self.max_age_seconds:
            logger.debug("Sandbox too old, not recycling")
            return False

        # Check usage count
        if pooled_sb.use_count >= self.max_uses_per_sandbox:
            logger.debug("Sandbox used too many times, not recycling")
            return False

        # Check health (if we've checked it)
        if pooled_sb.health == SandboxHealth.UNHEALTHY:
            logger.debug("Sandbox unhealthy, not recycling")
            return False

        return True

    async def _warmup_pool(self):
        """Background task to maintain warm sandboxes in the pool with aggressive replenishment."""
        while self._running:
            try:
                current_size = self._sandbox_queue.qsize()

                # More aggressive warmup - start warming when below 90% capacity
                warmup_threshold = max(1, int(self.pool_size * 0.9))

                if current_size < warmup_threshold:
                    needed = self.pool_size - current_size
                    logger.info(f"Pool size ({current_size}) below threshold ({warmup_threshold}). Warming {needed} sandboxes...")

                    # Create new sandboxes to fill the pool - but limit concurrent creation
                    max_concurrent = min(needed, 2)  # Don't overwhelm Modal
                    tasks = []
                    for _ in range(max_concurrent):
                        task = asyncio.create_task(self._create_and_queue_sandbox())
                        tasks.append(task)

                    if tasks:
                        results = await asyncio.gather(*tasks, return_exceptions=True)
                        # Log any failures
                        successful = 0
                        for i, result in enumerate(results):
                            if isinstance(result, Exception):
                                logger.warning(f"Failed to create sandbox {i+1}/{max_concurrent}: {result}")
                            else:
                                successful += 1

                        if successful > 0:
                            logger.info(f"Successfully warmed {successful}/{max_concurrent} sandboxes")

                # Adaptive sleep interval based on pool health
                if current_size == 0:
                    # Critical: no sandboxes available
                    sleep_interval = 1
                elif current_size < warmup_threshold:
                    # Low: need more sandboxes
                    sleep_interval = 2
                else:
                    # Healthy: normal monitoring
                    sleep_interval = 5

                await asyncio.sleep(sleep_interval)

            except Exception as e:
                logger.error(f"Error in warmup loop: {e}")
                await asyncio.sleep(10)  # Wait longer on error

    async def _create_and_queue_sandbox(self):
        """Create a sandbox and add it to the queue."""
        start_time = time.time()
        try:
            # Create the sandbox
            sandbox = await self._create_sandbox()
            creation_time = time.time() - start_time
            logger.info(f"Sandbox creation took {creation_time:.2f}s")

            # Proactively warm up the sandbox with core imports
            warmup_start = time.time()
            await self._warmup_sandbox_imports(sandbox)
            warmup_time = time.time() - warmup_start
            logger.info(f"Sandbox warmup with imports took {warmup_time:.2f}s")

            pooled_sb = PooledSandbox(
                sandbox=sandbox,
                created_at=time.time(),
                last_used=time.time()
            )

            try:
                self._sandbox_queue.put_nowait(pooled_sb)
                total_time = time.time() - start_time
                logger.info(f"Added warm sandbox to pool (total time: {total_time:.2f}s)")
            except asyncio.QueueFull:
                # Pool is full, terminate this sandbox
                await self._terminate_sandbox(sandbox)

        except Exception as e:
            total_time = time.time() - start_time
            logger.error(f"Failed to create and queue sandbox after {total_time:.2f}s: {e}")

    async def _warmup_sandbox_imports(self, sandbox: modal.Sandbox):
        """Warm up sandbox by importing core packages."""
        try:
            from mcp_hub.package_utils import get_warmup_import_commands

            # Get warmup commands
            import_commands = get_warmup_import_commands()
            warmup_script = "; ".join(import_commands)

            # Execute the warmup script
            logger.debug("Running sandbox warmup imports...")
            proc = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: sandbox.exec("python", "-c", warmup_script, timeout=30)
            )

            # Check if warmup was successful
            if hasattr(proc, 'stdout') and hasattr(proc.stdout, 'read'):
                output = proc.stdout.read()
                if "Core packages warmed up successfully" in output:
                    logger.debug("Sandbox warmup imports completed successfully")
                else:
                    logger.warning(f"Sandbox warmup completed but output unexpected: {output}")
            else:
                logger.debug("Sandbox warmup imports completed")

        except Exception as e:
            logger.warning(f"Failed to warm up sandbox imports (sandbox still usable): {e}")

    async def _health_check_loop(self):
        """Background task to check sandbox health and perform proactive cleanup."""
        while self._running:
            try:
                # Perform regular health checks every interval
                await asyncio.sleep(self.health_check_interval)

                # First do a quick proactive cleanup
                cleaned = await self._proactive_cleanup()

                # Then do the full health check
                await self._perform_health_checks()

                # If we cleaned up sandboxes, trigger warmup
                if cleaned > 0:
                    logger.info(f"Health check cleaned {cleaned} sandboxes, pool may need warming")

            except Exception as e:
                logger.error(f"Error in health check loop: {e}")
                await asyncio.sleep(10)  # Wait longer on error

    async def _perform_health_checks(self):
        """Perform health checks on sandboxes in the pool."""
        # This is a simplified health check - in practice you might want
        # to run a simple command to verify the sandbox is responsive
        temp_sandboxes = []

        # Drain the queue to check each sandbox
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                is_healthy = await self._check_sandbox_health(pooled_sb.sandbox)
                pooled_sb.health = SandboxHealth.HEALTHY if is_healthy else SandboxHealth.UNHEALTHY
                if is_healthy:
                    temp_sandboxes.append(pooled_sb)
                else:
                    # Terminate unhealthy sandbox
                    await self._terminate_sandbox(pooled_sb.sandbox)
                    self._stats["recycled"] += 1
            except asyncio.QueueEmpty:
                break

        # Put healthy sandboxes back
        for pooled_sb in temp_sandboxes:
            try:
                self._sandbox_queue.put_nowait(pooled_sb)
            except asyncio.QueueFull:
                await self._terminate_sandbox(pooled_sb.sandbox)

        self._stats["health_checks"] += 1
        logger.debug(f"Health check completed. Pool size: {self._sandbox_queue.qsize()}")

    async def _check_sandbox_health(self, sandbox: modal.Sandbox) -> bool:
        """Check if a sandbox is healthy."""
        try:
            # Run a simple Python command to check if the sandbox is responsive
            proc = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: sandbox.exec("python", "-c", "print('health_check')", timeout=5)
            )
            output = proc.stdout.read()
            return "health_check" in output
        except Exception as e:
            logger.debug(f"Sandbox health check failed: {e}")
            return False

    async def _cleanup_loop(self):
        """Background task to clean up old sandboxes."""
        while self._running:
            try:
                await asyncio.sleep(30)  # Check every 30 seconds
                await self._cleanup_old_sandboxes()
            except Exception as e:
                logger.error(f"Error in cleanup loop: {e}")

    async def _cleanup_old_sandboxes(self):
        """Remove old sandboxes from the pool."""
        now = time.time()
        temp_sandboxes = []

        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                if now - pooled_sb.created_at < self.max_age_seconds:
                    temp_sandboxes.append(pooled_sb)
                else:
                    # Terminate expired sandbox
                    await self._terminate_sandbox(pooled_sb.sandbox)
                    self._stats["recycled"] += 1
                    logger.debug("Cleaned up old sandbox")
            except asyncio.QueueEmpty:
                break

        # Put non-expired sandboxes back
        for pooled_sb in temp_sandboxes:
            try:
                self._sandbox_queue.put_nowait(pooled_sb)
            except asyncio.QueueFull:
                await self._terminate_sandbox(pooled_sb.sandbox)

    async def _is_sandbox_alive(self, sandbox: modal.Sandbox) -> bool:
        """Check if a sandbox is alive by running a trivial command with better error handling."""
        try:
            # Check if sandbox was already marked as terminated
            if hasattr(sandbox, '_terminated') and sandbox._terminated:
                return False

            # Use a shorter timeout for liveness checks
            proc = await asyncio.wait_for(
                asyncio.get_event_loop().run_in_executor(
                    None,
                    lambda: sandbox.exec("python", "-c", "print('ping')", timeout=3)
                ),
                timeout=5.0  # Overall timeout
            )

            if hasattr(proc, "stdout") and hasattr(proc.stdout, "read"):
                out = proc.stdout.read()
                return "ping" in out
            else:
                # For some Modal versions, output might be returned directly
                out = str(proc)
                return "ping" in out

        except asyncio.TimeoutError:
            logger.debug("Liveness check timed out - sandbox likely dead")
            return False
        except Exception as e:
            logger.debug(f"Liveness check failed: {e}")
            # Mark sandbox as dead to avoid repeated checks
            if hasattr(sandbox, '_terminated'):
                sandbox._terminated = True
            return False

    async def _emergency_pool_reset(self):
        """Emergency reset of the pool when too many consecutive failures occur."""
        logger.warning("Performing emergency pool reset due to consecutive failures")

        # Drain and terminate all sandboxes in the pool
        terminated_count = 0
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()
                await self._terminate_sandbox(pooled_sb.sandbox)
                terminated_count += 1
            except asyncio.QueueEmpty:
                break

        logger.info(f"Emergency reset: terminated {terminated_count} sandboxes")

        # Reset failure counter
        self._consecutive_failures = 0

        # Try to create one fresh sandbox to test if the underlying issue is resolved
        try:
            test_sandbox = await self._create_sandbox()
            test_pooled = PooledSandbox(
                sandbox=test_sandbox,
                created_at=time.time(),
                last_used=time.time(),
                use_count=0
            )
            self._sandbox_queue.put_nowait(test_pooled)
            logger.info("Emergency reset successful: created test sandbox")
        except Exception as e:
            # The counter stays reset either way, so later attempts can retry
            logger.error(f"Emergency reset failed to create test sandbox: {e}")

    def get_stats(self) -> Dict[str, Any]:
        """Get pool statistics including health metrics."""
        return {
            **self._stats,
            "pool_size": self._sandbox_queue.qsize(),
            "target_pool_size": self.pool_size,
            "running": self._running,
            "consecutive_failures": self._consecutive_failures,
            "last_successful_creation": self._last_successful_creation,
            "time_since_last_success": time.time() - self._last_successful_creation,
            "health_status": (
                "healthy" if self._consecutive_failures < 3
                else "degraded" if self._consecutive_failures < self._pool_reset_threshold
                else "critical"
            ),
        }

    async def _proactive_cleanup(self):
        """Proactively clean up dead or unhealthy sandboxes from the pool."""
        temp_sandboxes = []
        cleaned_count = 0

        # Drain the queue to check each sandbox
        while not self._sandbox_queue.empty():
            try:
                pooled_sb = self._sandbox_queue.get_nowait()

                # Quick health check
                if await self._is_sandbox_alive(pooled_sb.sandbox):
                    # Sandbox is alive - keep it
                    temp_sandboxes.append(pooled_sb)
                else:
                    # Sandbox is dead - terminate it
                    await self._terminate_sandbox(pooled_sb.sandbox)
                    cleaned_count += 1
                    logger.debug("Cleaned up dead sandbox during proactive cleanup")

            except asyncio.QueueEmpty:
                break

        # Put healthy sandboxes back
        for pooled_sb in temp_sandboxes:
            try:
                self._sandbox_queue.put_nowait(pooled_sb)
            except asyncio.QueueFull:
                # Shouldn't happen, but terminate if it does
                await self._terminate_sandbox(pooled_sb.sandbox)
                cleaned_count += 1

        if cleaned_count > 0:
            logger.info(f"Proactive cleanup removed {cleaned_count} dead sandboxes")

        return cleaned_count

# Helper function for testing and debugging the sandbox pool
async def test_sandbox_pool_health(pool: WarmSandboxPool) -> Dict[str, Any]:
    """Test sandbox pool health and return detailed diagnostics."""
    diagnostics: Dict[str, Any] = {
        "timestamp": time.time(),
        "pool_stats": pool.get_stats(),
        "tests": {}
    }

    logger.info("Starting sandbox pool health test...")

    # Test 1: Pool basic stats
    stats = pool.get_stats()
    diagnostics["tests"]["pool_stats"] = {
        "passed": True,
        "details": stats
    }

    # Test 2: Try to get a sandbox
    try:
        async with pool.get_sandbox(timeout=10.0) as sandbox:
            # Test 3: Try to run a simple command
            try:
                proc = await asyncio.get_event_loop().run_in_executor(
                    None,
                    lambda: sandbox.exec("python", "-c", "print('health_test_ok')", timeout=5)
                )
                output = proc.stdout.read() if hasattr(proc.stdout, "read") else str(proc)

                diagnostics["tests"]["sandbox_execution"] = {
                    "passed": "health_test_ok" in output,
                    "output": output[:200],  # First 200 chars
                    "details": "Successfully executed test command"
                }
            except Exception as e:
                diagnostics["tests"]["sandbox_execution"] = {
                    "passed": False,
                    "error": str(e),
                    "details": "Failed to execute test command in sandbox"
                }

        diagnostics["tests"]["sandbox_acquisition"] = {
            "passed": True,
            "details": "Successfully acquired and released sandbox"
        }

    except Exception as e:
        diagnostics["tests"]["sandbox_acquisition"] = {
            "passed": False,
            "error": str(e),
            "details": "Failed to acquire sandbox from pool"
        }

        diagnostics["tests"]["sandbox_execution"] = {
            "passed": False,
            "error": "Could not test - no sandbox available",
            "details": "Skipped due to sandbox acquisition failure"
        }

    # Test 4: Check pool warmup status
    if pool._running:
        warmup_needed = pool.pool_size - stats["pool_size"]
        diagnostics["tests"]["pool_warmup"] = {
            "passed": warmup_needed <= 1,  # Allow 1 sandbox to be missing
            "details": f"Pool has {stats['pool_size']}/{pool.pool_size} sandboxes, {warmup_needed} needed"
        }
    else:
        diagnostics["tests"]["pool_warmup"] = {
            "passed": False,
            "details": "Pool is not running"
        }

    # Overall health assessment
    all_tests_passed = all(test.get("passed", False) for test in diagnostics["tests"].values())
    diagnostics["overall_health"] = "healthy" if all_tests_passed else "unhealthy"

    logger.info(f"Sandbox pool health test completed. Overall health: {diagnostics['overall_health']}")
    return diagnostics
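
A minimal sketch of how the diagnostics helper above might be driven from a script. The WarmSandboxPool constructor arguments and the start()/stop() method names are assumptions inferred from the _running flag and background tasks in this file; get_stats() and test_sandbox_pool_health() match the code shown above.

import asyncio
import json
import modal

from mcp_hub.sandbox_pool import WarmSandboxPool, test_sandbox_pool_health

async def main():
    # Hypothetical setup: the app/image/pool_size arguments are illustrative only
    app = modal.App.lookup("mcp-hub", create_if_missing=True)
    pool = WarmSandboxPool(app=app, image=modal.Image.debian_slim(), pool_size=2)
    await pool.start()  # assumed entry point that spawns the warmup/health/cleanup loops
    try:
        # get_stats() is synchronous and cheap; health_status is derived from consecutive failures
        print(json.dumps(pool.get_stats(), indent=2, default=str))

        # Full diagnostic pass: stats, acquisition, execution, warmup status
        report = await test_sandbox_pool_health(pool)
        print(report["overall_health"])
    finally:
        await pool.stop()  # assumed counterpart that clears _running and drains the queue

asyncio.run(main())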
mcp_hub/utils.py
ADDED
@@ -0,0 +1,439 @@
"""Utility functions for the MCP Hub project."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
from typing import Dict, Any, List, Optional, Union
|
| 6 |
+
from openai import OpenAI, AsyncOpenAI
|
| 7 |
+
from .config import api_config, model_config
|
| 8 |
+
from .exceptions import APIError, ValidationError
|
| 9 |
+
from .logging_config import logger
|
| 10 |
+
import aiohttp
|
| 11 |
+
from huggingface_hub import InferenceClient
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def create_nebius_client() -> OpenAI:
    """Create and return a Nebius OpenAI client."""
    return OpenAI(
        base_url=api_config.nebius_base_url,
        api_key=api_config.nebius_api_key,
    )

def create_async_nebius_client() -> AsyncOpenAI:
    """Create and return an async Nebius OpenAI client."""
    return AsyncOpenAI(
        base_url=api_config.nebius_base_url,
        api_key=api_config.nebius_api_key,
    )

def create_llm_client() -> Union[OpenAI, object]:
    """Create and return an LLM client based on the configured provider."""
    if api_config.llm_provider == "nebius":
        return create_nebius_client()
    elif api_config.llm_provider == "openai":
        return OpenAI(api_key=api_config.openai_api_key)
    elif api_config.llm_provider == "anthropic":
        try:
            import anthropic
            return anthropic.Anthropic(api_key=api_config.anthropic_api_key)
        except ImportError:
            raise APIError("Anthropic", "anthropic package not installed. Install with: pip install anthropic")
    elif api_config.llm_provider == "huggingface":
        # Try different HuggingFace client configurations for better compatibility
        try:
            # First try with hf-inference provider (most recent approach)
            return InferenceClient(
                provider="hf-inference",
                api_key=api_config.huggingface_api_key,
            )
        except Exception:
            # Fall back to token-based authentication
            return InferenceClient(
                token=api_config.huggingface_api_key,
            )
    else:
        raise APIError("Config", f"Unsupported LLM provider: {api_config.llm_provider}")

def create_async_llm_client() -> Union[AsyncOpenAI, object]:
    """Create and return an async LLM client based on the configured provider."""
    if api_config.llm_provider == "nebius":
        return create_async_nebius_client()
    elif api_config.llm_provider == "openai":
        return AsyncOpenAI(api_key=api_config.openai_api_key)
    elif api_config.llm_provider == "anthropic":
        try:
            import anthropic
            return anthropic.AsyncAnthropic(api_key=api_config.anthropic_api_key)
        except ImportError:
            raise APIError("Anthropic", "anthropic package not installed. Install with: pip install anthropic")
    elif api_config.llm_provider == "huggingface":
        # Try different HuggingFace client configurations for better compatibility
        try:
            # First try with hf-inference provider (most recent approach)
            return InferenceClient(
                provider="hf-inference",
                api_key=api_config.huggingface_api_key,
            )
        except Exception:
            # Fall back to token-based authentication
            return InferenceClient(
                token=api_config.huggingface_api_key,
            )
    else:
        raise APIError("Config", f"Unsupported LLM provider: {api_config.llm_provider}")

def validate_non_empty_string(value: str, field_name: str) -> None:
    """Validate that a string is not empty or None."""
    if not value or not value.strip():
        raise ValidationError(f"{field_name} cannot be empty.")

def extract_json_from_text(text: str) -> Dict[str, Any]:
    """Extract a JSON object from text that may contain markdown fences."""
    # Remove markdown code fences if present
    if text.startswith("```"):
        parts = text.split("```")
        if len(parts) >= 3:
            text = parts[1].strip()
        else:
            text = text.strip("```").strip()

    # Find JSON object boundaries
    start_idx = text.find("{")
    end_idx = text.rfind("}")

    if start_idx == -1 or end_idx == -1 or end_idx < start_idx:
        raise ValidationError("Failed to locate JSON object in text.")

    json_candidate = text[start_idx:end_idx + 1]

    try:
        return json.loads(json_candidate)
    except json.JSONDecodeError as e:
        raise ValidationError(f"Failed to parse JSON: {str(e)}")

def extract_urls_from_text(text: str) -> List[str]:
    """Extract URLs from text using a regex."""
    url_pattern = r"(https?://[^\s]+)"
    return re.findall(url_pattern, text)

def make_nebius_completion(
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.6,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make a completion request to Nebius and return the content."""
    client = create_nebius_client()

    try:
        kwargs = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }

        if response_format:
            kwargs["response_format"] = response_format

        completion = client.chat.completions.create(**kwargs)
        return completion.choices[0].message.content.strip()
    except Exception as e:
        raise APIError("Nebius", str(e))

async def make_async_nebius_completion(
    model: str,
    messages: List[Dict[str, Any]],
    temperature: float = 0.0,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make an async completion request to the Nebius API."""
    try:
        client = create_async_nebius_client()

        kwargs = {
            "model": model,
            "messages": messages,
            "temperature": temperature
        }

        if response_format:
            kwargs["response_format"] = response_format

        response = await client.chat.completions.create(**kwargs)

        if not response.choices:
            raise APIError("Nebius", "No completion choices returned")

        content = response.choices[0].message.content
        if content is None:
            raise APIError("Nebius", "Empty response content")

        return content.strip()

    except Exception as e:
        if isinstance(e, APIError):
            raise
        raise APIError("Nebius", f"API call failed: {str(e)}")

def make_llm_completion(
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.6,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make a completion request using the configured LLM provider."""
    provider = api_config.llm_provider

    try:
        if provider == "nebius":
            return make_nebius_completion(model, messages, temperature, response_format)

        elif provider == "openai":
            client = create_llm_client()
            kwargs = {
                "model": model,
                "messages": messages,
                "temperature": temperature,
            }
            # OpenAI only supports simple response_format, not the extended Nebius format
            if response_format and response_format.get("type") == "json_object":
                kwargs["response_format"] = {"type": "json_object"}
            completion = client.chat.completions.create(**kwargs)
            return completion.choices[0].message.content.strip()

        elif provider == "anthropic":
            client = create_llm_client()
            # Convert OpenAI format to Anthropic format
            anthropic_messages = []
            system_message = None

            for msg in messages:
                if msg["role"] == "system":
                    system_message = msg["content"]
                else:
                    anthropic_messages.append({
                        "role": msg["role"],
                        "content": msg["content"]
                    })

            kwargs = {
                "model": model,
                "messages": anthropic_messages,
                "temperature": temperature,
                "max_tokens": 1000,
            }
            if system_message:
                kwargs["system"] = system_message

            response = client.messages.create(**kwargs)
            return response.content[0].text.strip()

        elif provider == "huggingface":
            # Try HuggingFace with fallback to Nebius
            hf_error = None
            try:
                client = create_llm_client()

                # Try multiple HuggingFace API approaches

                # Method 1: chat.completions.create (OpenAI-compatible)
                try:
                    response = client.chat.completions.create(
                        model=model,
                        messages=messages,
                        temperature=temperature,
                        max_tokens=1000,
                    )

                    # Extract the response content
                    if hasattr(response, 'choices') and response.choices:
                        return response.choices[0].message.content.strip()
                    else:
                        return str(response).strip()

                except Exception as e1:
                    hf_error = e1

                    # Method 2: chat_completion method (HuggingFace native)
                    try:
                        response = client.chat_completion(
                            messages=messages,
                            model=model,
                            temperature=temperature,
                            max_tokens=1000,
                        )

                        # Handle different response formats
                        if hasattr(response, 'generated_text'):
                            return response.generated_text.strip()
                        elif isinstance(response, dict) and 'generated_text' in response:
                            return response['generated_text'].strip()
                        elif isinstance(response, list) and len(response) > 0:
                            if isinstance(response[0], dict) and 'generated_text' in response[0]:
                                return response[0]['generated_text'].strip()

                        return str(response).strip()

                    except Exception as e2:
                        # Both HuggingFace methods failed
                        hf_error = f"Method 1: {str(e1)}. Method 2: {str(e2)}"
                        raise APIError("HuggingFace", f"All HuggingFace methods failed. {hf_error}")

            except Exception as e:
                # HuggingFace failed, try fallback to Nebius
                if hf_error is None:
                    hf_error = str(e)
                logger.warning(f"HuggingFace API failed: {hf_error}, falling back to Nebius")

                try:
                    # Use a Nebius model appropriate for the task
                    nebius_model = model_config.get_model_for_provider("question_enhancer", "nebius")
                    return make_nebius_completion(nebius_model, messages, temperature, response_format)
                except Exception as nebius_error:
                    raise APIError("HuggingFace", f"HuggingFace failed: {hf_error}. Nebius fallback also failed: {str(nebius_error)}")

        else:
            raise APIError("Config", f"Unsupported LLM provider: {provider}")

    except Exception as e:
        raise APIError(provider.title(), f"Completion failed: {str(e)}")


async def make_async_llm_completion(
    model: str,
    messages: List[Dict[str, Any]],
    temperature: float = 0.0,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Make an async completion request using the configured LLM provider."""
    provider = api_config.llm_provider

    try:
        if provider == "nebius":
            return await make_async_nebius_completion(model, messages, temperature, response_format)

        elif provider == "openai":
            client = create_async_llm_client()
            kwargs = {
                "model": model,
                "messages": messages,
                "temperature": temperature
            }
            if response_format and response_format.get("type") == "json_object":
                kwargs["response_format"] = {"type": "json_object"}

            response = await client.chat.completions.create(**kwargs)

            if not response.choices:
                raise APIError("OpenAI", "No completion choices returned")

            content = response.choices[0].message.content
            if content is None:
                raise APIError("OpenAI", "Empty response content")

            return content.strip()

        elif provider == "anthropic":
            client = create_async_llm_client()
            anthropic_messages = []
            system_message = None

            for msg in messages:
                if msg["role"] == "system":
                    system_message = msg["content"]
                else:
                    anthropic_messages.append({
                        "role": msg["role"],
                        "content": msg["content"]
                    })

            kwargs = {
                "model": model,
                "messages": anthropic_messages,
                "temperature": temperature,
                "max_tokens": 1000,
            }
            if system_message:
                kwargs["system"] = system_message

            response = await client.messages.create(**kwargs)
            return response.content[0].text.strip()

        elif provider == "huggingface":
            # HuggingFace doesn't support async here, fall back to Nebius
            logger.warning("HuggingFace does not support async operations, falling back to Nebius")

            try:
                # Use a Nebius model appropriate for the task
                nebius_model = model_config.get_model_for_provider("question_enhancer", "nebius")
                return await make_async_nebius_completion(nebius_model, messages, temperature, response_format)
            except Exception as nebius_error:
                raise APIError("HuggingFace", f"HuggingFace async not supported. Nebius fallback failed: {str(nebius_error)}")

        else:
            raise APIError("Config", f"Unsupported LLM provider: {provider}")

    except Exception as e:
        raise APIError(provider.title(), f"Async completion failed: {str(e)}")

async def async_tavily_search(query: str, max_results: int = 3) -> Dict[str, Any]:
    """Perform async web search using the Tavily API."""
    try:
        async with aiohttp.ClientSession() as session:
            url = "https://api.tavily.com/search"
            headers = {
                "Content-Type": "application/json"
            }
            data = {
                "api_key": api_config.tavily_api_key,
                "query": query,
                "search_depth": "basic",
                "max_results": max_results,
                "include_answer": True
            }

            async with session.post(url, headers=headers, json=data) as response:
                if response.status != 200:
                    raise APIError("Tavily", f"HTTP {response.status}: {await response.text()}")

                result = await response.json()
                return {
                    "query": result.get("query", query),
                    "tavily_answer": result.get("answer"),
                    "results": result.get("results", []),
                    "data_source": "Tavily Search API",
                }

    except aiohttp.ClientError as e:
        raise APIError("Tavily", f"HTTP request failed: {str(e)}")
    except Exception as e:
        if isinstance(e, APIError):
            raise
        raise APIError("Tavily", f"Search failed: {str(e)}")

def format_search_results(results: List[Dict[str, Any]]) -> str:
    """Format search results into a readable string."""
    if not results:
        return "No search results found."

    snippets = []
    for idx, item in enumerate(results, 1):
        title = item.get("title", "No Title")
        url = item.get("url", "")
        content = item.get("content", "")

        snippet = f"Result {idx}:\nTitle: {title}\nURL: {url}\nSnippet: {content}\n"
        snippets.append(snippet)

    return "\n".join(snippets).strip()

def create_apa_citation(url: str, year: Optional[str] = None) -> str:
    """Create a simple APA-style citation from a URL."""
    if not year:
        year = api_config.current_year

    try:
        domain = url.split("/")[2]
        title = domain.replace("www.", "").split(".")[0].capitalize()
        return f"{title}. ({year}). Retrieved from {url}"
    except (IndexError, AttributeError):
        return f"Unknown Source. ({year}). Retrieved from {url}"
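
A short, self-contained sketch of the parsing helpers above; the fenced-JSON input string is invented for illustration, but the calls match the signatures in this file.

from mcp_hub.utils import create_apa_citation, extract_json_from_text, extract_urls_from_text

# A typical LLM reply wrapped in markdown fences (made-up example input)
raw = '```json\n{"sub_questions": ["What is MCP?"]}\n```'
parsed = extract_json_from_text(raw)
print(parsed["sub_questions"])  # ['What is MCP?']

urls = extract_urls_from_text("See https://example.com/docs for details.")
print(create_apa_citation(urls[0], year="2025"))
# Example. (2025). Retrieved from https://example.com/docs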
pyproject.toml
ADDED
@@ -0,0 +1,28 @@
[project]
name = "mcp-hub-project"
version = "0.2.0"
description = "Advanced MCP Hub with Inter-Agent Communication and Performance Monitoring"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "gradio-client>=1.10.2",
    "gradio[mcp]>=5.33.0",
    "modal>=1.0.2",
    "openai>=1.84.0",
    "tavily-python>=0.7.4",
    "python-dotenv>=1.0.0",
    "psutil>=5.9.0",
    "aiohttp>=3.8.0",
    "anthropic>=0.52.2",
    "huggingface>=0.0.1",
    "huggingface-hub>=0.32.4",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.4.0",
    "pytest-cov>=4.1.0",
    "black>=23.0.0",
    "isort>=5.12.0",
    "mypy>=1.5.0",
]
pytest.ini
ADDED
@@ -0,0 +1,11 @@
[pytest]
minversion = 6.0
addopts = -ra --strict-markers --strict-config --cov=app --cov=mcp_hub --cov-report=term-missing --cov-report=html:htmlcov --cov-branch
testpaths = tests
markers =
    unit: Unit tests
    integration: Integration tests
    async_test: Async test cases
    slow: Slow running tests
    requires_api: Tests that need API keys
asyncio_mode = auto
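
Because --strict-markers is set in addopts, every marker a test uses must be declared in the list above. A hedged illustration of how a test file opts into these markers (the tests themselves are invented for this example):

import pytest

@pytest.mark.unit
def test_extract_urls():
    from mcp_hub.utils import extract_urls_from_text
    assert extract_urls_from_text("go to https://example.com") == ["https://example.com"]

@pytest.mark.slow
@pytest.mark.requires_api
def test_live_search():
    # Deselected unless run explicitly, e.g. pytest -m "slow and requires_api"
    ...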
requirements.txt
ADDED
@@ -0,0 +1,11 @@
gradio-client>=1.10.2
gradio[mcp]>=5.33.0
modal>=1.0.2
openai>=1.84.0
tavily-python>=0.7.4
python-dotenv>=1.0.0
psutil>=5.9.0
aiohttp>=3.8.0
anthropic>=0.52.2
huggingface>=0.0.1
huggingface-hub>=0.32.4
tests/__init__.py
ADDED
@@ -0,0 +1 @@
"""Test package for MCP Hub."""
tests/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (199 Bytes)

tests/__pycache__/conftest.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (5.85 kB)

tests/conftest.py
ADDED
@@ -0,0 +1,142 @@
"""Common test fixtures and configuration."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
import asyncio
|
| 5 |
+
import os
|
| 6 |
+
from unittest.mock import Mock, MagicMock, patch
|
| 7 |
+
from typing import Dict, Any, Generator
|
| 8 |
+
|
| 9 |
+
# Mock environment variables for testing - set them globally before any imports
|
| 10 |
+
TEST_ENV_VARS = {
|
| 11 |
+
"TAVILY_API_KEY": "tvly-test-key-12345",
|
| 12 |
+
"NEBIUS_API_KEY": "test-nebius-key",
|
| 13 |
+
"OPENAI_API_KEY": "test-openai-key",
|
| 14 |
+
"ANTHROPIC_API_KEY": "test-anthropic-key",
|
| 15 |
+
"HUGGINGFACE_API_KEY": "test-hf-key",
|
| 16 |
+
"LLM_PROVIDER": "nebius"
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
# Set environment variables immediately
|
| 20 |
+
for key, value in TEST_ENV_VARS.items():
|
| 21 |
+
os.environ[key] = value
|
| 22 |
+
|
| 23 |
+
@pytest.fixture
|
| 24 |
+
def mock_tavily_client():
|
| 25 |
+
"""Mock Tavily client for web search tests."""
|
| 26 |
+
mock_client = Mock()
|
| 27 |
+
mock_client.search.return_value = {
|
| 28 |
+
"results": [
|
| 29 |
+
{
|
| 30 |
+
"title": "Test Result 1",
|
| 31 |
+
"url": "https://example.com/1",
|
| 32 |
+
"content": "Test content 1",
|
| 33 |
+
"score": 0.9
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"title": "Test Result 2",
|
| 37 |
+
"url": "https://example.com/2",
|
| 38 |
+
"content": "Test content 2",
|
| 39 |
+
"score": 0.8
|
| 40 |
+
}
|
| 41 |
+
],
|
| 42 |
+
"answer": "Test search summary"
|
| 43 |
+
}
|
| 44 |
+
return mock_client
|
| 45 |
+
|
| 46 |
+
@pytest.fixture
|
| 47 |
+
def mock_llm_response():
|
| 48 |
+
"""Mock LLM completion response."""
|
| 49 |
+
return '{"sub_questions": ["Question 1?", "Question 2?", "Question 3?"]}'
|
| 50 |
+
|
| 51 |
+
@pytest.fixture
|
| 52 |
+
def mock_modal_sandbox():
|
| 53 |
+
"""Mock Modal sandbox for code execution tests."""
|
| 54 |
+
mock_sandbox = Mock()
|
| 55 |
+
mock_sandbox.exec.return_value = Mock(stdout="Test output", stderr="", returncode=0)
|
| 56 |
+
return mock_sandbox
|
| 57 |
+
|
| 58 |
+
@pytest.fixture
|
| 59 |
+
def sample_user_request():
|
| 60 |
+
"""Sample user request for testing."""
|
| 61 |
+
return "Create a Python script to analyze CSV data and generate charts"
|
| 62 |
+
|
| 63 |
+
@pytest.fixture
|
| 64 |
+
def sample_search_results():
|
| 65 |
+
"""Sample search results for testing."""
|
| 66 |
+
return [
|
| 67 |
+
{
|
| 68 |
+
"title": "Python Data Analysis Tutorial",
|
| 69 |
+
"url": "https://example.com/pandas-tutorial",
|
| 70 |
+
"content": "Learn how to analyze CSV data with pandas and matplotlib...",
|
| 71 |
+
"score": 0.95
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"title": "Chart Generation with Python",
|
| 75 |
+
"url": "https://example.com/charts",
|
| 76 |
+
"content": "Create stunning charts and visualizations...",
|
| 77 |
+
"score": 0.87
|
| 78 |
+
}
|
| 79 |
+
]
|
| 80 |
+
|
| 81 |
+
@pytest.fixture
|
| 82 |
+
def sample_code():
|
| 83 |
+
"""Sample Python code for testing."""
|
| 84 |
+
return '''
|
| 85 |
+
import pandas as pd
|
| 86 |
+
import matplotlib.pyplot as plt
|
| 87 |
+
|
| 88 |
+
# Load data
|
| 89 |
+
df = pd.read_csv('data.csv')
|
| 90 |
+
|
| 91 |
+
# Generate chart
|
| 92 |
+
df.plot(kind='bar')
|
| 93 |
+
plt.show()
|
| 94 |
+
'''
|
| 95 |
+
|
| 96 |
+
@pytest.fixture
|
| 97 |
+
def mock_config():
|
| 98 |
+
"""Mock configuration objects."""
|
| 99 |
+
api_config = Mock()
|
| 100 |
+
api_config.tavily_api_key = "tvly-test-key"
|
| 101 |
+
api_config.llm_provider = "nebius"
|
| 102 |
+
api_config.nebius_api_key = "test-nebius-key"
|
| 103 |
+
|
| 104 |
+
model_config = Mock()
|
| 105 |
+
model_config.get_model_for_provider.return_value = "meta-llama/llama-3.1-8b-instruct"
|
| 106 |
+
|
| 107 |
+
return api_config, model_config
|
| 108 |
+
|
| 109 |
+
@pytest.fixture
|
| 110 |
+
def event_loop():
|
| 111 |
+
"""Create an event loop for async tests."""
|
| 112 |
+
loop = asyncio.new_event_loop()
|
| 113 |
+
yield loop
|
| 114 |
+
loop.close()
|
| 115 |
+
|
| 116 |
+
class MockAgent:
|
| 117 |
+
"""Base mock agent class for testing."""
|
| 118 |
+
def __init__(self, name: str):
|
| 119 |
+
self.name = name
|
| 120 |
+
self.call_count = 0
|
| 121 |
+
|
| 122 |
+
def __call__(self, *args, **kwargs):
|
| 123 |
+
self.call_count += 1
|
| 124 |
+
return {"success": True, "agent": self.name, "calls": self.call_count}
|
| 125 |
+
|
| 126 |
+
@pytest.fixture
|
| 127 |
+
def mock_agents():
|
| 128 |
+
"""Mock agent instances for orchestrator testing."""
|
| 129 |
+
return {
|
| 130 |
+
"question_enhancer": MockAgent("question_enhancer"),
|
| 131 |
+
"web_search": MockAgent("web_search"),
|
| 132 |
+
"llm_processor": MockAgent("llm_processor"),
|
| 133 |
+
"citation_formatter": MockAgent("citation_formatter"),
|
| 134 |
+
"code_generator": MockAgent("code_generator"),
|
| 135 |
+
"code_runner": MockAgent("code_runner")
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
@pytest.fixture
|
| 139 |
+
def disable_advanced_features():
|
| 140 |
+
"""Disable advanced features for basic testing."""
|
| 141 |
+
with patch('app.ADVANCED_FEATURES_AVAILABLE', False):
|
| 142 |
+
yield
|
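
pytest injects these fixtures by name; a sketch of a unit test that leans on them (the test body is invented, but the fixture names and MockAgent behavior come from conftest.py above):

import pytest

@pytest.mark.unit
def test_mock_agent_counts_calls(mock_agents, sample_user_request):
    # Each MockAgent records how often it was invoked
    result = mock_agents["web_search"](query=sample_user_request)
    assert result == {"success": True, "agent": "web_search", "calls": 1}
    assert mock_agents["web_search"].call_count == 1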
tests/integration/__init__.py
ADDED
@@ -0,0 +1 @@
"""Integration tests package."""
tests/integration/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (212 Bytes)

tests/integration/__pycache__/test_async_sync_error_handling.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (29 kB)

tests/integration/__pycache__/test_end_to_end_workflow.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (31 kB)

tests/integration/__pycache__/test_performance_resources.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (43.3 kB)

tests/integration/__pycache__/test_ui_endpoints.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (48.1 kB)

tests/unit/__init__.py
ADDED
@@ -0,0 +1 @@
"""Unit tests package."""
tests/unit/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (198 Bytes)

tests/unit/__pycache__/test_citation_formatter_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (6.26 kB)

tests/unit/__pycache__/test_code_generator_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (7.76 kB)

tests/unit/__pycache__/test_code_runner_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (29.9 kB)

tests/unit/__pycache__/test_llm_processor_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (5.85 kB)

tests/unit/__pycache__/test_orchestrator_agent.cpython-312-pytest-8.4.0.pyc
ADDED
Binary file (30.1 kB)