File size: 14,119 Bytes
40ee6b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
"""
Production REST API server for LangGraph Multi-Agent MCTS Framework.

Provides:
- OpenAPI/Swagger documentation
- Authentication via API keys
- Rate limiting
- Health and readiness endpoints
- Request validation with Pydantic
- Prometheus metrics exposure
"""

import asyncio
import os
import time
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from typing import Any

from fastapi import Depends, FastAPI, Header, HTTPException, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, ConfigDict, Field

# Import framework components
try:
    # Project-local framework components (everything under ``src.``).
    from src.adapters.llm import create_client  # noqa: F401
    from src.api.auth import (
        APIKeyAuthenticator,
        ClientInfo,
        RateLimitConfig,
        get_authenticator,
        set_authenticator,
    )
    from src.api.exceptions import (
        AuthenticationError,
        AuthorizationError,  # noqa: F401
        FrameworkError,
        RateLimitError,
        ValidationError,  # noqa: F401
    )
    from src.models.validation import MCTSConfig, QueryInput  # noqa: F401

    IMPORTS_AVAILABLE = True
except ImportError as e:
    # Record the failure instead of raising here; request-time code consults
    # IMPORTS_AVAILABLE before touching the auth/validation helpers.
    # NOTE(review): FrameworkError, ValidationError and ClientInfo are also
    # referenced at module level further down (exception-handler decorators
    # and endpoint signatures), so a failed import would still surface as a
    # NameError while this module loads -- confirm whether stubs are needed.
    IMPORTS_AVAILABLE = False
    import_error = str(e)  # text of the ImportError that was caught

# Prometheus metrics (optional)
try:
    from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest

    PROMETHEUS_AVAILABLE = True

    # Module-level metric objects. They exist only when prometheus_client
    # imported successfully, so every use in this file is guarded by
    # PROMETHEUS_AVAILABLE.
    # Request counter, labelled by HTTP method, endpoint path and status code.
    REQUEST_COUNT = Counter("mcts_requests_total", "Total number of requests", ["method", "endpoint", "status"])
    # Request duration histogram (seconds), labelled by method and endpoint.
    REQUEST_LATENCY = Histogram("mcts_request_duration_seconds", "Request latency in seconds", ["method", "endpoint"])
    # Gauge incremented when a request enters the middleware and decremented when it leaves.
    ACTIVE_REQUESTS = Gauge("mcts_active_requests", "Number of active requests")
    # Error counter, labelled by an error-type string chosen at the call site.
    ERROR_COUNT = Counter("mcts_errors_total", "Total number of errors", ["error_type"])
except ImportError:
    # prometheus_client is optional; without it metrics are simply not recorded.
    PROMETHEUS_AVAILABLE = False


# Request/Response Models
class QueryRequest(BaseModel):
    """Request model for query processing."""

    # Pydantic v2 model configuration. This replaces the nested ``class Config``,
    # which Pydantic v2 still accepts but reports as deprecated.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Recommend defensive positions for night attack",
                "use_mcts": True,
                "use_rag": True,
                "mcts_iterations": 200,
                "thread_id": "session_123",
            }
        }
    )

    # Free-text query; must be between 1 and 10000 characters.
    query: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="User query to process",
        json_schema_extra={"example": "Recommend defensive positions for night attack scenario"},
    )
    # Feature toggles; both default to enabled.
    use_mcts: bool = Field(default=True, description="Enable MCTS tactical simulation")
    use_rag: bool = Field(default=True, description="Enable RAG context retrieval")
    # Optional per-request override, constrained to 1..10000 when supplied.
    mcts_iterations: int | None = Field(default=None, ge=1, le=10000, description="Override default MCTS iterations")
    # Optional thread identifier: at most 100 chars of letters, digits, "_" or "-".
    thread_id: str | None = Field(
        default=None,
        max_length=100,
        pattern=r"^[a-zA-Z0-9_-]+$",
        description="Conversation thread ID for state persistence",
    )


class QueryResponse(BaseModel):
    """Response model for query results."""

    # Required answer text.
    response: str = Field(
        ...,
        description="Final synthesized response",
    )
    # Required score, validated to lie within [0.0, 1.0].
    confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Overall confidence score",
    )
    # Required list of agent names.
    agents_used: list[str] = Field(
        ...,
        description="List of agents that contributed",
    )
    # Optional statistics mapping; None when not provided.
    mcts_stats: dict[str, Any] | None = Field(
        default=None,
        description="MCTS simulation statistics",
    )
    # Required duration, expressed in milliseconds.
    processing_time_ms: float = Field(
        ...,
        description="Total processing time in milliseconds",
    )
    # Free-form extras; each instance gets its own empty dict by default.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata",
    )


class HealthResponse(BaseModel):
    """Health check response."""

    # Required status string.
    status: str = Field(
        ...,
        description="Service status",
    )
    # Required timestamp, carried as a string.
    timestamp: str = Field(
        ...,
        description="Current timestamp",
    )
    # API version; falls back to "1.0.0" when omitted.
    version: str = Field(
        default="1.0.0",
        description="API version",
    )
    # Required uptime in seconds.
    uptime_seconds: float = Field(
        ...,
        description="Service uptime",
    )


class ReadinessResponse(BaseModel):
    """Readiness check response."""

    # Overall verdict.
    ready: bool = Field(
        ...,
        description="Whether service is ready",
    )
    # Mapping of check name to its boolean outcome.
    checks: dict[str, bool] = Field(
        ...,
        description="Individual check results",
    )


class ErrorResponse(BaseModel):
    """Error response model."""

    # Flag that defaults to True on every instance.
    error: bool = Field(
        default=True,
    )
    # Required machine-readable code.
    error_code: str = Field(
        ...,
        description="Machine-readable error code",
    )
    # Required human-readable text.
    message: str = Field(
        ...,
        description="Human-readable error message",
    )
    # Required timestamp, carried as a string.
    timestamp: str = Field(
        ...,
        description="Error timestamp",
    )


# Application startup
# Wall-clock time captured when this module is imported; the /health endpoint
# subtracts it from time.time() to report uptime.
start_time = time.time()
# Placeholder for the framework object. lifespan() declares it global, but the
# assignment there is currently commented out, so it stays None.
framework_instance = None


def _load_api_keys() -> list[str]:
    """Return the API keys to accept, preferring the ``MCTS_API_KEYS`` env var.

    ``MCTS_API_KEYS`` is read as a comma-separated list; surrounding whitespace
    and empty entries are dropped. When it is unset or yields no keys, the
    built-in demo key is returned so existing deployments keep working.
    """
    raw_keys = os.environ.get("MCTS_API_KEYS", "")
    configured = [key.strip() for key in raw_keys.split(",") if key.strip()]
    if configured:
        return configured

    # Make the insecure fallback visible in the server log.
    print("WARNING: MCTS_API_KEYS is not set; falling back to the built-in demo API key")
    return ["demo-api-key-replace-in-production"]


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager.

    On startup, builds an ``APIKeyAuthenticator`` (keys from ``_load_api_keys``,
    limits of 60/minute, 1000/hour, 10000/day) and registers it through
    ``set_authenticator``. Control is then yielded to the running application;
    the code after ``yield`` runs at shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (unused here).
    """
    # Kept so the commented-out framework initialisation below can be enabled
    # without further changes; nothing assigns it at the moment.
    global framework_instance

    # Startup
    print("Starting MCTS Framework API server...")

    # Keys come from the environment rather than a hard-coded literal.
    authenticator = APIKeyAuthenticator(
        valid_keys=_load_api_keys(),
        rate_limit_config=RateLimitConfig(
            requests_per_minute=60,
            requests_per_hour=1000,
            requests_per_day=10000,
        ),
    )
    set_authenticator(authenticator)

    # Initialize framework (lazy loading)
    # framework_instance = create_framework()

    print("API server started successfully")

    yield

    # Shutdown
    print("Shutting down API server...")


# Markdown shown at the top of the generated API documentation.
_API_DESCRIPTION = """
## Multi-Agent Reasoning API with MCTS Tactical Simulation

This API provides access to a sophisticated multi-agent reasoning framework that combines:
- **HRM Agent**: Hierarchical decomposition of complex queries
- **TRM Agent**: Iterative refinement for response quality
- **MCTS Engine**: Monte Carlo Tree Search for tactical simulation
- **RAG Integration**: Context retrieval from vector stores

### Features
- Secure API key authentication
- Rate limiting per client
- Real-time metrics (Prometheus)
- Distributed tracing (OpenTelemetry)
- Production-grade error handling

### Quick Start
1. Obtain an API key
2. Include `X-API-Key` header in requests
3. Send queries to `/query` endpoint
4. Monitor health via `/health` endpoint
    """

# Tag groups used by the endpoints defined in this module.
_OPENAPI_TAGS = [
    {"name": "query", "description": "Query processing operations"},
    {"name": "health", "description": "Health and readiness checks"},
    {"name": "metrics", "description": "Observability endpoints"},
]

# The ASGI application object; startup/shutdown is delegated to ``lifespan``.
app = FastAPI(
    title="LangGraph Multi-Agent MCTS API",
    description=_API_DESCRIPTION,
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_tags=_OPENAPI_TAGS,
    lifespan=lifespan,
)

# CORS middleware
# Clients authenticate with the ``X-API-Key`` request header, not cookies, so
# credentialed cross-origin requests are not needed. Credentials are disabled
# because combining ``allow_credentials=True`` with wildcard origins would let
# any website issue credentialed requests; if cookies are ever required, list
# the allowed origins explicitly before turning credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Middleware for metrics
@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    """Record Prometheus metrics around every HTTP request.

    While a request is in flight the active-request gauge is raised; once it
    finishes (successfully or not) the gauge is lowered and the request count
    and latency are recorded against the request's method and URL path. When
    Prometheus is unavailable the request is simply passed through.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request and returns the response.

    Returns:
        The response produced by ``call_next``.

    Raises:
        Whatever ``call_next`` raises; metrics are still recorded with status 500.
    """
    if PROMETHEUS_AVAILABLE:
        ACTIVE_REQUESTS.inc()

    start = time.perf_counter()
    # Pre-bind the status so the ``finally`` block can always read it. Without
    # this, a BaseException that is not an Exception (for example
    # asyncio.CancelledError when a request is cancelled) would leave ``status``
    # unbound and the metrics code would raise UnboundLocalError, hiding the
    # real exception.
    status = 500

    try:
        response = await call_next(request)
        status = response.status_code
    finally:
        if PROMETHEUS_AVAILABLE:
            ACTIVE_REQUESTS.dec()
            elapsed = time.perf_counter() - start
            REQUEST_COUNT.labels(method=request.method, endpoint=request.url.path, status=str(status)).inc()
            REQUEST_LATENCY.labels(method=request.method, endpoint=request.url.path).observe(elapsed)

    return response


# Authentication dependency
async def verify_api_key(x_api_key: str = Header(..., description="API key for authentication")):
    """Authenticate the caller from the ``X-API-Key`` header.

    Args:
        x_api_key: Value of the required ``X-API-Key`` request header.

    Returns:
        Whatever the registered authenticator's ``require_auth`` returns for
        this key.

    Raises:
        HTTPException: 500 when the framework imports failed at module load;
            401 when the authenticator raises ``AuthenticationError``;
            429 (with a ``Retry-After`` header) when it raises ``RateLimitError``.
    """
    if not IMPORTS_AVAILABLE:
        raise HTTPException(status_code=500, detail="Authentication module not available")

    try:
        authenticator = get_authenticator()
        client_info = authenticator.require_auth(x_api_key)
        return client_info
    except AuthenticationError as e:
        if PROMETHEUS_AVAILABLE:
            ERROR_COUNT.labels(error_type="authentication").inc()
        # Chain explicitly so the original failure is kept as __cause__.
        raise HTTPException(status_code=401, detail=e.user_message) from e
    except RateLimitError as e:
        if PROMETHEUS_AVAILABLE:
            ERROR_COUNT.labels(error_type="rate_limit").inc()
        # Fall back to 60 seconds when the error carries no retry hint.
        raise HTTPException(
            status_code=429, detail=e.user_message, headers={"Retry-After": str(e.retry_after_seconds or 60)}
        ) from e


# Exception handlers
@app.exception_handler(FrameworkError)
async def framework_error_handler(request: Request, exc: FrameworkError):
    """Turn an unhandled ``FrameworkError`` into an HTTP 500 JSON reply."""
    if PROMETHEUS_AVAILABLE:
        # The error counter is labelled with the exception's own error code.
        ERROR_COUNT.labels(error_type=exc.error_code).inc()

    payload = exc.to_user_response()
    return JSONResponse(content=payload, status_code=500)


@app.exception_handler(ValidationError)
async def validation_error_handler(request: Request, exc: ValidationError):
    """Turn an unhandled framework ``ValidationError`` into an HTTP 400 JSON reply."""
    if PROMETHEUS_AVAILABLE:
        # All validation failures share one fixed label.
        ERROR_COUNT.labels(error_type="validation").inc()

    payload = exc.to_user_response()
    return JSONResponse(content=payload, status_code=400)


# Endpoints
@app.get("/health", response_model=HealthResponse, tags=["health"])
async def health_check():
    """
    Health check endpoint.

    Returns basic service health status. Use this for load balancer health checks.
    """
    # datetime.utcnow() is deprecated and returns a naive value; use an aware
    # UTC datetime so the ISO-8601 timestamp carries an explicit "+00:00" offset.
    now_utc = datetime.now(timezone.utc)

    return HealthResponse(
        status="healthy",
        timestamp=now_utc.isoformat(),
        version="1.0.0",
        # Seconds elapsed since this module was imported.
        uptime_seconds=time.time() - start_time,
    )


@app.get("/ready", response_model=ReadinessResponse, tags=["health"])
async def readiness_check():
    """
    Readiness check endpoint.

    Verifies all dependencies are available. Use this for Kubernetes readiness probes.
    """
    # Only the two flags below reflect real state; the authenticator and LLM
    # client entries are fixed placeholders (no actual probe is performed).
    checks = dict(
        imports_available=IMPORTS_AVAILABLE,
        authenticator_configured=True,
        llm_client_available=True,
        prometheus_available=PROMETHEUS_AVAILABLE,
    )

    # Readiness depends on the critical checks only; the rest are informational.
    critical = ("imports_available", "authenticator_configured")
    all_ready = all(checks[name] for name in critical)

    if all_ready:
        return ReadinessResponse(ready=all_ready, checks=checks)

    raise HTTPException(status_code=503, detail="Service not ready")


@app.get("/metrics", tags=["metrics"])
async def prometheus_metrics():
    """
    Prometheus metrics endpoint.

    Returns metrics in Prometheus text format for scraping.
    """
    if PROMETHEUS_AVAILABLE:
        # Serialise the current metrics using the client's own content type.
        exposition = generate_latest()
        return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)

    # prometheus_client is not installed: report the feature as not implemented.
    raise HTTPException(status_code=501, detail="Prometheus metrics not available")


@app.post(
    "/query",
    response_model=QueryResponse,
    tags=["query"],
    responses={
        401: {"model": ErrorResponse, "description": "Authentication failed"},
        429: {"model": ErrorResponse, "description": "Rate limit exceeded"},
        400: {"model": ErrorResponse, "description": "Invalid input"},
        500: {"model": ErrorResponse, "description": "Internal server error"},
    },
)
async def process_query(request: QueryRequest, client_info: ClientInfo = Depends(verify_api_key)):
    """
    Process a query using the multi-agent MCTS framework.

    This endpoint:
    1. Validates the input query
    2. Optionally retrieves context via RAG
    3. Processes through HRM and TRM agents
    4. Optionally runs MCTS simulation
    5. Synthesizes a final response

    **Authentication**: Requires valid API key in X-API-Key header.

    **Rate Limiting**: Subject to rate limits per client.
    """
    # Named ``started`` rather than ``start_time`` so the module-level
    # ``start_time`` (used for uptime reporting) is not shadowed here.
    started = time.perf_counter()

    # Validate input using validation models. The constructed object is
    # discarded; only whether construction raises matters.
    if IMPORTS_AVAILABLE:
        try:
            QueryInput(
                query=request.query,
                use_rag=request.use_rag,
                use_mcts=request.use_mcts,
                thread_id=request.thread_id,
            )
        except Exception as e:
            if PROMETHEUS_AVAILABLE:
                ERROR_COUNT.labels(error_type="validation").inc()
            # Chain explicitly so the original validation failure is kept as __cause__.
            raise HTTPException(status_code=400, detail=f"Validation failed: {e}") from e

    # Process query (mock implementation for demo)
    # In production, this would call the actual framework
    await asyncio.sleep(0.1)  # Simulate processing

    processing_time = (time.perf_counter() - started) * 1000  # seconds -> milliseconds

    # Mock response
    return QueryResponse(
        response=f"Processed query: {request.query[:100]}...",
        confidence=0.85,
        agents_used=["hrm", "trm"] + (["mcts"] if request.use_mcts else []),
        mcts_stats=(
            {
                # Fall back to 100 when the caller did not override the iteration count.
                "iterations": request.mcts_iterations or 100,
                "best_action": "recommended_action",
                "root_visits": request.mcts_iterations or 100,
            }
            if request.use_mcts
            else None
        ),
        processing_time_ms=processing_time,
        metadata={
            "client_id": client_info.client_id,
            "thread_id": request.thread_id,
            "rag_enabled": request.use_rag,
        },
    )


@app.get("/stats", tags=["metrics"])
async def get_stats(client_info: ClientInfo = Depends(verify_api_key)):
    """
    Get usage statistics for the authenticated client.

    Returns request counts and rate limit information.
    """
    authenticator = get_authenticator()
    client_id = client_info.client_id
    usage = authenticator.get_client_stats(client_id)

    # Identity first, then the authenticator's per-client stats, then the
    # configured limits; later keys win if the stats mapping reuses a name.
    return {
        "client_id": client_id,
        "roles": list(client_info.roles),
        **usage,
        "rate_limits": {
            "per_minute": authenticator.rate_limit_config.requests_per_minute,
            "per_hour": authenticator.rate_limit_config.requests_per_hour,
            "per_day": authenticator.rate_limit_config.requests_per_day,
        },
    }


# Entry point
if __name__ == "__main__":
    # Imported lazily so uvicorn is only needed when running this file directly.
    import uvicorn

    # The app is passed as an import string rather than as an object.
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": False,
        "workers": 4,
        "log_level": "info",
        "access_log": True,
    }
    uvicorn.run("src.api.rest_server:app", **server_options)