File size: 16,203 Bytes
40ee6b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
"""
Pydantic Settings v2 configuration management for LangGraph Multi-Agent MCTS.

Provides:
- Secure configuration loading from environment variables and .env files
- Type-safe settings with validation
- Secrets protection using SecretStr
- MCTS parameter bounds validation
- Support for multiple LLM providers
"""

from enum import Enum

from pydantic import (
    Field,
    SecretStr,
    field_validator,
    model_validator,
)
from pydantic_settings import BaseSettings, SettingsConfigDict


class LLMProvider(str, Enum):
    """
    LLM backends that can be selected through ``Settings.LLM_PROVIDER``.

    Every member is also a ``str``, so it compares equal to its lowercase
    value (for example ``LLMProvider.OPENAI == "openai"``).
    """

    # Hosted provider; Settings requires OPENAI_API_KEY when this is selected.
    OPENAI = "openai"
    # Hosted provider; Settings requires ANTHROPIC_API_KEY when this is selected.
    ANTHROPIC = "anthropic"
    # Local inference server; Settings requires LMSTUDIO_BASE_URL when this is selected.
    LMSTUDIO = "lmstudio"


class LogLevel(str, Enum):
    """
    Log levels accepted by ``Settings.LOG_LEVEL``.

    Each member's value is the upper-case level name itself, and because the
    enum mixes in ``str`` a member compares equal to that name.
    """

    # Listed from most verbose to most severe.
    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"


class MCTSImplementation(str, Enum):
    """
    MCTS variants that can be selected through ``Settings.MCTS_IMPL``.

    Members double as plain strings, so they compare equal to their
    lowercase values.
    """

    # Original MCTS core
    BASELINE = "baseline"
    # Neural-guided AlphaZero-style MCTS
    NEURAL = "neural"


class Settings(BaseSettings):
    """
    Application settings with security-first configuration.

    All sensitive values use SecretStr to prevent accidental exposure in logs.
    Configuration is loaded from environment variables with .env file support.

    Environment variable names are matched case-sensitively, unknown variables
    are ignored, and field defaults are validated as well (see ``model_config``).
    Construction fails with a validation error when any field is out of bounds
    or when the selected provider's credentials are missing.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
        extra="ignore",
        validate_default=True,
    )

    # LLM Provider Configuration
    LLM_PROVIDER: LLMProvider = Field(
        default=LLMProvider.OPENAI, description="LLM provider to use (openai, anthropic, lmstudio)"
    )

    # API Keys (Secrets)
    OPENAI_API_KEY: SecretStr | None = Field(
        default=None, description="OpenAI API key (required if using OpenAI provider)"
    )

    ANTHROPIC_API_KEY: SecretStr | None = Field(
        default=None, description="Anthropic API key (required if using Anthropic provider)"
    )

    BRAINTRUST_API_KEY: SecretStr | None = Field(
        default=None, description="Braintrust API key for experiment tracking (optional)"
    )

    PINECONE_API_KEY: SecretStr | None = Field(
        default=None, description="Pinecone API key for vector storage (optional)"
    )

    PINECONE_HOST: str | None = Field(
        default=None, description="Pinecone host URL (e.g., https://index.svc.environment.pinecone.io)"
    )

    # Local LLM Configuration
    LMSTUDIO_BASE_URL: str | None = Field(
        default="http://localhost:1234/v1", description="LM Studio API base URL for local inference"
    )

    LMSTUDIO_MODEL: str | None = Field(default=None, description="LM Studio model identifier (e.g., liquid/lfm2-1.2b)")

    # MCTS Configuration with bounds validation
    MCTS_ENABLED: bool = Field(default=True, description="Enable MCTS for agent decision-making")

    MCTS_IMPL: MCTSImplementation = Field(
        default=MCTSImplementation.BASELINE, description="MCTS implementation variant to use"
    )

    MCTS_ITERATIONS: int = Field(default=100, ge=1, le=10000, description="Number of MCTS iterations (1-10000)")

    MCTS_C: float = Field(
        default=1.414, ge=0.0, le=10.0, description="MCTS exploration weight (UCB1 constant, 0.0-10.0)"
    )

    # Random seed for reproducibility
    SEED: int | None = Field(default=None, ge=0, description="Random seed for reproducibility (optional)")

    # LangSmith Configuration for tracing and evaluation
    LANGSMITH_API_KEY: SecretStr | None = Field(
        default=None, description="LangSmith API key for tracing and evaluation (optional)"
    )

    LANGSMITH_PROJECT: str = Field(default="langgraph-mcts", description="LangSmith project name")

    LANGCHAIN_TRACING_V2: bool = Field(default=False, description="Enable LangChain tracing v2")

    LANGCHAIN_ENDPOINT: str = Field(default="https://api.smith.langchain.com", description="LangChain API endpoint")

    # Weights & Biases Configuration for experiment tracking
    WANDB_API_KEY: SecretStr | None = Field(
        default=None, description="Weights & Biases API key for experiment tracking (optional)"
    )

    WANDB_PROJECT: str = Field(default="langgraph-mcts", description="W&B project name")

    WANDB_ENTITY: str | None = Field(default=None, description="W&B entity (username or team name)")

    WANDB_MODE: str = Field(default="online", description="W&B mode: online, offline, or disabled")

    # Logging Configuration
    LOG_LEVEL: LogLevel = Field(default=LogLevel.INFO, description="Application log level")

    # OpenTelemetry Configuration
    OTEL_EXPORTER_OTLP_ENDPOINT: str | None = Field(
        default=None, description="OpenTelemetry OTLP exporter endpoint URL"
    )

    # S3 Storage Configuration
    S3_BUCKET: str | None = Field(default=None, description="S3 bucket name for artifact storage")

    S3_PREFIX: str = Field(default="mcts-artifacts", description="S3 key prefix for stored artifacts")

    S3_REGION: str = Field(default="us-east-1", description="AWS region for S3 bucket")

    # Network Configuration (security)
    HTTP_TIMEOUT_SECONDS: int = Field(default=30, ge=1, le=300, description="HTTP request timeout in seconds")

    HTTP_MAX_RETRIES: int = Field(default=3, ge=0, le=10, description="Maximum HTTP request retries")

    # Security Settings
    MAX_QUERY_LENGTH: int = Field(
        default=10000, ge=1, le=100000, description="Maximum allowed query length in characters"
    )

    RATE_LIMIT_REQUESTS_PER_MINUTE: int = Field(
        default=60, ge=1, le=1000, description="Rate limit for API requests per minute"
    )

    @staticmethod
    def _check_api_key(
        v: SecretStr | None,
        label: str,
        *,
        extra_placeholders: tuple[str, ...] = (),
        required_prefix: str | None = None,
    ) -> SecretStr | None:
        """
        Run the sanity checks shared by every API-key validator.

        Args:
            v: The secret to inspect, or None when the key is not configured.
            label: Provider name used in error messages (e.g. "OpenAI").
            extra_placeholders: Provider-specific placeholder values to reject
                in addition to the common ones.
            required_prefix: Prefix the key must start with, if any.

        Returns:
            ``v`` unchanged when it passes every check (None passes trivially).

        Raises:
            ValueError: If the key is a known placeholder, lacks the required
                prefix, or is shorter than 20 characters. The message never
                includes the secret itself.
        """
        if v is None:
            return v
        secret_value = v.get_secret_value()
        # Check order (placeholder, prefix, length) decides which message wins.
        if secret_value in ("", "your-api-key-here", "REPLACE_ME", *extra_placeholders):
            raise ValueError(f"{label} API key appears to be a placeholder value")
        if required_prefix is not None and not secret_value.startswith(required_prefix):
            raise ValueError(f"{label} API key should start with '{required_prefix}'")
        if len(secret_value) < 20:
            raise ValueError(f"{label} API key appears to be too short")
        return v

    @staticmethod
    def _hostname(url: str) -> str | None:
        """Return the lower-cased host component of ``url``, or None if it has none or cannot be parsed."""
        # Local import so the class does not depend on a new module-level import.
        from urllib.parse import urlparse

        try:
            return urlparse(url).hostname
        except ValueError:
            # urlparse rejects e.g. unbalanced IPv6 brackets; treat as "no host".
            return None

    @field_validator("OPENAI_API_KEY")
    @classmethod
    def validate_openai_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate OpenAI API key format (placeholder, 'sk-' prefix, length) without exposing the value."""
        return cls._check_api_key(v, "OpenAI", extra_placeholders=("sk-xxx",), required_prefix="sk-")

    @field_validator("ANTHROPIC_API_KEY")
    @classmethod
    def validate_anthropic_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Anthropic API key format without exposing the value."""
        return cls._check_api_key(v, "Anthropic")

    @field_validator("BRAINTRUST_API_KEY")
    @classmethod
    def validate_braintrust_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Braintrust API key format without exposing the value."""
        return cls._check_api_key(v, "Braintrust")

    @field_validator("PINECONE_API_KEY")
    @classmethod
    def validate_pinecone_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Pinecone API key format without exposing the value."""
        return cls._check_api_key(v, "Pinecone")

    @field_validator("LANGSMITH_API_KEY")
    @classmethod
    def validate_langsmith_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate LangSmith API key format without exposing the value."""
        return cls._check_api_key(v, "LangSmith")

    @field_validator("WANDB_API_KEY")
    @classmethod
    def validate_wandb_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Weights & Biases API key format without exposing the value."""
        return cls._check_api_key(v, "W&B")

    @field_validator("PINECONE_HOST")
    @classmethod
    def validate_pinecone_host(cls, v: str | None) -> str | None:
        """
        Validate Pinecone host URL format.

        None and the empty string are passed through unchanged. Otherwise the
        URL must use https and its host must be ``pinecone.io`` or a subdomain
        of it.

        Raises:
            ValueError: If the scheme is not https or the host is not under pinecone.io.
        """
        if v is not None and v != "":
            if not v.startswith("https://"):
                raise ValueError("Pinecone host must start with https://")
            # Compare the parsed host, not a substring of the whole URL, so that
            # "https://evil.example/pinecone.io" or "https://pinecone.io.evil.example"
            # cannot slip through.
            host = cls._hostname(v)
            if host is None or not (host == "pinecone.io" or host.endswith(".pinecone.io")):
                raise ValueError("Pinecone host should be a valid pinecone.io URL")
        return v

    @field_validator("LMSTUDIO_BASE_URL")
    @classmethod
    def validate_lmstudio_url(cls, v: str | None) -> str | None:
        """
        Validate LM Studio base URL format.

        Emits a UserWarning (but still accepts the value) when the URL's host
        is not a local loopback name/address.

        Raises:
            ValueError: If the URL does not start with http:// or https://.
        """
        if v is not None:
            if not v.startswith(("http://", "https://")):
                raise ValueError("LM Studio base URL must start with http:// or https://")
            # Warn if not localhost (potential security concern). The parsed host
            # is compared exactly so "http://localhost.evil.example" still warns.
            if cls._hostname(v) not in ("localhost", "127.0.0.1", "::1"):
                import warnings

                warnings.warn(
                    "LM Studio URL points to non-localhost address. Ensure this is intentional and secure.",
                    UserWarning,
                    stacklevel=2,
                )
        return v

    @field_validator("OTEL_EXPORTER_OTLP_ENDPOINT")
    @classmethod
    def validate_otel_endpoint(cls, v: str | None) -> str | None:
        """
        Validate OpenTelemetry endpoint URL.

        None and the empty string are accepted as "not configured".

        Raises:
            ValueError: If a non-empty value lacks an http://, https:// or grpc:// scheme.
        """
        if v is not None and v != "" and not v.startswith(("http://", "https://", "grpc://")):
            raise ValueError("OpenTelemetry endpoint must start with http://, https://, or grpc://")
        return v

    @field_validator("S3_BUCKET")
    @classmethod
    def validate_s3_bucket_name(cls, v: str | None) -> str | None:
        """
        Validate S3 bucket name format.

        Raises:
            ValueError: If the name is not 3-63 characters long, contains
                anything other than ASCII lowercase letters, digits, hyphens
                and periods, or starts/ends with a hyphen or period.
        """
        if v is not None:
            # S3 bucket naming rules
            if not 3 <= len(v) <= 63:
                raise ValueError("S3 bucket name must be 3-63 characters long")
            # Explicit whitelist: str.isalnum() would also accept uppercase and
            # non-ASCII letters, which S3 rejects.
            allowed = "abcdefghijklmnopqrstuvwxyz0123456789-."
            if any(ch not in allowed for ch in v):
                raise ValueError("S3 bucket name can only contain lowercase letters, numbers, hyphens, and periods")
            if v.startswith("-") or v.endswith("-"):
                raise ValueError("S3 bucket name cannot start or end with a hyphen")
            if v.startswith(".") or v.endswith("."):
                raise ValueError("S3 bucket name cannot start or end with a period")
        return v

    @model_validator(mode="after")
    def validate_provider_credentials(self) -> "Settings":
        """
        Ensure required API keys are provided for the selected provider.

        Raises:
            ValueError: If the key (OpenAI/Anthropic) or base URL (LM Studio)
                needed by ``LLM_PROVIDER`` is None.
        """
        if self.LLM_PROVIDER == LLMProvider.OPENAI:
            if self.OPENAI_API_KEY is None:
                raise ValueError(
                    "OPENAI_API_KEY is required when using OpenAI provider. "
                    "Set the OPENAI_API_KEY environment variable."
                )
        elif self.LLM_PROVIDER == LLMProvider.ANTHROPIC:
            if self.ANTHROPIC_API_KEY is None:
                raise ValueError(
                    "ANTHROPIC_API_KEY is required when using Anthropic provider. "
                    "Set the ANTHROPIC_API_KEY environment variable."
                )
        elif self.LLM_PROVIDER == LLMProvider.LMSTUDIO and self.LMSTUDIO_BASE_URL is None:
            raise ValueError("LMSTUDIO_BASE_URL is required when using LM Studio provider.")
        return self

    def get_api_key(self) -> str | None:
        """
        Get the API key for the current provider.

        Returns the secret value - use with caution to avoid logging.
        Returns None for the LM Studio provider, which has no API key field.
        """
        if self.LLM_PROVIDER == LLMProvider.OPENAI and self.OPENAI_API_KEY:
            return self.OPENAI_API_KEY.get_secret_value()
        elif self.LLM_PROVIDER == LLMProvider.ANTHROPIC and self.ANTHROPIC_API_KEY:
            return self.ANTHROPIC_API_KEY.get_secret_value()
        return None

    def safe_dict(self) -> dict:
        """
        Return settings as dictionary with secrets masked.

        Safe for logging and display purposes. Every non-empty SecretStr value
        is replaced by the literal "***MASKED***"; detection is by type rather
        than by a hard-coded field list, so secret fields added later are
        masked automatically.
        """
        return {
            name: "***MASKED***" if isinstance(value, SecretStr) and value else value
            for name, value in self.model_dump().items()
        }

    def get_braintrust_api_key(self) -> str | None:
        """
        Get the Braintrust API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.BRAINTRUST_API_KEY:
            return self.BRAINTRUST_API_KEY.get_secret_value()
        return None

    def get_pinecone_api_key(self) -> str | None:
        """
        Get the Pinecone API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.PINECONE_API_KEY:
            return self.PINECONE_API_KEY.get_secret_value()
        return None

    def get_langsmith_api_key(self) -> str | None:
        """
        Get the LangSmith API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.LANGSMITH_API_KEY:
            return self.LANGSMITH_API_KEY.get_secret_value()
        return None

    def get_wandb_api_key(self) -> str | None:
        """
        Get the Weights & Biases API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.WANDB_API_KEY:
            return self.WANDB_API_KEY.get_secret_value()
        return None

    def __repr__(self) -> str:
        """Safe string representation that doesn't expose secrets (only four non-secret fields are shown)."""
        return (
            f"Settings(LLM_PROVIDER={self.LLM_PROVIDER}, "
            f"MCTS_ENABLED={self.MCTS_ENABLED}, "
            f"MCTS_IMPL={self.MCTS_IMPL}, "
            f"LOG_LEVEL={self.LOG_LEVEL})"
        )


# Module-level cache for the shared Settings instance. It stays None until the
# first get_settings() call constructs it; reset_settings() sets it back to None.
_settings: Settings | None = None


def get_settings() -> Settings:
    """
    Return the shared, module-wide Settings instance.

    The instance is built on the first call and reused afterwards. Call
    reset_settings() beforehand to force a fresh load.

    Returns:
        Settings: Application configuration instance

    Raises:
        ValidationError: If configuration is invalid
    """
    global _settings
    cached = _settings
    if cached is None:
        # First access (or first access after a reset): build and remember it.
        cached = _settings = Settings()
    return cached


def reset_settings() -> None:
    """
    Drop the cached global settings instance.

    The next get_settings() call will construct a new Settings object from
    the environment. Mainly intended for tests.
    """
    global _settings
    _settings = None


# Public API of this module: the settings class, its enums, and the
# accessor/reset functions for the cached instance.
__all__ = [
    "Settings",
    "LLMProvider",
    "LogLevel",
    "MCTSImplementation",
    "get_settings",
    "reset_settings",
]