ianshank
feat: add personality output and bug fixes
40ee6b4
"""
Pydantic Settings v2 configuration management for LangGraph Multi-Agent MCTS.
Provides:
- Secure configuration loading from environment variables and .env files
- Type-safe settings with validation
- Secrets protection using SecretStr
- MCTS parameter bounds validation
- Support for multiple LLM providers
"""
from enum import Enum
from pydantic import (
Field,
SecretStr,
field_validator,
model_validator,
)
from pydantic_settings import BaseSettings, SettingsConfigDict
class LLMProvider(str, Enum):
    """
    Identifiers of the LLM backends that can be selected.

    The ``str`` mixin makes every member compare equal to its plain string
    value, so a raw string such as ``"openai"`` can be converted with
    ``LLMProvider("openai")``.
    """

    # Hosted providers
    OPENAI = "openai"
    ANTHROPIC = "anthropic"

    # LM Studio backend
    LMSTUDIO = "lmstudio"
class LogLevel(str, Enum):
    """
    Log levels accepted by the application configuration.

    Each member's value is identical to its name, and the ``str`` mixin lets
    members be compared directly against plain strings.
    """

    # Listed from most verbose to most severe.
    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
class MCTSImplementation(str, Enum):
    """
    Selectable variants of the MCTS implementation.

    Members are ``str`` subclasses, so they compare equal to their raw
    string values.
    """

    # Original MCTS core
    BASELINE = "baseline"

    # Neural-guided AlphaZero-style MCTS
    NEURAL = "neural"
class Settings(BaseSettings):
    """
    Application settings with security-first configuration.

    All sensitive values use SecretStr to prevent accidental exposure in logs.
    Configuration is loaded from environment variables with .env file support.

    Validation performed at construction time:
    - API keys are rejected when they look like placeholders or are too short.
    - URL-like settings are checked for an allowed scheme (and host, for Pinecone).
    - The S3 bucket name is checked against basic S3 naming rules.
    - The credentials needed by the selected ``LLM_PROVIDER`` must be present.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
        extra="ignore",
        validate_default=True,
    )

    # LLM Provider Configuration
    LLM_PROVIDER: LLMProvider = Field(
        default=LLMProvider.OPENAI, description="LLM provider to use (openai, anthropic, lmstudio)"
    )

    # API Keys (Secrets)
    OPENAI_API_KEY: SecretStr | None = Field(
        default=None, description="OpenAI API key (required if using OpenAI provider)"
    )
    ANTHROPIC_API_KEY: SecretStr | None = Field(
        default=None, description="Anthropic API key (required if using Anthropic provider)"
    )
    BRAINTRUST_API_KEY: SecretStr | None = Field(
        default=None, description="Braintrust API key for experiment tracking (optional)"
    )
    PINECONE_API_KEY: SecretStr | None = Field(
        default=None, description="Pinecone API key for vector storage (optional)"
    )
    PINECONE_HOST: str | None = Field(
        default=None, description="Pinecone host URL (e.g., https://index.svc.environment.pinecone.io)"
    )

    # Local LLM Configuration
    LMSTUDIO_BASE_URL: str | None = Field(
        default="http://localhost:1234/v1", description="LM Studio API base URL for local inference"
    )
    LMSTUDIO_MODEL: str | None = Field(default=None, description="LM Studio model identifier (e.g., liquid/lfm2-1.2b)")

    # MCTS Configuration with bounds validation
    MCTS_ENABLED: bool = Field(default=True, description="Enable MCTS for agent decision-making")
    MCTS_IMPL: MCTSImplementation = Field(
        default=MCTSImplementation.BASELINE, description="MCTS implementation variant to use"
    )
    MCTS_ITERATIONS: int = Field(default=100, ge=1, le=10000, description="Number of MCTS iterations (1-10000)")
    MCTS_C: float = Field(
        default=1.414, ge=0.0, le=10.0, description="MCTS exploration weight (UCB1 constant, 0.0-10.0)"
    )

    # Random seed for reproducibility
    SEED: int | None = Field(default=None, ge=0, description="Random seed for reproducibility (optional)")

    # LangSmith Configuration for tracing and evaluation
    LANGSMITH_API_KEY: SecretStr | None = Field(
        default=None, description="LangSmith API key for tracing and evaluation (optional)"
    )
    LANGSMITH_PROJECT: str = Field(default="langgraph-mcts", description="LangSmith project name")
    LANGCHAIN_TRACING_V2: bool = Field(default=False, description="Enable LangChain tracing v2")
    LANGCHAIN_ENDPOINT: str = Field(default="https://api.smith.langchain.com", description="LangChain API endpoint")

    # Weights & Biases Configuration for experiment tracking
    WANDB_API_KEY: SecretStr | None = Field(
        default=None, description="Weights & Biases API key for experiment tracking (optional)"
    )
    WANDB_PROJECT: str = Field(default="langgraph-mcts", description="W&B project name")
    WANDB_ENTITY: str | None = Field(default=None, description="W&B entity (username or team name)")
    WANDB_MODE: str = Field(default="online", description="W&B mode: online, offline, or disabled")

    # Logging Configuration
    LOG_LEVEL: LogLevel = Field(default=LogLevel.INFO, description="Application log level")

    # OpenTelemetry Configuration
    OTEL_EXPORTER_OTLP_ENDPOINT: str | None = Field(
        default=None, description="OpenTelemetry OTLP exporter endpoint URL"
    )

    # S3 Storage Configuration
    S3_BUCKET: str | None = Field(default=None, description="S3 bucket name for artifact storage")
    S3_PREFIX: str = Field(default="mcts-artifacts", description="S3 key prefix for stored artifacts")
    S3_REGION: str = Field(default="us-east-1", description="AWS region for S3 bucket")

    # Network Configuration (security)
    HTTP_TIMEOUT_SECONDS: int = Field(default=30, ge=1, le=300, description="HTTP request timeout in seconds")
    HTTP_MAX_RETRIES: int = Field(default=3, ge=0, le=10, description="Maximum HTTP request retries")

    # Security Settings
    MAX_QUERY_LENGTH: int = Field(
        default=10000, ge=1, le=100000, description="Maximum allowed query length in characters"
    )
    RATE_LIMIT_REQUESTS_PER_MINUTE: int = Field(
        default=60, ge=1, le=1000, description="Rate limit for API requests per minute"
    )

    @staticmethod
    def _check_api_key(
        v: SecretStr | None,
        label: str,
        extra_placeholders: tuple[str, ...] = (),
        required_prefix: str | None = None,
    ) -> SecretStr | None:
        """
        Shared sanity check used by all API-key validators.

        Args:
            v: The secret to check, or None when the key is not configured.
            label: Provider name used at the start of every error message.
            extra_placeholders: Additional provider-specific placeholder strings to reject.
            required_prefix: When given, the key must start with this prefix.

        Returns:
            ``v`` unchanged when it is None or passes every check.

        Raises:
            ValueError: If the key is a known placeholder, lacks the required
                prefix, or is shorter than 20 characters. The secret value
                itself is never included in the message.
        """
        if v is None:
            return v
        secret_value = v.get_secret_value()
        # Check for obviously invalid patterns
        if secret_value in ("", "your-api-key-here", "REPLACE_ME", *extra_placeholders):
            raise ValueError(f"{label} API key appears to be a placeholder value")
        if required_prefix is not None and not secret_value.startswith(required_prefix):
            raise ValueError(f"{label} API key should start with '{required_prefix}'")
        if len(secret_value) < 20:
            raise ValueError(f"{label} API key appears to be too short")
        return v

    @field_validator("OPENAI_API_KEY")
    @classmethod
    def validate_openai_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate OpenAI API key format without exposing the value."""
        return cls._check_api_key(v, "OpenAI", extra_placeholders=("sk-xxx",), required_prefix="sk-")

    @field_validator("ANTHROPIC_API_KEY")
    @classmethod
    def validate_anthropic_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Anthropic API key format without exposing the value."""
        return cls._check_api_key(v, "Anthropic")

    @field_validator("BRAINTRUST_API_KEY")
    @classmethod
    def validate_braintrust_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Braintrust API key format without exposing the value."""
        return cls._check_api_key(v, "Braintrust")

    @field_validator("PINECONE_API_KEY")
    @classmethod
    def validate_pinecone_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Pinecone API key format without exposing the value."""
        return cls._check_api_key(v, "Pinecone")

    @field_validator("LANGSMITH_API_KEY")
    @classmethod
    def validate_langsmith_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate LangSmith API key format without exposing the value."""
        return cls._check_api_key(v, "LangSmith")

    @field_validator("WANDB_API_KEY")
    @classmethod
    def validate_wandb_key_format(cls, v: SecretStr | None) -> SecretStr | None:
        """Validate Weights & Biases API key format without exposing the value."""
        return cls._check_api_key(v, "W&B")

    @field_validator("PINECONE_HOST")
    @classmethod
    def validate_pinecone_host(cls, v: str | None) -> str | None:
        """
        Validate Pinecone host URL format.

        None and the empty string are passed through untouched. Any other
        value must use https and its hostname must be ``pinecone.io`` or a
        subdomain of it.

        Raises:
            ValueError: If the scheme is not https or the host is not under pinecone.io.
        """
        if v is None or v == "":
            return v
        if not v.startswith("https://"):
            raise ValueError("Pinecone host must start with https://")

        from urllib.parse import urlparse

        # Compare the parsed hostname rather than searching the whole URL, so
        # "pinecone.io" appearing in a path, query or as a prefix of another
        # domain (e.g. pinecone.io.example.com) is not accepted.
        hostname = urlparse(v).hostname or ""
        if hostname != "pinecone.io" and not hostname.endswith(".pinecone.io"):
            raise ValueError("Pinecone host should be a valid pinecone.io URL")
        return v

    @field_validator("LMSTUDIO_BASE_URL")
    @classmethod
    def validate_lmstudio_url(cls, v: str | None) -> str | None:
        """
        Validate LM Studio base URL format.

        Emits a UserWarning (does not fail) when the URL's hostname is not a
        loopback name/address.

        Raises:
            ValueError: If the URL does not start with http:// or https://.
        """
        if v is None:
            return v
        if not v.startswith(("http://", "https://")):
            raise ValueError("LM Studio base URL must start with http:// or https://")

        from urllib.parse import urlparse

        # Warn if not localhost (potential security concern). The parsed
        # hostname is compared exactly so that e.g. "localhost.example.com"
        # or a path containing "localhost" does not silence the warning.
        if urlparse(v).hostname not in ("localhost", "127.0.0.1", "::1"):
            import warnings

            warnings.warn(
                "LM Studio URL points to non-localhost address. Ensure this is intentional and secure.",
                UserWarning,
                stacklevel=2,
            )
        return v

    @field_validator("OTEL_EXPORTER_OTLP_ENDPOINT")
    @classmethod
    def validate_otel_endpoint(cls, v: str | None) -> str | None:
        """
        Validate OpenTelemetry endpoint URL.

        None and the empty string are accepted as "not configured".

        Raises:
            ValueError: If a non-empty value does not start with http://, https://, or grpc://.
        """
        if v is not None and v != "" and not v.startswith(("http://", "https://", "grpc://")):
            raise ValueError("OpenTelemetry endpoint must start with http://, https://, or grpc://")
        return v

    @field_validator("S3_BUCKET")
    @classmethod
    def validate_s3_bucket_name(cls, v: str | None) -> str | None:
        """
        Validate S3 bucket name format.

        Checks length (3-63), the allowed character set, and that the name
        does not start or end with a hyphen or period.

        Raises:
            ValueError: If any of the naming rules above is violated.
        """
        if v is None:
            return v
        # S3 bucket naming rules
        if len(v) < 3 or len(v) > 63:
            raise ValueError("S3 bucket name must be 3-63 characters long")
        # Explicit allow-list: str.isalnum() would also accept uppercase and
        # non-ASCII alphanumerics, contradicting the error message below.
        if not set(v) <= set("abcdefghijklmnopqrstuvwxyz0123456789-."):
            raise ValueError("S3 bucket name can only contain lowercase letters, numbers, hyphens, and periods")
        if v.startswith("-") or v.endswith("-"):
            raise ValueError("S3 bucket name cannot start or end with a hyphen")
        if v.startswith(".") or v.endswith("."):
            raise ValueError("S3 bucket name cannot start or end with a period")
        return v

    @model_validator(mode="after")
    def validate_provider_credentials(self) -> "Settings":
        """
        Ensure required API keys are provided for the selected provider.

        Raises:
            ValueError: If the OpenAI or Anthropic provider is selected without
                its API key, or LM Studio is selected without a base URL.
        """
        if self.LLM_PROVIDER == LLMProvider.OPENAI:
            if self.OPENAI_API_KEY is None:
                raise ValueError(
                    "OPENAI_API_KEY is required when using OpenAI provider. "
                    "Set the OPENAI_API_KEY environment variable."
                )
        elif self.LLM_PROVIDER == LLMProvider.ANTHROPIC:
            if self.ANTHROPIC_API_KEY is None:
                raise ValueError(
                    "ANTHROPIC_API_KEY is required when using Anthropic provider. "
                    "Set the ANTHROPIC_API_KEY environment variable."
                )
        elif self.LLM_PROVIDER == LLMProvider.LMSTUDIO and self.LMSTUDIO_BASE_URL is None:
            raise ValueError("LMSTUDIO_BASE_URL is required when using LM Studio provider.")
        return self

    def get_api_key(self) -> str | None:
        """
        Get the API key for the current provider.

        Returns the secret value - use with caution to avoid logging.
        Returns None for providers other than OpenAI and Anthropic, or when
        the matching key is not set.
        """
        if self.LLM_PROVIDER == LLMProvider.OPENAI and self.OPENAI_API_KEY:
            return self.OPENAI_API_KEY.get_secret_value()
        if self.LLM_PROVIDER == LLMProvider.ANTHROPIC and self.ANTHROPIC_API_KEY:
            return self.ANTHROPIC_API_KEY.get_secret_value()
        return None

    def safe_dict(self) -> dict:
        """
        Return settings as dictionary with secrets masked.

        Every non-empty SecretStr value in the dumped model is replaced by the
        string ``"***MASKED***"``, so secret fields added later are covered
        without maintaining a separate list of field names.
        Safe for logging and display purposes.
        """
        data = self.model_dump()
        for name, value in data.items():
            # Mask by type rather than by a hard-coded list of field names.
            if isinstance(value, SecretStr) and value:
                data[name] = "***MASKED***"
        return data

    def get_braintrust_api_key(self) -> str | None:
        """
        Get the Braintrust API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.BRAINTRUST_API_KEY:
            return self.BRAINTRUST_API_KEY.get_secret_value()
        return None

    def get_pinecone_api_key(self) -> str | None:
        """
        Get the Pinecone API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.PINECONE_API_KEY:
            return self.PINECONE_API_KEY.get_secret_value()
        return None

    def get_langsmith_api_key(self) -> str | None:
        """
        Get the LangSmith API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.LANGSMITH_API_KEY:
            return self.LANGSMITH_API_KEY.get_secret_value()
        return None

    def get_wandb_api_key(self) -> str | None:
        """
        Get the Weights & Biases API key if configured.

        Returns the secret value - use with caution to avoid logging.
        """
        if self.WANDB_API_KEY:
            return self.WANDB_API_KEY.get_secret_value()
        return None

    def __repr__(self) -> str:
        """Safe string representation that doesn't expose secrets."""
        # Only non-sensitive fields are included.
        return (
            f"Settings(LLM_PROVIDER={self.LLM_PROVIDER}, MCTS_ENABLED={self.MCTS_ENABLED}, "
            f"MCTS_IMPL={self.MCTS_IMPL}, LOG_LEVEL={self.LOG_LEVEL})"
        )
# Module-level cache for the Settings instance. It stays None until
# get_settings() builds it, and reset_settings() sets it back to None.
_settings: Settings | None = None
def get_settings() -> Settings:
    """
    Return the shared Settings instance, creating it on first use.

    The instance is built once and kept in the module-level ``_settings``
    variable; later calls return that same object. Call reset_settings()
    first to force a reload.

    Returns:
        Settings: Application configuration instance

    Raises:
        ValidationError: If configuration is invalid
    """
    global _settings
    # Fast path: already loaded.
    if _settings is not None:
        return _settings
    _settings = Settings()
    return _settings
def reset_settings() -> None:
    """
    Drop the cached settings instance.

    Sets the module-level ``_settings`` variable back to None so that the
    next get_settings() call constructs a fresh Settings object from the
    environment. Useful for testing.
    """
    global _settings
    _settings = None
# Public API of this module: the names exported by a star-import.
__all__ = [
    "Settings",
    "LLMProvider",
    "LogLevel",
    "MCTSImplementation",
    "get_settings",
    "reset_settings",
]