sachin sharma committed on
Commit · da2b98d
Parent(s): 5ddae77

removed verbosity

Browse files:
- app/api/controllers.py +11 -55
- app/api/routes/prediction.py +4 -40
- app/core/app.py +15 -90
- app/services/base.py +7 -109
- app/services/inference.py +43 -152
app/api/controllers.py
CHANGED

@@ -1,11 +1,5 @@
-"""
-Controllers for handling API business logic.
-
-This controller layer orchestrates requests between the API routes and the
-inference service layer. It handles validation and error responses.
-
-The controller is model-agnostic and works with any InferenceService implementation.
-"""
+"""API controllers for request handling and validation."""
+
 from fastapi import HTTPException

 from app.core.logging import logger

@@ -14,66 +8,28 @@ from app.api.models import ImageRequest, PredictionResponse


 class PredictionController:
-    """
-    Controller for ML prediction endpoints.
-
-    This controller works with any InferenceService implementation,
-    making it easy to swap different models without changing the API layer.
-    """
+    """Controller for prediction endpoints."""

     @staticmethod
     async def predict(
         request: ImageRequest,
         service: InferenceService
     ) -> PredictionResponse:
-        """
-        Run inference using the configured model service.
-
-        The controller handles request validation and error handling,
-        while the service handles the actual inference logic.
-
-        Args:
-            request: ImageRequest with base64-encoded image data
-            service: Initialized inference service (can be any model)
-
-        Returns:
-            PredictionResponse with prediction results
-
-        Raises:
-            HTTPException: If service unavailable, invalid input, or inference fails
-        """
+        """Run inference using the configured service."""
         try:
-            if not service:
-                raise HTTPException(
-                    status_code=503,
-                    detail="Service not initialized"
-                )
-
-            if not service.is_loaded:
-                raise HTTPException(
-                    status_code=503,
-                    detail="Model not loaded"
-                )
+            if not service or not service.is_loaded:
+                raise HTTPException(503, "Service not available")

-            # Validate media type
             if not request.image.mediaType.startswith('image/'):
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"Invalid media type: {request.image.mediaType}. Must be image/*"
-                )
+                raise HTTPException(400, f"Invalid media type: {request.image.mediaType}")

-            response = await service.predict(request)
-            return response
+            return await service.predict(request)

         except HTTPException:
             raise
         except ValueError as e:
-            # Service raises ValueError for invalid input
             logger.error(f"Invalid input: {e}")
-            raise HTTPException(
-                status_code=400,
-                detail=str(e)
-            )
+            raise HTTPException(400, str(e))
         except Exception as e:
-            # Unexpected errors
             logger.error(f"Prediction failed: {e}")
-            raise HTTPException(
-                status_code=500,
-                detail="Internal server error"
-            )
+            raise HTTPException(500, "Internal server error")
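A minimal sketch (not part of this commit) of how the condensed error mapping above can be exercised with a stubbed service; `_StubService` and `_demo` are hypothetical names, `PredictionController` is assumed importable from app.api.controllers:

```python
import asyncio

from fastapi import HTTPException

from app.api.controllers import PredictionController


class _StubService:
    """Hypothetical test double standing in for an InferenceService."""

    is_loaded = False  # simulates a service whose model never loaded

    async def predict(self, request):
        raise ValueError("bad image")


async def _demo() -> None:
    try:
        # request is never inspected: the is_loaded check fails first
        await PredictionController.predict(request=None, service=_StubService())
    except HTTPException as exc:
        assert exc.status_code == 503  # mapped to "Service not available"


asyncio.run(_demo())
```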
app/api/routes/prediction.py
CHANGED

@@ -1,9 +1,5 @@
-"""
-ML Prediction routes.
-
-This module defines the HTTP endpoints for running model inference.
-The routes are model-agnostic and work with any InferenceService implementation.
-"""
+"""Prediction API routes."""
+
 from fastapi import APIRouter, Depends

 from app.api.controllers import PredictionController

@@ -20,40 +16,8 @@ async def predict(
     service: InferenceService = Depends(get_inference_service)
 ):
     """
-    Run inference on
+    Run inference on base64-encoded image.

-    The actual model used depends on what was configured during app startup.
-
-    Example Request Body:
-    ```json
-    {
-        "image": {
-            "mediaType": "image/jpeg",
-            "data": "<base64-encoded-image-data>"
-        }
-    }
-    ```
-
-    Example Response:
-    ```json
-    {
-        "prediction": "tabby cat",
-        "confidence": 0.8542,
-        "model": "microsoft/resnet-18",
-        "predicted_label": 281,
-        "mediaType": "image/jpeg"
-    }
-    ```
-
-    Args:
-        request: ImageRequest containing base64-encoded image
-        service: Injected inference service (configured at startup)
-
-    Returns:
-        PredictionResponse with model predictions
-
-    Raises:
-        HTTPException: 400 for invalid input, 503 if service unavailable, 500 for errors
+    Returns prediction, confidence, predicted label, model name, and media type.
     """
     return await PredictionController.predict(request, service)
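For reference, the request/response examples dropped from the route docstring, recast as a hedged client sketch; the `/predict` path and port 8000 are taken from this repo's router example and Settings defaults, while `requests` is an assumed client library, not part of the repo:

```python
import requests

# Request body shape from the removed docstring example
payload = {
    "image": {
        "mediaType": "image/jpeg",
        "data": "<base64-encoded-image-data>",
    }
}

resp = requests.post("http://localhost:8000/predict", json=payload)
print(resp.json())
# Expected response shape (also from the removed docstring):
# {"prediction": "tabby cat", "confidence": 0.8542, "model": "microsoft/resnet-18",
#  "predicted_label": 281, "mediaType": "image/jpeg"}
```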
app/core/app.py
CHANGED

@@ -1,15 +1,5 @@
-"""
-This module consolidates all core application components:
-- Configuration management
-- Global service instance (dependency injection)
-- Application lifecycle (startup/shutdown)
-- FastAPI app creation
-
-By keeping everything in one place, we avoid the complexity of managing
-global variables across multiple modules.
-"""
+"""FastAPI application factory and core infrastructure."""
+
 import warnings
 from contextlib import asynccontextmanager
 from typing import AsyncGenerator, Optional

@@ -25,100 +15,45 @@ from app.api.routes import prediction


 class Settings(BaseSettings):
-    """
-    Application settings with environment variable support.
-
-    Settings can be overridden via environment variables or .env file.
-    """
-    # Basic app settings
-    app_name: str = Field(default="ML Inference Service", description="Application name")
-    app_version: str = Field(default="0.1.0", description="Application version")
-    debug: bool = Field(default=False, description="Debug mode")
+    """Application settings. Override via environment variables or .env file."""
+
+    app_name: str = Field(default="ML Inference Service")
+    app_version: str = Field(default="0.1.0")
+    debug: bool = Field(default=False)
+    host: str = Field(default="0.0.0.0")
+    port: int = Field(default=8000)

     class Config:
-        """Load from .env file if it exists."""
         env_file = ".env"


-# Global settings instance
 settings = Settings()

-
-# Global inference service instance (initialized during startup)
 _inference_service: Optional[InferenceService] = None


 def get_inference_service() -> Optional[InferenceService]:
-    """
-    Get the inference service instance for dependency injection.
-
-    This function is used in FastAPI route handlers via Depends().
-    The service is initialized once during app startup and reused
-    for all requests.
-
-    Returns:
-        The initialized inference service, or None if not yet initialized.
-
-    Example:
-        ```python
-        @router.post("/predict")
-        async def predict(
-            request: ImageRequest,
-            service: InferenceService = Depends(get_inference_service)
-        ):
-            return await service.predict(request)
-        ```
-    """
+    """Get inference service for dependency injection."""
     return _inference_service


 def _set_inference_service(service: InferenceService) -> None:
-    """
-    INTERNAL: Set the global inference service instance.
-
-    Called during application startup to register the service.
-    This is marked as internal (prefixed with _) because it should
-    only be called from the lifespan handler below.
-
-    Args:
-        service: The initialized inference service instance.
-    """
+    """Set inference service. Called internally during startup."""
     global _inference_service
     _inference_service = service


 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
-    """
-    Application lifespan manager.
-
-    Handles startup and shutdown events for the FastAPI application.
-    During startup, it initializes and loads the inference service.
-
-    CUSTOMIZATION POINT FOR GRAD STUDENTS:
-    To use your own model, replace ResNetInferenceService below with
-    your implementation that subclasses InferenceService.
-
-    Example:
-        ```python
-        service = MyCustomService(model_name="my-org/my-model")
-        await service.load_model()
-        _set_inference_service(service)
-        ```
-    """
+    """Application lifecycle: startup/shutdown."""
     logger.info("Starting ML Inference Service...")

     try:
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=FutureWarning)

+            # Replace ResNetInferenceService with your own implementation
+            service = ResNetInferenceService(model_name="microsoft/resnet-18")
             await service.load_model()
             _set_inference_service(service)

@@ -134,17 +69,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:


 def create_app() -> FastAPI:
-    """
-    Create and configure the FastAPI application.
-
-    This is the main entry point for the application. It:
-    1. Creates a FastAPI instance with metadata from settings
-    2. Attaches the lifespan handler for startup/shutdown
-    3. Registers API routes
-
-    Returns:
-        Configured FastAPI application instance.
-    """
+    """Create and configure FastAPI application."""
     app = FastAPI(
         title=settings.app_name,
         description="ML inference service for image classification",

@@ -155,4 +80,4 @@ def create_app() -> FastAPI:

     app.include_router(prediction.router)

     return app
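A hypothetical entry-point sketch (not in this commit) showing how the new host/port settings above could be used to serve the app; uvicorn is an assumed dependency:

```python
import uvicorn

from app.core.app import create_app, settings

app = create_app()

if __name__ == "__main__":
    # Host and port come from the Settings fields added in this commit
    uvicorn.run(app, host=settings.host, port=settings.port)
```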
app/services/base.py
CHANGED

@@ -1,135 +1,33 @@
-"""
-Abstract base class for ML inference services.
-
-This module defines the contract that all inference services must implement.
-Grad students should subclass `InferenceService` and implement the abstract methods
-to integrate their models with the serving infrastructure.
-"""
+"""Abstract base class for ML inference services."""

 from abc import ABC, abstractmethod
 from typing import Generic, TypeVar
-
 from pydantic import BaseModel

-
-# Type variables for request and response models
 TRequest = TypeVar('TRequest', bound=BaseModel)
 TResponse = TypeVar('TResponse', bound=BaseModel)


 class InferenceService(ABC, Generic[TRequest, TResponse]):
     """
-    This class defines the interface that all model serving implementations must follow.
-    By subclassing this and implementing the abstract methods, you can integrate any
-    ML model with the serving infrastructure.
-
-    Type Parameters:
-        TRequest: Pydantic model for input requests (e.g., ImageRequest, TextRequest)
-        TResponse: Pydantic model for prediction responses (e.g., PredictionResponse)
-
-    Example:
-        ```python
-        class MyModelService(InferenceService[MyRequest, MyResponse]):
-            async def load_model(self) -> None:
-                self.model = torch.load("my_model.pt")
-                self._is_loaded = True
-
-            async def predict(self, request: MyRequest) -> MyResponse:
-                # Run inference
-                output = self.model(request.data)
-                return MyResponse(result=output)
-
-            @property
-            def is_loaded(self) -> bool:
-                return self._is_loaded
-        ```
+    Base class for inference services. Subclass this to integrate your model.
+
+    For CPU-intensive inference, offload work to a background thread using
+    asyncio.to_thread() to avoid blocking the event loop.
     """

     @abstractmethod
     async def load_model(self) -> None:
-        """
-        Load the model weights and any required processors/tokenizers.
-
-        This method is called once during application startup (in the lifespan handler).
-        Use this to:
-        - Load model weights from disk
-        - Initialize processors, tokenizers, or other preprocessing components
-        - Set up any required state
-        - Perform model warmup if needed
-
-        Raises:
-            FileNotFoundError: If model files don't exist
-            RuntimeError: If model loading fails
-        """
+        """Load model weights and processors. Called once at startup."""
         pass

     @abstractmethod
     async def predict(self, request: TRequest) -> TResponse:
-        """
-        Run inference on the input request and return a typed response.
-
-        This method is called for each prediction request. It should:
-        1. Extract input data from the request
-        2. Preprocess the input (if needed)
-        3. Run the model inference
-        4. Post-process the output
-        5. Return a Pydantic response model
-
-        Args:
-            request: Input request containing the data to predict on.
-                Type is specified by the TRequest type parameter.
-
-        Returns:
-            Typed Pydantic response model containing predictions.
-            Type is specified by the TResponse type parameter.
-
-        Raises:
-            ValueError: If input data is invalid
-            RuntimeError: If model inference fails
-
-        Important - Background Threading:
-            For CPU-intensive operations (like deep learning inference), you MUST
-            offload computation to a background thread to avoid blocking the event loop.
-
-            Pattern to follow:
-            ```python
-            import asyncio
-
-            def _predict_sync(self, request: TRequest) -> TResponse:
-                # Heavy CPU work here (PyTorch, TensorFlow, etc.)
-                result = self.model(data)
-                return TResponse(result=result)
-
-            async def predict(self, request: TRequest) -> TResponse:
-                # Offload to thread pool
-                return await asyncio.to_thread(self._predict_sync, request)
-            ```
-
-        Why this matters:
-            - Inference can take 1-3+ seconds and will freeze the server
-            - asyncio.to_thread() runs the work in a background thread
-            - The event loop stays responsive to handle other requests
-        """
+        """Run inference and return typed response."""
         pass

     @property
     @abstractmethod
     def is_loaded(self) -> bool:
-        """
-        Check if the model is loaded and ready for inference.
-
-        Returns:
-            True if model is loaded and ready, False otherwise.
-
-        Example:
-            ```python
-            @property
-            def is_loaded(self) -> bool:
-                return self.model is not None and self._is_loaded
-            ```
-        """
+        """Check if model is loaded and ready."""
         pass
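A minimal concrete subclass sketch following the threading pattern described in the removed docstring; EchoRequest, EchoResponse, and EchoService are illustrative names, not part of the repo:

```python
import asyncio

from pydantic import BaseModel

from app.services.base import InferenceService


class EchoRequest(BaseModel):
    text: str


class EchoResponse(BaseModel):
    result: str


class EchoService(InferenceService[EchoRequest, EchoResponse]):
    """Toy service: no real model, just enough to satisfy the contract."""

    def __init__(self) -> None:
        self._is_loaded = False

    async def load_model(self) -> None:
        # A real service would load weights/processors here
        self._is_loaded = True

    def _predict_sync(self, request: EchoRequest) -> EchoResponse:
        # Stand-in for CPU-heavy work (PyTorch, TensorFlow, ...)
        return EchoResponse(result=request.text.upper())

    async def predict(self, request: EchoRequest) -> EchoResponse:
        # Offload to a background thread so the event loop stays responsive
        return await asyncio.to_thread(self._predict_sync, request)

    @property
    def is_loaded(self) -> bool:
        return self._is_loaded
```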
app/services/inference.py
CHANGED

@@ -1,16 +1,5 @@
-"""
-This module provides an EXAMPLE implementation of the InferenceService ABC.
-Grad students should use this as a reference when implementing their own model services.
-
-This example demonstrates:
-- How to load a HuggingFace transformer model
-- How to preprocess image inputs
-- How to return typed Pydantic responses
-- How to use background threading for CPU-intensive inference
-- Proper error handling and logging
-"""
+"""ResNet inference service implementation."""
+
 import os
 import base64
 import asyncio

@@ -25,173 +14,75 @@ from app.api.models import ImageRequest, PredictionResponse


 class ResNetInferenceService(InferenceService[ImageRequest, PredictionResponse]):
-    """
-    EXAMPLE: ResNet inference service implementation.
-
-    This is a reference implementation showing how to integrate a HuggingFace
-    image classification model with the serving infrastructure.
-
-    To create your own service:
-    1. Subclass InferenceService[YourRequest, YourResponse]
-    2. Implement load_model() to load your model
-    3. Implement predict() to run inference and return typed response
-    4. Implement the is_loaded property
-
-    This service loads a ResNet-18 model for ImageNet classification.
-    """
+    """ResNet-18 inference service for image classification."""

     def __init__(self, model_name: str = "microsoft/resnet-18"):
-        """
-        Initialize the ResNet service.
-
-        Args:
-            model_name: Model identifier (e.g., "microsoft/resnet-18").
-                Model files must exist in models/{model_name}/ directory.
-                The full org/model structure is preserved.
-
-        Example:
-            For model_name="microsoft/resnet-18", expects files at:
-            models/microsoft/resnet-18/config.json
-            models/microsoft/resnet-18/pytorch_model.bin
-            etc.
-        """
         self.model_name = model_name
         self.model = None
         self.processor = None
         self._is_loaded = False
-
-        # Preserve full org/model path structure
         self.model_path = os.path.join("models", model_name)
+        logger.info(f"Initializing ResNet service: {self.model_path}")

     async def load_model(self) -> None:
-        """
-        Load the ResNet model and processor.
-
-        This method loads the model once during startup and reuses it for all requests.
-        Called by the application lifespan handler.
-        """
         if self._is_loaded:
-            logger.debug("Model already loaded, skipping...")
             return

-        try:
-            if not os.path.exists(self.model_path):
-                raise FileNotFoundError(
-                    f"Model directory not found: {self.model_path}\n"
-                    f"Make sure the model files are downloaded to the correct location."
-                )
-
-            config_path = os.path.join(self.model_path, "config.json")
-            if not os.path.exists(config_path):
-                raise FileNotFoundError(f"Model config not found: {config_path}")
-
-            logger.info(f"Loading ResNet model from: {self.model_path}")
-
-            # Suppress warnings during model loading
-            import warnings
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", category=FutureWarning)
-                warnings.filterwarnings("ignore", message="Could not find image processor class")
-
-                self.processor = AutoImageProcessor.from_pretrained(
-                    self.model_path,
-                    local_files_only=True
-                )
-                self.model = ResNetForImageClassification.from_pretrained(
-                    self.model_path,
-                    local_files_only=True
-                )
-
-            self._is_loaded = True
-            logger.info("ResNet model loaded successfully")
-            logger.info(f"Model architecture: {self.model.config.architectures}")
-            logger.info(f"Model has {len(self.model.config.id2label)} classes")
-
-        except Exception as e:
-            logger.error(f"Failed to load ResNet model: {e}")
-            logger.error(f"Hint: Ensure model files exist at: {self.model_path}")
-            raise
+        if not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"Model not found: {self.model_path}")
+
+        config_path = os.path.join(self.model_path, "config.json")
+        if not os.path.exists(config_path):
+            raise FileNotFoundError(f"Config not found: {config_path}")
+
+        logger.info(f"Loading model from {self.model_path}")
+
+        import warnings
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=FutureWarning)
+            self.processor = AutoImageProcessor.from_pretrained(
+                self.model_path, local_files_only=True
+            )
+            self.model = ResNetForImageClassification.from_pretrained(
+                self.model_path, local_files_only=True
+            )
+
+        self._is_loaded = True
+        logger.info(f"Model loaded: {len(self.model.config.id2label)} classes")

     def _predict_sync(self, request: ImageRequest) -> PredictionResponse:
-        """
-        INTERNAL: Synchronous prediction logic that runs in a background thread.
-
-        This method contains all CPU-intensive operations (image decoding,
-        preprocessing, PyTorch inference). It's called from predict() via
-        asyncio.to_thread() to avoid blocking the event loop.
-
-        Returns:
-            PredictionResponse with prediction, confidence, and metadata
-        """
+        """Synchronous inference logic. Runs in background thread."""
+        image_data = base64.b64decode(request.image.data)
+        image = Image.open(BytesIO(image_data))

         if image.mode != 'RGB':
-            logger.debug(f"Converting image from {image.mode} to RGB")
             image = image.convert('RGB')

+        inputs = self.processor(image, return_tensors="pt")
+
+        with torch.no_grad():
+            logits = self.model(**inputs).logits
+
+        predicted_label = logits.argmax(-1).item()
+        predicted_class = self.model.config.id2label[predicted_label]
+        probabilities = torch.nn.functional.softmax(logits, dim=-1)
         confidence = probabilities[0][predicted_label].item()

+        return PredictionResponse(
+            prediction=predicted_class,
+            confidence=round(confidence, 4),
+            model=self.model_name,
             predicted_label=predicted_label,
             mediaType=request.image.mediaType
         )

     async def predict(self, request: ImageRequest) -> PredictionResponse:
-        """
-        Perform inference on an image request.
-
-        This method demonstrates proper async handling for CPU-intensive operations.
-        The actual inference work is offloaded to a background thread using
-        asyncio.to_thread(), which prevents blocking the event loop.
-
-        Args:
-            request: ImageRequest containing base64-encoded image data
-
-        Returns:
-            PredictionResponse with prediction, confidence, and metadata
-
-        Raises:
-            RuntimeError: If model is not loaded
-            ValueError: If image decoding or processing fails
-        """
+        """Run inference with background threading to avoid blocking event loop."""
         if not self._is_loaded:
-            logger.warning("Model not loaded, loading now...")
             await self.load_model()

-        return response
+        return await asyncio.to_thread(self._predict_sync, request)

     @property
     def is_loaded(self) -> bool:
-        """Check if model is loaded."""
         return self._is_loaded