Commit · 33241cf
Parent(s): c738e68

Finalize Request and Response schema and update model accordingly.

Browse files
- .gitignore                +2  -0
- app/api/models.py         +71 -10
- app/services/inference.py +21 -14
.gitignore ADDED

@@ -0,0 +1,2 @@
+models/
+venv/
app/api/models.py CHANGED

@@ -1,24 +1,85 @@
 """
 Pydantic models for request/response validation.
 """
-from pydantic import BaseModel
+import enum
+from typing import Optional
 
+import pydantic
 
+
-class ImageData(BaseModel):
+class ImageData(pydantic.BaseModel):
     """Image data model for base64 encoded images."""
     mediaType: str
     data: str
 
 
-class ImageRequest(BaseModel):
+class ImageRequest(pydantic.BaseModel):
     """Request model for image classification."""
     image: ImageData
 
 
-class PredictionResponse(BaseModel):
-
-
-
-
-
-
+class Labels(enum.IntEnum):
+    Natural = 0
+    FullySynthesized = 1
+    LocallyEdited = 2
+    LocallySynthesized = 3
+
+
+class LocalizationMask(pydantic.BaseModel):
+    """A bit mask indicating which pixels are manipulated / synthesized.
+
+    A bit value of ``1`` means that the model believes the corresponding pixel
+    has been edited or synthesized (i.e., its label would be non-zero).
+    A bit value of ``0`` means that the model believes the pixel is unaltered.
+
+    The mask ``.width`` and ``.height`` should be the same as the input image.
+    Extra bits at the end of ``.bitsRowMajor`` after the first
+    ``width * height`` bits are **ignored**; for simplicity/efficiency,
+    you should encode your bit mask into a byte array and not worry if the
+    final byte isn't "full", then convert the byte array to base64.
+    """
+
+    width: int = pydantic.Field(
+        description="The width of the mask."
+    )
+
+    height: int = pydantic.Field(
+        description="The height of the mask."
+    )
+
+    bitsRowMajor: str = pydantic.Field(
+        description="A base64 string encoding the bit mask in row-major order.",
+        # Canonical base64 encoding
+        # https://stackoverflow.com/a/64467300/3709935
+        pattern=r"^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/][AQgw]==|[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=)?$",
+    )
+
+
+class PredictionResponse(pydantic.BaseModel):
+    """Response model for synthetic image classification results.
+
+    Detector models will be scored primarily on their ability to classify the
+    entire image into 1 of the 4 label categories::
+
+        0: (Natural) The image is natural / unaltered.
+        1: (FullySynthesized) The entire image was synthesized by e.g., a
+           generative image model.
+        2: (LocallyEdited) The image is a natural image where a portion has
+           been edited using traditional photo editing techniques such as
+           splicing.
+        3: (LocallySynthesized) The image is a natural image where a portion
+           has been replaced by synthesized content.
+    """
+
+    logprobs: list[float] = pydantic.Field(
+        description="The log-probabilities for each of the 4 possible labels.",
+        min_length=4,
+        max_length=4,
+    )
+
+    localizationMask: Optional[LocalizationMask] = pydantic.Field(
+        default=None,
+        description="A bit mask localizing predicted edits. Models that are"
+        " not capable of localization may omit this field. It may also be"
+        " omitted if the predicted label is ``0`` or ``1``, in which case the"
+        " mask will be assumed to be all 0's or all 1's, as appropriate."
+    )
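The ``LocalizationMask`` docstring above prescribes packing the mask bits into a byte array and then base64-encoding it. Below is a minimal client-side sketch of that encoding; the ``encode_mask`` helper and its nested-list input are hypothetical (not part of this commit), and MSB-first bit order within each byte is an assumption, since the schema does not specify bit order within a byte.

import base64

from app.api.models import LocalizationMask


def encode_mask(mask: list[list[bool]]) -> LocalizationMask:
    """Pack a row-major boolean mask into bytes, then base64 (hypothetical helper)."""
    height = len(mask)
    width = len(mask[0]) if height else 0
    bits = [pixel for row in mask for pixel in row]  # flatten in row-major order
    packed = bytearray((len(bits) + 7) // 8)         # trailing bits of the last byte are ignored
    for i, bit in enumerate(bits):
        if bit:
            packed[i // 8] |= 0x80 >> (i % 8)        # assumed MSB-first within each byte
    return LocalizationMask(
        width=width,
        height=height,
        bitsRowMajor=base64.b64encode(bytes(packed)).decode("ascii"),
    )

``base64.b64encode`` emits canonical base64, so the resulting string also satisfies the ``pattern`` constraint on ``bitsRowMajor``.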
app/services/inference.py CHANGED

@@ -1,15 +1,17 @@
 """ResNet inference service implementation."""
 
-import os
 import base64
+import os
+import random
 from io import BytesIO
+
 import torch
 from PIL import Image
-from transformers import AutoImageProcessor, ResNetForImageClassification
+from transformers import AutoImageProcessor, ResNetForImageClassification  # type: ignore[import-untyped]
 
 from app.core.logging import logger
 from app.services.base import InferenceService
-from app.api.models import ImageRequest, PredictionResponse
+from app.api.models import ImageRequest, Labels, LocalizationMask, PredictionResponse
 
 
 class ResNetInferenceService(InferenceService[ImageRequest, PredictionResponse]):

@@ -45,13 +47,20 @@ class ResNetInferenceService(InferenceService[ImageRequest, PredictionResponse]):
         self.model = ResNetForImageClassification.from_pretrained(
             self.model_path, local_files_only=True
         )
+        assert self.model is not None
 
         self._is_loaded = True
-        logger.info(f"Model loaded: {len(self.model.config.id2label)} classes")
+        logger.info(f"Model loaded: {len(self.model.config.id2label)} classes")  # pyright: ignore
 
     def predict(self, request: ImageRequest) -> PredictionResponse:
+        if not self.is_loaded:
+            raise RuntimeError("model is not loaded")
+        assert self.processor is not None
+        assert self.model is not None
+
         image_data = base64.b64decode(request.image.data)
         image = Image.open(BytesIO(image_data))
+        width, height = image.size
 
         if image.mode != 'RGB':
             image = image.convert('RGB')

@@ -59,19 +68,17 @@ class ResNetInferenceService(InferenceService[ImageRequest, PredictionResponse]):
         inputs = self.processor(image, return_tensors="pt")
 
         with torch.no_grad():
-            logits = self.model(**inputs).logits
+            logits = self.model(**inputs).logits  # pyright: ignore
 
-
-
-
-        confidence = probabilities[0][predicted_label].item()
+        logprobs = torch.nn.functional.log_softmax(logits[0, :len(Labels)], dim=-1).tolist()
+        mask_bytes = random.randbytes((width*height + 7) // 8)
+        mask_bits = base64.b64encode(mask_bytes).decode("utf-8")
 
         return PredictionResponse(
-
-
-
-
-            mediaType=request.image.mediaType
+            logprobs=logprobs,
+            localizationMask=LocalizationMask(
+                width=width, height=height, bitsRowMajor=mask_bits
+            )
         )
 
     @property
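On the consuming side, a response reduces to a whole-image label plus per-pixel bits. A sketch under the same assumptions: both helpers below are hypothetical, the MSB-first bit order matches the encoder sketch above, and how an omitted mask should be read for labels ``2``/``3`` is left open by the schema.

import base64

from app.api.models import Labels, PredictionResponse


def predicted_label(response: PredictionResponse) -> Labels:
    """Argmax over the 4 logprobs (hypothetical helper)."""
    return Labels(response.logprobs.index(max(response.logprobs)))


def pixel_is_edited(response: PredictionResponse, x: int, y: int) -> bool:
    """Read one bit of the row-major mask (MSB-first packing assumed)."""
    mask = response.localizationMask
    if mask is None:
        # Per the field description, an omitted mask is assumed all 0's for
        # label 0 and all 1's for label 1; other omitted cases are unspecified.
        return predicted_label(response) == Labels.FullySynthesized
    raw = base64.b64decode(mask.bitsRowMajor)
    i = y * mask.width + x
    return bool(raw[i // 8] & (0x80 >> (i % 8)))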