Alina Lozovskaya
committed
Commit 4f57a60
1 Parent(s): c423ee0
Add mypy workflow and solve mypy errors
- .github/workflows/typecheck.yml +29 -0
- .gitignore +1 -0
- pyproject.toml +12 -2
- src/reachy_mini_conversation_demo/audio/head_wobbler.py +7 -5
- src/reachy_mini_conversation_demo/audio/speech_tapper.py +13 -11
- src/reachy_mini_conversation_demo/camera_worker.py +23 -23
- src/reachy_mini_conversation_demo/config.py +4 -4
- src/reachy_mini_conversation_demo/console.py +6 -6
- src/reachy_mini_conversation_demo/dance_emotion_moves.py +16 -15
- src/reachy_mini_conversation_demo/main.py +5 -4
- src/reachy_mini_conversation_demo/moves.py +31 -31
- src/reachy_mini_conversation_demo/openai_realtime.py +54 -43
- src/reachy_mini_conversation_demo/tools.py +36 -34
- src/reachy_mini_conversation_demo/utils.py +8 -6
- src/reachy_mini_conversation_demo/vision/processors.py +17 -16
- src/reachy_mini_conversation_demo/vision/yolo_head_tracker.py +14 -9
- tests/audio/test_head_wobbler.py +6 -5
- uv.lock +72 -7
.github/workflows/typecheck.yml
ADDED
@@ -0,0 +1,29 @@
+name: Type check
+
+on: [push, pull_request]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  mypy:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - uses: astral-sh/setup-uv@v5
+
+      - name: Install dev deps (locked)
+        run: uv sync --frozen --group dev
+
+      - name: Run mypy
+        run: uv run mypy --pretty --show-error-codes .
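For reference, the check this workflow runs can be reproduced locally with `uv sync --frozen --group dev` followed by `uv run mypy --pretty --show-error-codes .`.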
.gitignore
CHANGED
@@ -29,6 +29,7 @@ coverage.xml
 
 # Linting and formatting
 .ruff_cache/
+.mypy_cache/
 
 # IDE
 .vscode/
pyproject.toml
CHANGED
@@ -23,7 +23,7 @@ dependencies = [
    #OpenAI
    "openai>=2.1",
 
-    #Reachy mini
+    #Reachy mini
    "reachy_mini_dances_library",
    "reachy_mini_toolbox",
    "reachy_mini>=1.0.0.rc4",

@@ -40,7 +40,11 @@ all_vision = [
 ]
 
 [dependency-groups]
-dev = […]
+dev = [
+    "pytest",
+    "ruff==0.12.0",
+    "mypy>=1.18.2",
+]
 
 [project.scripts]
 reachy-mini-conversation-demo = "reachy_mini_conversation_demo.main:main"

@@ -88,3 +92,9 @@ quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false
 line-ending = "auto"
+
+[tool.mypy]
+python_version = "3.12"
+files = ["src/"]
+ignore_missing_imports = true
+strict = true
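As a rough illustration of what `strict = true` demands (a sketch, not code from this repo): strict mode rejects untyped defs and attributes whose type cannot be inferred, which is why the diffs below add explicit `X | None` annotations and `-> None` returns throughout.

```python
import threading


class Worker:
    """Minimal example of the annotation pattern this commit applies."""

    def __init__(self) -> None:
        # Without explicit annotations, assigning None here and a value later
        # trips mypy --strict with "Need type annotation" / incompatible-type errors.
        self._base_ts: float | None = None
        self._thread: threading.Thread | None = None

    def start(self) -> None:
        if self._thread is None:  # narrowing: below this line mypy knows it's a Thread
            self._thread = threading.Thread(target=lambda: None, daemon=True)
            self._thread.start()
```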
src/reachy_mini_conversation_demo/audio/head_wobbler.py
CHANGED
@@ -5,9 +5,11 @@ import queue
 import base64
 import logging
 import threading
-from typing import …
+from typing import Any
+from collections.abc import Callable
 
 import numpy as np
+from numpy.typing import NDArray
 
 from reachy_mini_conversation_demo.audio.speech_tapper import HOP_MS, SwayRollRT
 

@@ -20,13 +22,13 @@ logger = logging.getLogger(__name__)
 class HeadWobbler:
     """Converts audio deltas (base64) into head movement offsets."""
 
-    def __init__(self, set_speech_offsets):
+    def __init__(self, set_speech_offsets: Callable[[tuple[float, float, float, float, float, float]], None]) -> None:
         """Initialize the head wobbler."""
         self._apply_offsets = set_speech_offsets
-        self._base_ts: …
+        self._base_ts: float | None = None
         self._hops_done: int = 0
 
-        self.audio_queue: queue.Queue[…
+        self.audio_queue: queue.Queue[tuple[int, int, NDArray[Any]]] = queue.Queue()
         self.sway = SwayRollRT()
 
         # Synchronization primitives

@@ -35,7 +37,7 @@ class HeadWobbler:
         self._generation = 0
 
         self._stop_event = threading.Event()
-        self._thread: …
+        self._thread: threading.Thread | None = None
 
     def feed(self, delta_b64: str) -> None:
         """Thread-safe: push audio into the consumer queue."""
src/reachy_mini_conversation_demo/audio/speech_tapper.py
CHANGED
@@ -1,10 +1,11 @@
 from __future__ import annotations
 import math
-from typing import …
+from typing import Any
 from itertools import islice
 from collections import deque
 
 import numpy as np
+from numpy.typing import NDArray
 
 
 # Tunables

@@ -48,7 +49,7 @@ SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
 SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))
 
 
-def _rms_dbfs(x: np.ndarray) -> float:
+def _rms_dbfs(x: NDArray[np.floating[Any]]) -> float:
     """Root-mean-square in dBFS for float32 mono array in [-1,1]."""
     # numerically stable rms (avoid overflow)
     x = x.astype(np.float32, copy=False)

@@ -66,7 +67,7 @@ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
     return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
 
 
-def _to_float32_mono(x: np.ndarray) -> np.ndarray:
+def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.floating[Any]]:
     """Convert arbitrary PCM array to float32 mono in [-1,1].
 
     Accepts shapes: (N,), (1,N), (N,1), (C,N), (N,C).

@@ -94,7 +95,7 @@ def _to_float32_mono(x: np.ndarray) -> np.ndarray:
     return a.astype(np.float32) / (scale if scale != 0.0 else 1.0)
 
 
-def _resample_linear(x: np.ndarray, sr_in: int, sr_out: int) -> np.ndarray:
+def _resample_linear(x: NDArray[np.floating[Any]], sr_in: int, sr_out: int) -> NDArray[np.floating[Any]]:
     """Lightweight linear resampler for short buffers."""
     if sr_in == sr_out or x.size == 0:
         return x

@@ -104,7 +105,7 @@ def _resample_linear(x: np.ndarray, sr_in: int, sr_out: int) -> np.ndarray:
         return np.zeros(0, dtype=np.float32)
     t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
     t_out = np.linspace(0.0, 1.0, num=n_out, dtype=np.float32, endpoint=True)
-    return np.interp(t_out, t_in, x).astype(np.float32, copy=False)
+    return np.interp(t_out, t_in, x).astype(np.float32, copy=False)  # type: ignore[no-any-return]

@@ -118,8 +119,8 @@ class SwayRollRT:
     def __init__(self, rng_seed: int = 7):
         """Initialize state."""
         self._seed = int(rng_seed)
-        self.samples = deque(maxlen=10 * SR)  # sliding window for VAD/env
-        self.carry = np.zeros(0, dtype=np.float32)
+        self.samples: deque[float] = deque(maxlen=10 * SR)  # sliding window for VAD/env
+        self.carry: NDArray[np.floating[Any]] = np.zeros(0, dtype=np.float32)
 
         self.vad_on = False
         self.vad_above = 0

@@ -150,7 +151,7 @@ class SwayRollRT:
         self.sway_down = 0
         self.t = 0.0
 
-    def feed(self, pcm: …
+    def feed(self, pcm: NDArray[Any], sr: int | None) -> list[dict[str, float]]:
         """Stream in PCM chunk. Returns a list of sway dicts, one per hop (HOP_MS).
 
         Args:

@@ -173,11 +174,12 @@ class SwayRollRT:
         else:
             self.carry = x
 
-        out: …
+        out: list[dict[str, float]] = []
 
         while self.carry.size >= HOP:
             hop = self.carry[:HOP]
-            …
+            remaining: NDArray[np.floating[Any]] = self.carry[HOP:]
+            self.carry = remaining
 
             # keep sliding window for VAD/env computation
             # (deque accepts any iterable; list() for small HOP is fine)

@@ -260,7 +262,7 @@ class SwayRollRT:
                     "x_mm": x_mm,
                     "y_mm": y_mm,
                     "z_mm": z_mm,
-                }
+                },
             )
 
         return out
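For readers new to `numpy.typing`, a minimal self-contained sketch (illustrative names, not from the repo) of the `NDArray` annotation style used above:

```python
from typing import Any

import numpy as np
from numpy.typing import NDArray


def rms(x: NDArray[np.floating[Any]]) -> float:
    """Accept any float dtype; return a plain Python float so callers stay typed."""
    x32 = x.astype(np.float32, copy=False)
    return float(np.sqrt(np.mean(np.square(x32))))


samples: NDArray[np.float32] = np.zeros(480, dtype=np.float32)
print(rms(samples))  # 0.0
```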
src/reachy_mini_conversation_demo/camera_worker.py
CHANGED
@@ -9,10 +9,11 @@ Ported from main_works.py camera_worker() function to provide:
 import time
 import logging
 import threading
-from typing import …
+from typing import Any
 
 import cv2
 import numpy as np
+from numpy.typing import NDArray
 from scipy.spatial.transform import Rotation as R
 
 from reachy_mini import ReachyMini

@@ -25,20 +26,20 @@ logger = logging.getLogger(__name__)
 class CameraWorker:
     """Thread-safe camera worker with frame buffering and face tracking."""
 
-    def __init__(self, reachy_mini: ReachyMini, head_tracker=None):
+    def __init__(self, reachy_mini: ReachyMini, head_tracker: Any = None) -> None:
         """Initialize."""
         self.reachy_mini = reachy_mini
         self.head_tracker = head_tracker
 
         # Thread-safe frame storage
-        self.latest_frame: …
+        self.latest_frame: NDArray[np.uint8] | None = None
         self.frame_lock = threading.Lock()
         self._stop_event = threading.Event()
-        self._thread: …
+        self._thread: threading.Thread | None = None
 
         # Face tracking state
         self.is_head_tracking_enabled = True
-        self.face_tracking_offsets = [
+        self.face_tracking_offsets: list[float] = [
             0.0,
             0.0,
             0.0,

@@ -49,31 +50,31 @@ class CameraWorker:
         self.face_tracking_lock = threading.Lock()
 
         # Face tracking timing variables (same as main_works.py)
-        self.last_face_detected_time: …
-        self.interpolation_start_time: …
-        self.interpolation_start_pose: …
+        self.last_face_detected_time: float | None = None
+        self.interpolation_start_time: float | None = None
+        self.interpolation_start_pose: NDArray[np.floating[Any]] | None = None
         self.face_lost_delay = 2.0  # seconds to wait before starting interpolation
         self.interpolation_duration = 1.0  # seconds to interpolate back to neutral
 
         # Track state changes
         self.previous_head_tracking_state = self.is_head_tracking_enabled
 
-    def get_latest_frame(self) -> …
+    def get_latest_frame(self) -> NDArray[np.uint8] | None:
         """Get the latest frame (thread-safe)."""
         with self.frame_lock:
             if self.latest_frame is None:
                 return None
-            …
-            …
-            return frame
+            frame = self.latest_frame.copy()
+            frame_rgb: NDArray[np.uint8] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # type: ignore[assignment]
+            return frame_rgb
 
     def get_face_tracking_offsets(
         self,
-    ) -> …
+    ) -> tuple[float, float, float, float, float, float]:
         """Get current face tracking offsets (thread-safe)."""
         with self.face_tracking_lock:
-            …
+            offsets = self.face_tracking_offsets
+            return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])
 
     def set_head_tracking_enabled(self, enabled: bool) -> None:
         """Enable/disable head tracking."""

@@ -168,12 +169,11 @@ class CameraWorker:
                 rotation[2],  # roll, pitch, yaw
             ]
 
-            …
-            if …
-            …
-                pass
+        # No face detected while tracking enabled - set face lost timestamp
+        elif self.last_face_detected_time is None or self.last_face_detected_time == current_time:
+            # Only update if we haven't already set a face lost time
+            # (current_time check prevents overriding the disable-triggered timestamp)
+            pass
 
         # Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
         if self.last_face_detected_time is not None:

@@ -191,7 +191,7 @@ class CameraWorker:
                 self.interpolation_start_pose = np.eye(4)
                 self.interpolation_start_pose[:3, 3] = current_translation
                 self.interpolation_start_pose[:3, :3] = R.from_euler(
-                    "xyz", current_rotation_euler
+                    "xyz", current_rotation_euler,
                 ).as_matrix()
 
             # Calculate interpolation progress (t from 0 to 1)

@@ -200,7 +200,7 @@ class CameraWorker:
 
             # Interpolate between current pose and neutral pose
             interpolated_pose = linear_pose_interpolation(
-                self.interpolation_start_pose, neutral_pose, t
+                self.interpolation_start_pose, neutral_pose, t,
            )
 
             # Extract translation and rotation from interpolated pose
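A side note on the `cv2.cvtColor` line above: OpenCV's Python bindings are loosely typed, so pinning the intended array type at the cv2 boundary (plus, where the installed stubs disagree, a narrowly scoped ignore as in the diff) is a common pattern. A sketch, assuming a BGR uint8 frame:

```python
import cv2
import numpy as np
from numpy.typing import NDArray


def to_rgb(frame_bgr: NDArray[np.uint8]) -> NDArray[np.uint8]:
    # Declare the result type explicitly; depending on the cv2 stubs in use,
    # a `# type: ignore[assignment]` may be required, as in the diff above.
    rgb: NDArray[np.uint8] = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return rgb


frame = np.zeros((4, 4, 3), dtype=np.uint8)
print(to_rgb(frame).shape)  # (4, 4, 3)
```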
src/reachy_mini_conversation_demo/config.py
CHANGED
@@ -13,13 +13,13 @@ if not env_file.exists():
     raise RuntimeError(
         ".env file not found. Please create one based on .env.example:\n"
         " cp .env.example .env\n"
-        "Then add your OPENAI_API_KEY to the .env file."
+        "Then add your OPENAI_API_KEY to the .env file.",
     )
 
 # Load .env and verify it was loaded successfully
 if not load_dotenv():
     raise RuntimeError(
-        "Failed to load .env file. Please ensure the file is readable and properly formatted."
+        "Failed to load .env file. Please ensure the file is readable and properly formatted.",
     )
 
 logger.info("Configuration loaded from .env file")

@@ -33,11 +33,11 @@ class Config:
     if OPENAI_API_KEY is None:
         raise RuntimeError(
             "OPENAI_API_KEY is not set in .env file. Please add it:\n"
-            " OPENAI_API_KEY=your_api_key_here"
+            " OPENAI_API_KEY=your_api_key_here",
         )
     if not OPENAI_API_KEY.strip():
         raise RuntimeError(
-            "OPENAI_API_KEY is empty in .env file. Please provide a valid API key."
+            "OPENAI_API_KEY is empty in .env file. Please provide a valid API key.",
         )
 
     # Optional
src/reachy_mini_conversation_demo/console.py
CHANGED
@@ -24,9 +24,9 @@ class LocalStream:
         self.handler = handler
         self._robot = robot
         self._stop_event = asyncio.Event()
-        self._tasks = []
+        self._tasks: list[asyncio.Task[None]] = []
         # Allow the handler to flush the player queue when appropriate.
-        self.handler._clear_queue = self.clear_audio_queue
+        self.handler._clear_queue = self.clear_audio_queue
 
     def launch(self) -> None:
         """Start the recorder/player and run the async processing loops."""

@@ -105,12 +105,12 @@ class LocalStream:
             elif isinstance(handler_output, tuple):
                 input_sample_rate, audio_frame = handler_output
                 device_sample_rate = self._robot.media.get_audio_samplerate()
-                …
+                audio_frame_float = audio_to_float32(audio_frame.squeeze())
                 if input_sample_rate != device_sample_rate:
-                    …
+                    audio_frame_float = librosa.resample(
+                        audio_frame_float, orig_sr=input_sample_rate, target_sr=device_sample_rate,
                     )
-                self._robot.media.push_audio_sample(…
+                self._robot.media.push_audio_sample(audio_frame_float)
 
             else:
                 logger.debug("Ignoring output type=%s", type(handler_output).__name__)
src/reachy_mini_conversation_demo/dance_emotion_moves.py
CHANGED
@@ -6,9 +6,10 @@ and executed sequentially by the MovementManager.
 
 from __future__ import annotations
 import logging
-from typing import …
+from typing import Any
 
 import numpy as np
+from numpy.typing import NDArray
 
 from reachy_mini.motion.move import Move
 from reachy_mini.motion.recorded_move import RecordedMoves

@@ -18,7 +19,7 @@ from reachy_mini_dances_library.dance_move import DanceMove
 logger = logging.getLogger(__name__)
 
 
-class DanceQueueMove(Move):
+class DanceQueueMove(Move):  # type: ignore[misc]
     """Wrapper for dance moves to work with the movement queue system."""
 
     def __init__(self, move_name: str):

@@ -29,9 +30,9 @@ class DanceQueueMove(Move):
     @property
     def duration(self) -> float:
         """Duration property required by official Move interface."""
-        return self.dance_move.duration
+        return float(self.dance_move.duration)
 
-    def evaluate(self, t: float) -> tuple[np.…
+    def evaluate(self, t: float) -> tuple[NDArray[np.floating[Any]] | None, NDArray[np.floating[Any]] | None, float | None]:
         """Evaluate dance move at time t."""
         try:
             # Get the pose from the dance move

@@ -52,7 +53,7 @@ class DanceQueueMove(Move):
         return (neutral_head_pose, np.array([0.0, 0.0]), 0.0)
 
 
-class EmotionQueueMove(Move):
+class EmotionQueueMove(Move):  # type: ignore[misc]
     """Wrapper for emotion moves to work with the movement queue system."""
 
     def __init__(self, emotion_name: str, recorded_moves: RecordedMoves):

@@ -63,9 +64,9 @@ class EmotionQueueMove(Move):
     @property
     def duration(self) -> float:
         """Duration property required by official Move interface."""
-        return self.emotion_move.duration
+        return float(self.emotion_move.duration)
 
-    def evaluate(self, t: float) -> tuple[np.…
+    def evaluate(self, t: float) -> tuple[NDArray[np.floating[Any]] | None, NDArray[np.floating[Any]] | None, float | None]:
         """Evaluate emotion move at time t."""
         try:
             # Get the pose from the emotion move

@@ -86,17 +87,17 @@ class EmotionQueueMove(Move):
         return (neutral_head_pose, np.array([0.0, 0.0]), 0.0)
 
 
-class GotoQueueMove(Move):
+class GotoQueueMove(Move):  # type: ignore[misc]
     """Wrapper for goto moves to work with the movement queue system."""
 
     def __init__(
         self,
-        target_head_pose: np.…
-        start_head_pose: np.…
-        target_antennas: …
-        start_antennas: …
+        target_head_pose: NDArray[np.floating[Any]],
+        start_head_pose: NDArray[np.floating[Any]] | None = None,
+        target_antennas: tuple[float, float] = (0, 0),
+        start_antennas: tuple[float, float] | None = None,
         target_body_yaw: float = 0,
-        start_body_yaw: float = None,
+        start_body_yaw: float | None = None,
         duration: float = 1.0,
     ):
         """Initialize a GotoQueueMove."""

@@ -113,7 +114,7 @@ class GotoQueueMove(Move):
         """Duration property required by official Move interface."""
         return self._duration
 
-    def evaluate(self, t: float) -> tuple[np.…
+    def evaluate(self, t: float) -> tuple[NDArray[np.floating[Any]] | None, NDArray[np.floating[Any]] | None, float | None]:
         """Evaluate goto move at time t using linear interpolation."""
         try:
             from reachy_mini.utils import create_head_pose

@@ -136,7 +137,7 @@ class GotoQueueMove(Move):
             [
                 self.start_antennas[0] + (self.target_antennas[0] - self.start_antennas[0]) * t_clamped,
                 self.start_antennas[1] + (self.target_antennas[1] - self.start_antennas[1]) * t_clamped,
-            ]
+            ],
         )
 
         # Interpolate body yaw
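On the `# type: ignore[misc]` added to each subclass above: under strict mode, mypy refuses to subclass a base it resolves to `Any` (for example, a dependency that ships no type information). A minimal reproduction, with a stand-in for the untyped `Move` base:

```python
from typing import Any

Move: Any = object  # stand-in for a base class from an untyped library


class DanceQueueMove(Move):  # type: ignore[misc]
    """Without the ignore, mypy --strict reports:
    Class cannot subclass "Move" (has type "Any")  [misc]."""
```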
src/reachy_mini_conversation_demo/main.py
CHANGED
@@ -2,6 +2,7 @@
 
 import os
 import sys
+from typing import Any
 
 import gradio as gr
 from fastapi import FastAPI

@@ -20,13 +21,13 @@ from reachy_mini_conversation_demo.openai_realtime import OpenaiRealtimeHandler
 from reachy_mini_conversation_demo.audio.head_wobbler import HeadWobbler
 
 
-def update_chatbot(chatbot: list[dict], response: dict):
+def update_chatbot(chatbot: list[dict[str, Any]], response: dict[str, Any]) -> list[dict[str, Any]]:
     """Update the chatbot with AdditionalOutputs."""
     chatbot.append(response)
     return chatbot
 
 
-def main():
+def main() -> None:
     """Entrypoint for the Reachy Mini conversation demo."""
     args = parse_args()
 

@@ -41,7 +42,7 @@ def main():
     # Check if running in simulation mode without --gradio
     if robot.client.get_status()["simulation_enabled"] and not args.gradio:
         logger.error(
-            "Simulation mode requires Gradio interface. Please use --gradio flag when running in simulation mode."
+            "Simulation mode requires Gradio interface. Please use --gradio flag when running in simulation mode.",
         )
         robot.client.disconnect()
         sys.exit(1)

@@ -76,7 +77,7 @@ def main():
 
     handler = OpenaiRealtimeHandler(deps)
 
-    stream_manager = None
+    stream_manager: gr.Blocks | LocalStream | None = None
 
     if args.gradio:
         stream = Stream(
src/reachy_mini_conversation_demo/moves.py
CHANGED
@@ -36,11 +36,12 @@ import time
 import logging
 import threading
 from queue import Empty, Queue
-from typing import Any
+from typing import Any
 from collections import deque
 from dataclasses import dataclass
 
 import numpy as np
+from numpy.typing import NDArray
 
 from reachy_mini import ReachyMini
 from reachy_mini.utils import create_head_pose

@@ -57,16 +58,16 @@ logger = logging.getLogger(__name__)
 CONTROL_LOOP_FREQUENCY_HZ = 100.0  # Hz - Target frequency for the movement control loop
 
 # Type definitions
-FullBodyPose = …
+FullBodyPose = tuple[NDArray[np.floating[Any]], tuple[float, float], float]  # (head_pose_4x4, antennas, body_yaw)
 
 
-class BreathingMove(Move):
+class BreathingMove(Move):  # type: ignore[misc]
     """Breathing move with interpolation to neutral and then continuous breathing patterns."""
 
     def __init__(
         self,
-        interpolation_start_pose: np.…
-        interpolation_start_antennas: …
+        interpolation_start_pose: NDArray[np.floating[Any]],
+        interpolation_start_antennas: tuple[float, float],
         interpolation_duration: float = 1.0,
     ):
         """Initialize breathing move.

@@ -96,7 +97,7 @@ class BreathingMove(Move):
         """Duration property required by official Move interface."""
         return float("inf")  # Continuous breathing (never ends naturally)
 
-    def evaluate(self, t: float) -> tuple[np.…
+    def evaluate(self, t: float) -> tuple[NDArray[np.floating[Any]] | None, NDArray[np.floating[Any]] | None, float | None]:
         """Evaluate breathing move at time t."""
         if t < self.interpolation_duration:
             # Phase 1: Interpolate to neutral base position

@@ -104,7 +105,7 @@ class BreathingMove(Move):
 
             # Interpolate head pose
             head_pose = linear_pose_interpolation(
-                self.interpolation_start_pose, self.neutral_head_pose, interpolation_t
+                self.interpolation_start_pose, self.neutral_head_pose, interpolation_t,
             )
 
             # Interpolate antennas

@@ -168,12 +169,12 @@ class MovementState:
     """State tracking for the movement system."""
 
     # Primary move state
-    current_move: …
-    move_start_time: …
+    current_move: Move | None = None
+    move_start_time: float | None = None
     last_activity_time: float = 0.0
 
     # Secondary move state (offsets)
-    speech_offsets: …
+    speech_offsets: tuple[float, float, float, float, float, float] = (
         0.0,
         0.0,
         0.0,

@@ -181,7 +182,7 @@ class MovementState:
         0.0,
         0.0,
     )
-    face_tracking_offsets: …
+    face_tracking_offsets: tuple[float, float, float, float, float, float] = (
         0.0,
         0.0,
         0.0,

@@ -191,7 +192,7 @@ class MovementState:
     )
 
     # Status flags
-    last_primary_pose: …
+    last_primary_pose: FullBodyPose | None = None
 
     def update_activity(self) -> None:
         """Update the last activity time."""

@@ -242,7 +243,7 @@ class MovementManager:
     def __init__(
         self,
         current_robot: ReachyMini,
-        camera_worker=None,
+        camera_worker: Any = None,
     ):
         """Initialize movement manager."""
         self.current_robot = current_robot

@@ -258,7 +259,7 @@ class MovementManager:
         self.state.last_primary_pose = (neutral_pose, (0.0, 0.0), 0.0)
 
         # Move queue (primary moves)
-        self.move_queue = deque()
+        self.move_queue: deque[Move] = deque()
 
         # Configuration
         self.idle_inactivity_delay = 0.3  # seconds

@@ -266,10 +267,10 @@ class MovementManager:
         self.target_period = 1.0 / self.target_frequency
 
         self._stop_event = threading.Event()
-        self._thread: …
+        self._thread: threading.Thread | None = None
         self._is_listening = False
         self._last_commanded_pose: FullBodyPose = clone_full_body_pose(self.state.last_primary_pose)
-        self._listening_antennas: …
+        self._listening_antennas: tuple[float, float] = self._last_commanded_pose[1]
         self._antenna_unfreeze_blend = 1.0
         self._antenna_blend_duration = 0.4  # seconds to blend back after listening
         self._last_listening_blend_time = self._now()

@@ -283,7 +284,7 @@ class MovementManager:
         # Cross-thread signalling
         self._command_queue: Queue[tuple[str, Any]] = Queue()
         self._speech_offsets_lock = threading.Lock()
-        self._pending_speech_offsets: …
+        self._pending_speech_offsets: tuple[float, float, float, float, float, float] = (
             0.0,
             0.0,
             0.0,

@@ -294,7 +295,7 @@ class MovementManager:
         self._speech_offsets_dirty = False
 
         self._face_offsets_lock = threading.Lock()
-        self._pending_face_offsets: …
+        self._pending_face_offsets: tuple[float, float, float, float, float, float] = (
             0.0,
             0.0,
             0.0,

@@ -326,7 +327,7 @@ class MovementManager:
         """
         self._command_queue.put(("clear_queue", None))
 
-    def set_speech_offsets(self, offsets: …
+    def set_speech_offsets(self, offsets: tuple[float, float, float, float, float, float]) -> None:
         """Update speech-induced secondary offsets (x, y, z, roll, pitch, yaw).
 
         Offsets are interpreted as metres for translation and radians for

@@ -383,7 +384,7 @@ class MovementManager:
 
     def _apply_pending_offsets(self) -> None:
         """Apply the most recent speech/face offset updates."""
-        speech_offsets: …
+        speech_offsets: tuple[float, float, float, float, float, float] | None = None
         with self._speech_offsets_lock:
             if self._speech_offsets_dirty:
                 speech_offsets = self._pending_speech_offsets

@@ -393,7 +394,7 @@ class MovementManager:
                 self.state.speech_offsets = speech_offsets
                 self.state.update_activity()
 
-        face_offsets: …
+        face_offsets: tuple[float, float, float, float, float, float] | None = None
         with self._face_offsets_lock:
             if self._face_offsets_dirty:
                 face_offsets = self._pending_face_offsets

@@ -549,14 +550,13 @@ class MovementManager:
             )
 
             self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
-        else:
-            …
-        else:
-            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
-            primary_full_body_pose = (neutral_head_pose, (0.0, 0.0), 0.0)
-            self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
+        # Otherwise reuse the last primary pose so we avoid jumps between moves
+        elif self.state.last_primary_pose is not None:
+            primary_full_body_pose = clone_full_body_pose(self.state.last_primary_pose)
+        else:
+            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+            primary_full_body_pose = (neutral_head_pose, (0.0, 0.0), 0.0)
+            self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
 
         return primary_full_body_pose

@@ -595,7 +595,7 @@ class MovementManager:
         self._manage_move_queue(current_time)
         self._manage_breathing(current_time)
 
-    def _calculate_blended_antennas(self, target_antennas: …
+    def _calculate_blended_antennas(self, target_antennas: tuple[float, float]) -> tuple[float, float]:
         """Blend target antennas with listening freeze state and update blending."""
         now = self._now()
         listening = self._is_listening

@@ -631,7 +631,7 @@ class MovementManager:
 
         return antennas_cmd
 
-    def _issue_control_command(self, head: np.…
+    def _issue_control_command(self, head: NDArray[np.floating[Any]], antennas: tuple[float, float], body_yaw: float) -> None:
         """Send the fused pose to the robot with throttled error logging."""
         try:
             self.current_robot.set_target(head=head, antennas=antennas, body_yaw=body_yaw)

@@ -651,7 +651,7 @@ class MovementManager:
         self._last_commanded_pose = clone_full_body_pose((head, antennas, body_yaw))
 
     def _update_frequency_stats(
-        self, loop_start: float, prev_loop_start: float, stats: LoopFrequencyStats
+        self, loop_start: float, prev_loop_start: float, stats: LoopFrequencyStats,
     ) -> LoopFrequencyStats:
         """Update frequency statistics based on the current loop start time."""
         period = loop_start - prev_loop_start
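An aside on the `FullBodyPose` alias introduced above: naming the tuple once lets mypy check every producer and consumer of the pose triple. A small sketch (helper name hypothetical):

```python
from typing import Any

import numpy as np
from numpy.typing import NDArray

# Same shape as the alias in moves.py: (head_pose_4x4, antennas, body_yaw).
FullBodyPose = tuple[NDArray[np.floating[Any]], tuple[float, float], float]


def clone_pose(pose: FullBodyPose) -> FullBodyPose:
    """Copy the mutable head-pose array so callers cannot alias it."""
    head, antennas, body_yaw = pose
    return (head.copy(), antennas, body_yaw)


neutral: FullBodyPose = (np.eye(4), (0.0, 0.0), 0.0)
print(clone_pose(neutral)[2])  # 0.0
```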
src/reachy_mini_conversation_demo/openai_realtime.py
CHANGED
@@ -2,12 +2,14 @@ import json
 import base64
 import asyncio
 import logging
+from typing import Any
 from datetime import datetime
 
 import numpy as np
 import gradio as gr
 from openai import AsyncOpenAI
 from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item
+from numpy.typing import NDArray
 
 from reachy_mini_conversation_demo.tools import (
     ALL_TOOL_SPECS,

@@ -33,18 +35,18 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
         )
         self.deps = deps
 
-        self.connection = None
-        self.output_queue = asyncio.Queue()
+        self.connection: Any | None = None
+        self.output_queue: asyncio.Queue[tuple[int, NDArray[np.int16]] | AdditionalOutputs] = asyncio.Queue()
 
         self.last_activity_time = asyncio.get_event_loop().time()
         self.start_time = asyncio.get_event_loop().time()
         self.is_idle_tool_call = False
 
-    def copy(self):
+    def copy(self) -> "OpenaiRealtimeHandler":
         """Create a copy of the handler."""
         return OpenaiRealtimeHandler(self.deps)
 
-    async def start_up(self):
+    async def start_up(self) -> None:
         """Start the handler."""
         self.client = AsyncOpenAI(api_key=config.OPENAI_API_KEY)
         async with self.client.beta.realtime.connect(model=config.MODEL_NAME) as conn:

@@ -59,10 +61,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                 },
                 "voice": "ballad",
                 "instructions": SESSION_INSTRUCTIONS,
-                "tools": ALL_TOOL_SPECS,
+                "tools": ALL_TOOL_SPECS,  # type: ignore[typeddict-item]
                 "tool_choice": "auto",
                 "temperature": 0.7,
-            }
+            },
         )
 
         # Manage event received from the openai server

@@ -70,9 +72,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
         async for event in self.connection:
             logger.debug(f"OpenAI event: {event.type}")
             if event.type == "input_audio_buffer.speech_started":
-                if hasattr(self, …
+                if hasattr(self, "_clear_queue") and callable(self._clear_queue):
                     self._clear_queue()
-                self.deps.head_wobbler…
+                if self.deps.head_wobbler is not None:
+                    self.deps.head_wobbler.reset()
                 self.deps.movement_manager.set_listening(True)
                 logger.debug("User speech started")

@@ -83,7 +86,8 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
             if event.type in ("response.audio.completed", "response.completed"):
                 # Doesn't seem to be called
                 logger.debug("response completed")
-                self.deps.head_wobbler…
+                if self.deps.head_wobbler is not None:
+                    self.deps.head_wobbler.reset()
 
             if event.type == "response.created":
                 logger.debug("Response created")

@@ -91,7 +95,6 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
             if event.type == "response.done":
                 # Doesn't mean the audio is done playing
                 logger.debug("Response done")
-                pass
 
             if event.type == "conversation.item.input_audio_transcription.completed":
                 logger.debug(f"User transcript: {event.transcript}")

@@ -102,7 +105,8 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                 await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": event.transcript}))
 
             if event.type == "response.audio.delta":
-                self.deps.head_wobbler…
+                if self.deps.head_wobbler is not None:
+                    self.deps.head_wobbler.feed(event.delta)
                 self.last_activity_time = asyncio.get_event_loop().time()
                 logger.debug("last activity time updated to %s", self.last_activity_time)
                 await self.output_queue.put(

@@ -118,6 +122,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                 args_json_str = getattr(event, "arguments", None)
                 call_id = getattr(event, "call_id", None)
 
+                if not isinstance(tool_name, str) or not isinstance(args_json_str, str):
+                    logger.error("Invalid tool call: tool_name=%s, args=%s", tool_name, args_json_str)
+                    continue
+
                 try:
                     tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
                     logger.debug("Tool '%s' executed successfully", tool_name)

@@ -127,22 +135,23 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                     tool_result = {"error": str(e)}
 
                 # send the tool result back
-                …
-                item…
+                if isinstance(call_id, str):
+                    await self.connection.conversation.item.create(
+                        item={
+                            "type": "function_call_output",
+                            "call_id": call_id,
+                            "output": json.dumps(tool_result),
+                        },
+                    )
 
                 await self.output_queue.put(
                     AdditionalOutputs(
                         {
                             "role": "assistant",
                             "content": json.dumps(tool_result),
-                            "metadata": {"title": "🛠️ Used tool "…
+                            "metadata": {"title": f"🛠️ Used tool {tool_name}", "status": "done"},
                         },
-                    )
+                    ),
                 )
 
                 if tool_name == "camera" and "b64_im" in tool_result:

@@ -157,37 +166,39 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                             "role": "user",
                             "content": [
                                 {
-                                    "type": "input_image",
+                                    "type": "input_image",  # type: ignore[typeddict-item]
                                     "image_url": f"data:image/jpeg;base64,{b64_im}",
-                                }
+                                },
                             ],
-                        }
+                        },
                     )
                     logger.info("Added camera image to conversation")
 
-                    …
-                    …
-                        )
-                    )
+                    if self.deps.camera_worker is not None:
+                        np_img = self.deps.camera_worker.get_latest_frame()
+                        img = gr.Image(value=np_img)
+
+                        await self.output_queue.put(
+                            AdditionalOutputs(
+                                {
+                                    "role": "assistant",
+                                    "content": img,
+                                },
+                            ),
+                        )
 
                 if not self.is_idle_tool_call:
                     await self.connection.response.create(
                         response={
-                            "instructions": "Use the tool result just returned and answer concisely in speech."
-                        }
+                            "instructions": "Use the tool result just returned and answer concisely in speech.",
+                        },
                     )
                 else:
                     self.is_idle_tool_call = False
 
                 # re synchronize the head wobble after a tool call that may have taken some time
-                self.deps.head_wobbler…
+                if self.deps.head_wobbler is not None:
+                    self.deps.head_wobbler.reset()
 
             # server error
             if event.type == "error":

@@ -197,7 +208,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                 await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"}))
 
     # Microphone receive
-    async def receive(self, frame: tuple[int, np.…
+    async def receive(self, frame: tuple[int, NDArray[np.int16]]) -> None:
         """Receive audio frame from the microphone and send it to the openai server."""
         if not self.connection:
             return

@@ -205,9 +216,9 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
         array = array.squeeze()
         audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
         # Fills the input audio buffer to be sent to the server
-        await self.connection.input_audio_buffer.append(audio=audio_message)
+        await self.connection.input_audio_buffer.append(audio=audio_message)
 
-    async def emit(self) -> tuple[int, np.…
+    async def emit(self) -> tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
         """Emit audio frame to be played by the speaker."""
         # sends to the stream the stuff put in the output queue by the openai event handler
         # This is called periodically by the fastrtc Stream

@@ -219,7 +230,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
 
         self.last_activity_time = asyncio.get_event_loop().time()  # avoid repeated resets
 
-        return await wait_for_item(self.output_queue)
+        return await wait_for_item(self.output_queue)  # type: ignore[no-any-return]

@@ -227,7 +238,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
         await self.connection.close()
         self.connection = None
 
-    def format_timestamp(self):
+    def format_timestamp(self) -> …
         """Format current timestamp with date, time and elapsed seconds."""
         current_time = asyncio.get_event_loop().time()
         elapsed_seconds = current_time - self.start_time

@@ -236,7 +247,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
 
-    async def send_idle_signal(self, idle_duration) -> None:
+    async def send_idle_signal(self, idle_duration: float) -> None:
         """Send an idle signal to the openai server."""
         logger.debug("Sending idle signal")
         self.is_idle_tool_call = True

@@ -249,12 +260,12 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                 "type": "message",
                 "role": "user",
                 "content": [{"type": "input_text", "text": timestamp_msg}],
-            }
+            },
         )
         await self.connection.response.create(
             response={
                 "modalities": ["text"],
                 "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
                 "tool_choice": "required",
-            }
+            },
        )
|
| 234 |
|
| 235 |
async def shutdown(self) -> None:
|
| 236 |
"""Shutdown the handler."""
|
|
|
|
| 238 |
await self.connection.close()
|
| 239 |
self.connection = None
|
| 240 |
|
| 241 |
+
def format_timestamp(self) -> str:
|
| 242 |
"""Format current timestamp with date, time and elapsed seconds."""
|
| 243 |
current_time = asyncio.get_event_loop().time()
|
| 244 |
elapsed_seconds = current_time - self.start_time
|
|
|
|
| 247 |
|
| 248 |
|
| 249 |
|
| 250 |
+
async def send_idle_signal(self, idle_duration: float) -> None:
|
| 251 |
"""Send an idle signal to the openai server."""
|
| 252 |
logger.debug("Sending idle signal")
|
| 253 |
self.is_idle_tool_call = True
|
|
|
|
| 260 |
"type": "message",
|
| 261 |
"role": "user",
|
| 262 |
"content": [{"type": "input_text", "text": timestamp_msg}],
|
| 263 |
+
},
|
| 264 |
)
|
| 265 |
await self.connection.response.create(
|
| 266 |
response={
|
| 267 |
"modalities": ["text"],
|
| 268 |
"instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
|
| 269 |
"tool_choice": "required",
|
| 270 |
+
},
|
| 271 |
)
|
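The queue and frame typing added above is the core of this file's mypy fix: a single `asyncio.Queue` carries both audio frames and side-channel chat outputs, so its element type must be spelled out as a union, and raw audio is pinned to `NDArray[np.int16]`. A minimal stand-alone sketch of the pattern (the `AdditionalOutputs` class here is a simplified stand-in for the fastrtc type, not the project's handler):

```python
import asyncio

import numpy as np
from numpy.typing import NDArray


class AdditionalOutputs:
    """Stand-in for fastrtc.AdditionalOutputs, reduced to a payload holder."""

    def __init__(self, payload: dict[str, object]) -> None:
        self.payload = payload


async def demo() -> None:
    # The queue carries either (sample_rate, int16 samples) tuples or chat outputs.
    queue: asyncio.Queue[tuple[int, NDArray[np.int16]] | AdditionalOutputs] = asyncio.Queue()
    await queue.put((24000, np.zeros(480, dtype=np.int16)))
    await queue.put(AdditionalOutputs({"role": "assistant", "content": "hi"}))

    while not queue.empty():
        item = await queue.get()
        if isinstance(item, AdditionalOutputs):  # narrow the union before use
            print(item.payload)
        else:
            rate, samples = item
            print(rate, samples.dtype)


asyncio.run(demo())
```

With the element type explicit, mypy can check both the `emit()`-style consumers and the producers that `put` into the queue.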
src/reachy_mini_conversation_demo/tools.py
CHANGED
|
@@ -4,7 +4,7 @@ import json
|
|
| 4 |
import asyncio
|
| 5 |
import inspect
|
| 6 |
import logging
|
| 7 |
-
from typing import Any,
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
| 10 |
from reachy_mini import ReachyMini
|
|
@@ -36,9 +36,9 @@ except ImportError as e:
|
|
| 36 |
EMOTION_AVAILABLE = False
|
| 37 |
|
| 38 |
|
| 39 |
-
def get_concrete_subclasses(base):
|
| 40 |
"""Recursively find all concrete (non-abstract) subclasses of a base class."""
|
| 41 |
-
result = []
|
| 42 |
for cls in base.__subclasses__():
|
| 43 |
if not inspect.isabstract(cls):
|
| 44 |
result.append(cls)
|
|
@@ -58,9 +58,9 @@ class ToolDependencies:
|
|
| 58 |
reachy_mini: ReachyMini
|
| 59 |
movement_manager: Any # MovementManager from moves.py
|
| 60 |
# Optional deps
|
| 61 |
-
camera_worker:
|
| 62 |
-
vision_manager:
|
| 63 |
-
head_wobbler:
|
| 64 |
motion_duration_s: float = 1.0
|
| 65 |
|
| 66 |
|
|
@@ -76,9 +76,9 @@ class Tool(abc.ABC):
|
|
| 76 |
|
| 77 |
name: str
|
| 78 |
description: str
|
| 79 |
-
parameters_schema:
|
| 80 |
|
| 81 |
-
def spec(self) ->
|
| 82 |
"""Return the function spec for LLM consumption."""
|
| 83 |
return {
|
| 84 |
"type": "function",
|
|
@@ -88,7 +88,7 @@ class Tool(abc.ABC):
|
|
| 88 |
}
|
| 89 |
|
| 90 |
@abc.abstractmethod
|
| 91 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 92 |
"""Async tool execution entrypoint."""
|
| 93 |
raise NotImplementedError
|
| 94 |
|
|
@@ -121,9 +121,12 @@ class MoveHead(Tool):
|
|
| 121 |
"front": (0, 0, 0, 0, 0, 0),
|
| 122 |
}
|
| 123 |
|
| 124 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 125 |
"""Move head in a given direction."""
|
| 126 |
-
|
| 127 |
logger.info("Tool call: move_head direction=%s", direction)
|
| 128 |
|
| 129 |
deltas = self.DELTAS.get(direction, self.DELTAS["front"])
|
|
@@ -177,7 +180,7 @@ class Camera(Tool):
|
|
| 177 |
"required": ["question"],
|
| 178 |
}
|
| 179 |
|
| 180 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 181 |
"""Take a picture with the camera and ask a question about it."""
|
| 182 |
image_query = (kwargs.get("question") or "").strip()
|
| 183 |
if not image_query:
|
|
@@ -199,7 +202,7 @@ class Camera(Tool):
|
|
| 199 |
# Use vision manager for processing if available
|
| 200 |
if deps.vision_manager is not None:
|
| 201 |
vision_result = await asyncio.to_thread(
|
| 202 |
-
deps.vision_manager.processor.process_image, frame, image_query
|
| 203 |
)
|
| 204 |
if isinstance(vision_result, dict) and "error" in vision_result:
|
| 205 |
return vision_result
|
|
@@ -208,17 +211,16 @@ class Camera(Tool):
|
|
| 208 |
if isinstance(vision_result, str)
|
| 209 |
else {"error": "vision returned non-string"}
|
| 210 |
)
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
import base64
|
| 214-222 |
-
(remaining removed lines from the old camera-tool body were not captured in this view)
|
| 223 |
|
| 224 |
class HeadTracking(Tool):
|
|
@@ -232,7 +234,7 @@ class HeadTracking(Tool):
|
|
| 232 |
"required": ["start"],
|
| 233 |
}
|
| 234 |
|
| 235 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 236 |
"""Enable or disable head tracking."""
|
| 237 |
enable = bool(kwargs.get("start"))
|
| 238 |
|
|
@@ -288,12 +290,12 @@ class Dance(Tool):
|
|
| 288 |
"required": [],
|
| 289 |
}
|
| 290 |
|
| 291 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 292 |
"""Play a named or random dance move once (or repeat). Non-blocking."""
|
| 293 |
if not DANCE_AVAILABLE:
|
| 294 |
return {"error": "Dance system not available"}
|
| 295 |
|
| 296 |
-
move_name = kwargs.get("move"
|
| 297 |
repeat = int(kwargs.get("repeat", 1))
|
| 298 |
|
| 299 |
logger.info("Tool call: dance move=%s repeat=%d", move_name, repeat)
|
|
@@ -326,12 +328,12 @@ class StopDance(Tool):
|
|
| 326 |
"dummy": {
|
| 327 |
"type": "boolean",
|
| 328 |
"description": "dummy boolean, set it to true",
|
| 329 |
-
}
|
| 330 |
},
|
| 331 |
"required": ["dummy"],
|
| 332 |
}
|
| 333 |
|
| 334 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 335 |
"""Stop the current dance move."""
|
| 336 |
logger.info("Tool call: stop_dance")
|
| 337 |
movement_manager = deps.movement_manager
|
|
@@ -373,7 +375,7 @@ class PlayEmotion(Tool):
|
|
| 373 |
"required": ["emotion"],
|
| 374 |
}
|
| 375 |
|
| 376 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 377 |
"""Play a pre-recorded emotion."""
|
| 378 |
if not EMOTION_AVAILABLE:
|
| 379 |
return {"error": "Emotion system not available"}
|
|
@@ -399,7 +401,7 @@ class PlayEmotion(Tool):
|
|
| 399 |
|
| 400 |
except Exception as e:
|
| 401 |
logger.exception("Failed to play emotion")
|
| 402 |
-
return {"error": f"Failed to play emotion: {
|
| 403 |
|
| 404 |
|
| 405 |
class StopEmotion(Tool):
|
|
@@ -413,12 +415,12 @@ class StopEmotion(Tool):
|
|
| 413 |
"dummy": {
|
| 414 |
"type": "boolean",
|
| 415 |
"description": "dummy boolean, set it to true",
|
| 416 |
-
}
|
| 417 |
},
|
| 418 |
"required": ["dummy"],
|
| 419 |
}
|
| 420 |
|
| 421 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 422 |
"""Stop the current emotion."""
|
| 423 |
logger.info("Tool call: stop_emotion")
|
| 424 |
movement_manager = deps.movement_manager
|
|
@@ -442,7 +444,7 @@ class DoNothing(Tool):
|
|
| 442 |
"required": [],
|
| 443 |
}
|
| 444 |
|
| 445 |
-
async def __call__(self, deps: ToolDependencies, **kwargs) ->
|
| 446 |
"""Do nothing - stay still and silent."""
|
| 447 |
reason = kwargs.get("reason", "just chilling")
|
| 448 |
logger.info("Tool call: do_nothing reason=%s", reason)
|
|
@@ -452,7 +454,7 @@ class DoNothing(Tool):
|
|
| 452 |
# Registry & specs (dynamic)
|
| 453 |
|
| 454 |
# List of available tool classes
|
| 455 |
-
ALL_TOOLS:
|
| 456 |
ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
|
| 457 |
|
| 458 |
|
|
@@ -466,7 +468,7 @@ def _safe_load_obj(args_json: str) -> dict[str, Any]:
|
|
| 466 |
return {}
|
| 467 |
|
| 468 |
|
| 469 |
-
async def dispatch_tool_call(tool_name: str, args_json: str, deps: ToolDependencies) ->
|
| 470 |
"""Dispatch a tool call by name with JSON args and dependencies."""
|
| 471 |
tool = ALL_TOOLS.get(tool_name)
|
| 472 |
|
|
|
|
| 4 |
import asyncio
|
| 5 |
import inspect
|
| 6 |
import logging
|
| 7 |
+
from typing import Any, Literal
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
| 10 |
from reachy_mini import ReachyMini
|
|
|
|
| 36 |
EMOTION_AVAILABLE = False
|
| 37 |
|
| 38 |
|
| 39 |
+
def get_concrete_subclasses(base: type[Tool]) -> list[type[Tool]]:
|
| 40 |
"""Recursively find all concrete (non-abstract) subclasses of a base class."""
|
| 41 |
+
result: list[type[Tool]] = []
|
| 42 |
for cls in base.__subclasses__():
|
| 43 |
if not inspect.isabstract(cls):
|
| 44 |
result.append(cls)
|
|
|
|
| 58 |
reachy_mini: ReachyMini
|
| 59 |
movement_manager: Any # MovementManager from moves.py
|
| 60 |
# Optional deps
|
| 61 |
+
camera_worker: Any | None = None # CameraWorker for frame buffering
|
| 62 |
+
vision_manager: Any | None = None
|
| 63 |
+
head_wobbler: Any | None = None # HeadWobbler for audio-reactive motion
|
| 64 |
motion_duration_s: float = 1.0
|
| 65 |
|
| 66 |
|
|
|
|
| 76 |
|
| 77 |
name: str
|
| 78 |
description: str
|
| 79 |
+
parameters_schema: dict[str, Any]
|
| 80 |
|
| 81 |
+
def spec(self) -> dict[str, Any]:
|
| 82 |
"""Return the function spec for LLM consumption."""
|
| 83 |
return {
|
| 84 |
"type": "function",
|
|
|
|
| 88 |
}
|
| 89 |
|
| 90 |
@abc.abstractmethod
|
| 91 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 92 |
"""Async tool execution entrypoint."""
|
| 93 |
raise NotImplementedError
|
| 94 |
|
|
|
|
| 121 |
"front": (0, 0, 0, 0, 0, 0),
|
| 122 |
}
|
| 123 |
|
| 124 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 125 |
"""Move head in a given direction."""
|
| 126 |
+
direction_raw = kwargs.get("direction")
|
| 127 |
+
if not isinstance(direction_raw, str):
|
| 128 |
+
return {"error": "direction must be a string"}
|
| 129 |
+
direction: Direction = direction_raw # type: ignore[assignment]
|
| 130 |
logger.info("Tool call: move_head direction=%s", direction)
|
| 131 |
|
| 132 |
deltas = self.DELTAS.get(direction, self.DELTAS["front"])
|
|
|
|
| 180 |
"required": ["question"],
|
| 181 |
}
|
| 182 |
|
| 183 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 184 |
"""Take a picture with the camera and ask a question about it."""
|
| 185 |
image_query = (kwargs.get("question") or "").strip()
|
| 186 |
if not image_query:
|
|
|
|
| 202 |
# Use vision manager for processing if available
|
| 203 |
if deps.vision_manager is not None:
|
| 204 |
vision_result = await asyncio.to_thread(
|
| 205 |
+
deps.vision_manager.processor.process_image, frame, image_query,
|
| 206 |
)
|
| 207 |
if isinstance(vision_result, dict) and "error" in vision_result:
|
| 208 |
return vision_result
|
|
|
|
| 211 |
if isinstance(vision_result, str)
|
| 212 |
else {"error": "vision returned non-string"}
|
| 213 |
)
|
| 214 |
+
# Return base64 encoded image like main_works.py camera tool
|
| 215 |
+
import base64
|
|
|
|
| 216 |
|
| 217 |
+
import cv2
|
| 218 |
|
| 219 |
+
temp_path = "/tmp/camera_frame.jpg"
|
| 220 |
+
cv2.imwrite(temp_path, frame)
|
| 221 |
+
with open(temp_path, "rb") as f:
|
| 222 |
+
b64_encoded = base64.b64encode(f.read()).decode("utf-8")
|
| 223 |
+
return {"b64_im": b64_encoded}
|
| 224 |
|
| 225 |
|
| 226 |
class HeadTracking(Tool):
|
|
|
|
| 234 |
"required": ["start"],
|
| 235 |
}
|
| 236 |
|
| 237 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 238 |
"""Enable or disable head tracking."""
|
| 239 |
enable = bool(kwargs.get("start"))
|
| 240 |
|
|
|
|
| 290 |
"required": [],
|
| 291 |
}
|
| 292 |
|
| 293 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 294 |
"""Play a named or random dance move once (or repeat). Non-blocking."""
|
| 295 |
if not DANCE_AVAILABLE:
|
| 296 |
return {"error": "Dance system not available"}
|
| 297 |
|
| 298 |
+
move_name = kwargs.get("move")
|
| 299 |
repeat = int(kwargs.get("repeat", 1))
|
| 300 |
|
| 301 |
logger.info("Tool call: dance move=%s repeat=%d", move_name, repeat)
|
|
|
|
| 328 |
"dummy": {
|
| 329 |
"type": "boolean",
|
| 330 |
"description": "dummy boolean, set it to true",
|
| 331 |
+
},
|
| 332 |
},
|
| 333 |
"required": ["dummy"],
|
| 334 |
}
|
| 335 |
|
| 336 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 337 |
"""Stop the current dance move."""
|
| 338 |
logger.info("Tool call: stop_dance")
|
| 339 |
movement_manager = deps.movement_manager
|
|
|
|
| 375 |
"required": ["emotion"],
|
| 376 |
}
|
| 377 |
|
| 378 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 379 |
"""Play a pre-recorded emotion."""
|
| 380 |
if not EMOTION_AVAILABLE:
|
| 381 |
return {"error": "Emotion system not available"}
|
|
|
|
| 401 |
|
| 402 |
except Exception as e:
|
| 403 |
logger.exception("Failed to play emotion")
|
| 404 |
+
return {"error": f"Failed to play emotion: {e!s}"}
|
| 405 |
|
| 406 |
|
| 407 |
class StopEmotion(Tool):
|
|
|
|
| 415 |
"dummy": {
|
| 416 |
"type": "boolean",
|
| 417 |
"description": "dummy boolean, set it to true",
|
| 418 |
+
},
|
| 419 |
},
|
| 420 |
"required": ["dummy"],
|
| 421 |
}
|
| 422 |
|
| 423 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 424 |
"""Stop the current emotion."""
|
| 425 |
logger.info("Tool call: stop_emotion")
|
| 426 |
movement_manager = deps.movement_manager
|
|
|
|
| 444 |
"required": [],
|
| 445 |
}
|
| 446 |
|
| 447 |
+
async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
|
| 448 |
"""Do nothing - stay still and silent."""
|
| 449 |
reason = kwargs.get("reason", "just chilling")
|
| 450 |
logger.info("Tool call: do_nothing reason=%s", reason)
|
|
|
|
| 454 |
# Registry & specs (dynamic)
|
| 455 |
|
| 456 |
# List of available tool classes
|
| 457 |
+
ALL_TOOLS: dict[str, Tool] = {cls.name: cls() for cls in get_concrete_subclasses(Tool)} # type: ignore[type-abstract]
|
| 458 |
ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
|
| 459 |
|
| 460 |
|
|
|
|
| 468 |
return {}
|
| 469 |
|
| 470 |
|
| 471 |
+
async def dispatch_tool_call(tool_name: str, args_json: str, deps: ToolDependencies) -> dict[str, Any]:
|
| 472 |
"""Dispatch a tool call by name with JSON args and dependencies."""
|
| 473 |
tool = ALL_TOOLS.get(tool_name)
|
| 474 |
|
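The `get_concrete_subclasses` / `ALL_TOOLS` changes above are easiest to read as one pattern: annotate the registry walk with `type[Tool]` so mypy tracks classes rather than instances, then suppress the deliberate `type-abstract` complaint where the abstract base itself is passed in. A reduced sketch under those assumptions (a toy synchronous `run` method stands in for the real async `__call__`):

```python
import abc
import inspect


class Tool(abc.ABC):
    name: str

    @abc.abstractmethod
    def run(self) -> dict[str, object]:
        """Tool execution entrypoint (synchronous here for brevity)."""


def get_concrete_subclasses(base: type[Tool]) -> list[type[Tool]]:
    result: list[type[Tool]] = []
    for cls in base.__subclasses__():
        if not inspect.isabstract(cls):
            result.append(cls)
        result.extend(get_concrete_subclasses(cls))  # walk the whole subclass tree
    return result


class Ping(Tool):
    name = "ping"

    def run(self) -> dict[str, object]:
        return {"ok": True}


# Passing the abstract base where `type[Tool]` is expected trips mypy's
# [type-abstract] check, hence the targeted ignore on this line.
ALL_TOOLS: dict[str, Tool] = {cls.name: cls() for cls in get_concrete_subclasses(Tool)}  # type: ignore[type-abstract]
print(sorted(ALL_TOOLS))  # -> ['ping']
```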
src/reachy_mini_conversation_demo/utils.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
import logging
|
| 2 |
import argparse
|
| 3 |
import warnings
|
|
|
|
| 4 |
|
|
|
|
| 5 |
from reachy_mini_conversation_demo.camera_worker import CameraWorker
|
| 6 |
|
| 7 |
|
| 8 |
-
def parse_args():
|
| 9 |
"""Parse command line arguments."""
|
| 10 |
parser = argparse.ArgumentParser("Reachy Mini Conversation Demo")
|
| 11 |
parser.add_argument(
|
|
@@ -26,7 +28,7 @@ def parse_args():
|
|
| 26 |
return parser.parse_args()
|
| 27 |
|
| 28 |
|
| 29 |
-
def handle_vision_stuff(args, current_robot):
|
| 30 |
"""Initialize camera, head tracker, camera worker, and vision manager.
|
| 31 |
|
| 32 |
By default, vision is handled by gpt-realtime model when camera tool is used.
|
|
@@ -44,7 +46,7 @@ def handle_vision_stuff(args, current_robot):
|
|
| 44 |
|
| 45 |
head_tracker = HeadTracker()
|
| 46 |
elif args.head_tracker == "mediapipe":
|
| 47 |
-
from reachy_mini_toolbox.vision import HeadTracker
|
| 48 |
|
| 49 |
head_tracker = HeadTracker()
|
| 50 |
|
|
@@ -59,17 +61,17 @@ def handle_vision_stuff(args, current_robot):
|
|
| 59 |
vision_manager = initialize_vision_manager(camera_worker)
|
| 60 |
except ImportError as e:
|
| 61 |
raise ImportError(
|
| 62 |
-
"To use --local-vision, please install the extra dependencies: pip install '.[local_vision]'"
|
| 63 |
) from e
|
| 64 |
else:
|
| 65 |
logging.getLogger(__name__).info(
|
| 66 |
-
"Using gpt-realtime for vision (default). Use --local-vision for local processing."
|
| 67 |
)
|
| 68 |
|
| 69 |
return camera_worker, head_tracker, vision_manager
|
| 70 |
|
| 71 |
|
| 72 |
-
def setup_logger(debug):
|
| 73 |
"""Setups the logger."""
|
| 74 |
log_level = "DEBUG" if debug else "INFO"
|
| 75 |
logging.basicConfig(
|
|
|
|
| 1 |
import logging
|
| 2 |
import argparse
|
| 3 |
import warnings
|
| 4 |
+
from typing import Any
|
| 5 |
|
| 6 |
+
from reachy_mini import ReachyMini
|
| 7 |
from reachy_mini_conversation_demo.camera_worker import CameraWorker
|
| 8 |
|
| 9 |
|
| 10 |
+
def parse_args() -> argparse.Namespace:
|
| 11 |
"""Parse command line arguments."""
|
| 12 |
parser = argparse.ArgumentParser("Reachy Mini Conversation Demo")
|
| 13 |
parser.add_argument(
|
|
|
|
| 28 |
return parser.parse_args()
|
| 29 |
|
| 30 |
|
| 31 |
+
def handle_vision_stuff(args: argparse.Namespace, current_robot: ReachyMini) -> tuple[CameraWorker | None, Any, Any]:
|
| 32 |
"""Initialize camera, head tracker, camera worker, and vision manager.
|
| 33 |
|
| 34 |
By default, vision is handled by gpt-realtime model when camera tool is used.
|
|
|
|
| 46 |
|
| 47 |
head_tracker = HeadTracker()
|
| 48 |
elif args.head_tracker == "mediapipe":
|
| 49 |
+
from reachy_mini_toolbox.vision import HeadTracker # type: ignore[no-redef]
|
| 50 |
|
| 51 |
head_tracker = HeadTracker()
|
| 52 |
|
|
|
|
| 61 |
vision_manager = initialize_vision_manager(camera_worker)
|
| 62 |
except ImportError as e:
|
| 63 |
raise ImportError(
|
| 64 |
+
"To use --local-vision, please install the extra dependencies: pip install '.[local_vision]'",
|
| 65 |
) from e
|
| 66 |
else:
|
| 67 |
logging.getLogger(__name__).info(
|
| 68 |
+
"Using gpt-realtime for vision (default). Use --local-vision for local processing.",
|
| 69 |
)
|
| 70 |
|
| 71 |
return camera_worker, head_tracker, vision_manager
|
| 72 |
|
| 73 |
|
| 74 |
+
def setup_logger(debug: bool) -> logging.Logger:
|
| 75 |
"""Setups the logger."""
|
| 76 |
log_level = "DEBUG" if debug else "INFO"
|
| 77 |
logging.basicConfig(
|
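The `# type: ignore[no-redef]` added to the second `HeadTracker` import above addresses a general mypy rule: two branches that bind the same name count as a redefinition. A stdlib-only illustration of the same situation (this is not project code):

```python
from collections.abc import Callable


def pick_parser(fast: bool) -> Callable[[str], object]:
    if fast:
        from json import loads
    else:
        # Same name bound on another branch: mypy reports [no-redef] here
        # unless the binding is ignored or aliased differently.
        from ast import literal_eval as loads  # type: ignore[no-redef]
    return loads


parse = pick_parser(fast=True)
print(parse('{"ok": true}'))  # -> {'ok': True}
```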
src/reachy_mini_conversation_demo/vision/processors.py
CHANGED
|
@@ -3,7 +3,7 @@ import time
|
|
| 3 |
import base64
|
| 4 |
import logging
|
| 5 |
import threading
|
| 6 |
-
from typing import Any
|
| 7 |
from dataclasses import dataclass
|
| 8 |
|
| 9 |
import cv2
|
|
@@ -34,7 +34,7 @@ class VisionConfig:
|
|
| 34 |
class VisionProcessor:
|
| 35 |
"""Handles SmolVLM2 model loading and inference."""
|
| 36 |
|
| 37 |
-
def __init__(self, vision_config: VisionConfig = None):
|
| 38 |
"""Initialize the vision processor."""
|
| 39 |
self.vision_config = vision_config or VisionConfig()
|
| 40 |
self.model_path = self.vision_config.model_path
|
|
@@ -60,7 +60,7 @@ class VisionProcessor:
|
|
| 60 |
"""Load model and processor onto the selected device."""
|
| 61 |
try:
|
| 62 |
logger.info(f"Loading SmolVLM2 model on {self.device} (HF_HOME={config.HF_HOME})")
|
| 63 |
-
self.processor = AutoProcessor.from_pretrained(self.model_path)
|
| 64 |
|
| 65 |
# Select dtype depending on device
|
| 66 |
if self.device == "cuda":
|
|
@@ -74,12 +74,13 @@ class VisionProcessor:
|
|
| 74 |
|
| 75 |
# flash_attention_2 is CUDA-only; skip on MPS/CPU
|
| 76 |
if self.device == "cuda":
|
| 77 |
-
model_kwargs["_attn_implementation"] = "flash_attention_2"
|
| 78 |
|
| 79 |
# Load model weights
|
| 80 |
-
self.model = AutoModelForImageTextToText.from_pretrained(self.model_path, **model_kwargs).to(self.device)
|
| 81 |
|
| 82 |
-
self.model
|
|
|
|
| 83 |
self._initialized = True
|
| 84 |
return True
|
| 85 |
|
|
@@ -89,11 +90,11 @@ class VisionProcessor:
|
|
| 89 |
|
| 90 |
def process_image(
|
| 91 |
self,
|
| 92 |
-
cv2_image: np.ndarray,
|
| 93 |
prompt: str = "Briefly describe what you see in one sentence.",
|
| 94 |
) -> str:
|
| 95 |
"""Process CV2 image and return description with retry logic."""
|
| 96 |
-
if not self._initialized:
|
| 97 |
return "Vision model not initialized"
|
| 98 |
|
| 99 |
for attempt in range(self.vision_config.max_retries):
|
|
@@ -189,7 +190,7 @@ class VisionProcessor:
|
|
| 189 |
# Fallback: return the full text cleaned up
|
| 190 |
return full_text.strip()
|
| 191 |
|
| 192 |
-
def get_model_info(self) ->
|
| 193 |
"""Get information about the loaded model."""
|
| 194 |
return {
|
| 195 |
"initialized": self._initialized,
|
|
@@ -205,16 +206,16 @@ class VisionProcessor:
|
|
| 205 |
class VisionManager:
|
| 206 |
"""Manages periodic vision processing and scene understanding."""
|
| 207 |
|
| 208 |
-
def __init__(self, camera, vision_config: VisionConfig = None):
|
| 209 |
"""Initialize vision manager with camera and configuration."""
|
| 210 |
self.camera = camera
|
| 211 |
self.vision_config = vision_config or VisionConfig()
|
| 212 |
self.vision_interval = self.vision_config.vision_interval
|
| 213 |
self.processor = VisionProcessor(self.vision_config)
|
| 214 |
|
| 215 |
-
self._last_processed_time = 0
|
| 216 |
self._stop_event = threading.Event()
|
| 217 |
-
self._thread:
|
| 218 |
|
| 219 |
# Initialize processor
|
| 220 |
if not self.processor.initialize():
|
|
@@ -245,7 +246,7 @@ class VisionManager:
|
|
| 245 |
frame = self.camera.get_latest_frame()
|
| 246 |
if frame is not None:
|
| 247 |
description = self.processor.process_image(
|
| 248 |
-
frame, "Briefly describe what you see in one sentence."
|
| 249 |
)
|
| 250 |
|
| 251 |
# Only update if we got a valid response
|
|
@@ -263,7 +264,7 @@ class VisionManager:
|
|
| 263 |
|
| 264 |
logger.info("Vision loop finished")
|
| 265 |
|
| 266 |
-
def get_status(self) ->
|
| 267 |
"""Get comprehensive status information."""
|
| 268 |
return {
|
| 269 |
"last_processed": self._last_processed_time,
|
|
@@ -274,7 +275,7 @@ class VisionManager:
|
|
| 274 |
}
|
| 275 |
|
| 276 |
|
| 277 |
-
def initialize_vision_manager(camera_worker) -> Optional[VisionManager]:
|
| 278 |
"""Initialize vision manager with model download and configuration.
|
| 279 |
|
| 280 |
Args:
|
|
@@ -318,7 +319,7 @@ def initialize_vision_manager(camera_worker) -> Optional[VisionManager]:
|
|
| 318 |
# Log device info
|
| 319 |
device_info = vision_manager.processor.get_model_info()
|
| 320 |
logger.info(
|
| 321 |
-
f"Vision processing enabled: {device_info.get('model_path')} on {device_info.get('device')}"
|
| 322 |
)
|
| 323 |
|
| 324 |
return vision_manager
|
|
|
|
| 3 |
import base64
|
| 4 |
import logging
|
| 5 |
import threading
|
| 6 |
+
from typing import Any
|
| 7 |
from dataclasses import dataclass
|
| 8 |
|
| 9 |
import cv2
|
|
|
|
| 34 |
class VisionProcessor:
|
| 35 |
"""Handles SmolVLM2 model loading and inference."""
|
| 36 |
|
| 37 |
+
def __init__(self, vision_config: VisionConfig | None = None):
|
| 38 |
"""Initialize the vision processor."""
|
| 39 |
self.vision_config = vision_config or VisionConfig()
|
| 40 |
self.model_path = self.vision_config.model_path
|
|
|
|
| 60 |
"""Load model and processor onto the selected device."""
|
| 61 |
try:
|
| 62 |
logger.info(f"Loading SmolVLM2 model on {self.device} (HF_HOME={config.HF_HOME})")
|
| 63 |
+
self.processor = AutoProcessor.from_pretrained(self.model_path) # type: ignore[no-untyped-call]
|
| 64 |
|
| 65 |
# Select dtype depending on device
|
| 66 |
if self.device == "cuda":
|
|
|
|
| 74 |
|
| 75 |
# flash_attention_2 is CUDA-only; skip on MPS/CPU
|
| 76 |
if self.device == "cuda":
|
| 77 |
+
model_kwargs["_attn_implementation"] = "flash_attention_2" # type: ignore[assignment]
|
| 78 |
|
| 79 |
# Load model weights
|
| 80 |
+
self.model = AutoModelForImageTextToText.from_pretrained(self.model_path, **model_kwargs).to(self.device) # type: ignore[arg-type]
|
| 81 |
|
| 82 |
+
if self.model is not None:
|
| 83 |
+
self.model.eval()
|
| 84 |
self._initialized = True
|
| 85 |
return True
|
| 86 |
|
|
|
|
| 90 |
|
| 91 |
def process_image(
|
| 92 |
self,
|
| 93 |
+
cv2_image: np.ndarray[Any, Any],
|
| 94 |
prompt: str = "Briefly describe what you see in one sentence.",
|
| 95 |
) -> str:
|
| 96 |
"""Process CV2 image and return description with retry logic."""
|
| 97 |
+
if not self._initialized or self.processor is None or self.model is None:
|
| 98 |
return "Vision model not initialized"
|
| 99 |
|
| 100 |
for attempt in range(self.vision_config.max_retries):
|
|
|
|
| 190 |
# Fallback: return the full text cleaned up
|
| 191 |
return full_text.strip()
|
| 192 |
|
| 193 |
+
def get_model_info(self) -> dict[str, Any]:
|
| 194 |
"""Get information about the loaded model."""
|
| 195 |
return {
|
| 196 |
"initialized": self._initialized,
|
|
|
|
| 206 |
class VisionManager:
|
| 207 |
"""Manages periodic vision processing and scene understanding."""
|
| 208 |
|
| 209 |
+
def __init__(self, camera: Any, vision_config: VisionConfig | None = None):
|
| 210 |
"""Initialize vision manager with camera and configuration."""
|
| 211 |
self.camera = camera
|
| 212 |
self.vision_config = vision_config or VisionConfig()
|
| 213 |
self.vision_interval = self.vision_config.vision_interval
|
| 214 |
self.processor = VisionProcessor(self.vision_config)
|
| 215 |
|
| 216 |
+
self._last_processed_time = 0.0
|
| 217 |
self._stop_event = threading.Event()
|
| 218 |
+
self._thread: threading.Thread | None = None
|
| 219 |
|
| 220 |
# Initialize processor
|
| 221 |
if not self.processor.initialize():
|
|
|
|
| 246 |
frame = self.camera.get_latest_frame()
|
| 247 |
if frame is not None:
|
| 248 |
description = self.processor.process_image(
|
| 249 |
+
frame, "Briefly describe what you see in one sentence.",
|
| 250 |
)
|
| 251 |
|
| 252 |
# Only update if we got a valid response
|
|
|
|
| 264 |
|
| 265 |
logger.info("Vision loop finished")
|
| 266 |
|
| 267 |
+
def get_status(self) -> dict[str, Any]:
|
| 268 |
"""Get comprehensive status information."""
|
| 269 |
return {
|
| 270 |
"last_processed": self._last_processed_time,
|
|
|
|
| 275 |
}
|
| 276 |
|
| 277 |
|
| 278 |
+
def initialize_vision_manager(camera_worker: Any) -> VisionManager | None:
|
| 279 |
"""Initialize vision manager with model download and configuration.
|
| 280 |
|
| 281 |
Args:
|
|
|
|
| 319 |
# Log device info
|
| 320 |
device_info = vision_manager.processor.get_model_info()
|
| 321 |
logger.info(
|
| 322 |
+
f"Vision processing enabled: {device_info.get('model_path')} on {device_info.get('device')}",
|
| 323 |
)
|
| 324 |
|
| 325 |
return vision_manager
|
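Two recurring fixes in this file are worth isolating: `VisionConfig = None` defaults become explicit `VisionConfig | None = None` (mypy rejects implicit Optional), and Optional attributes like `self.model` are narrowed with `is None` guards before use, since a separate `_initialized` flag tells mypy nothing. A minimal sketch with illustrative names (the config value is a placeholder, not the real default):

```python
from dataclasses import dataclass


@dataclass
class Config:
    model_path: str = "some/model"  # placeholder value for the sketch


class Processor:
    def __init__(self, config: Config | None = None) -> None:
        # Explicit `Config | None` replaces the implicit-Optional `Config = None`.
        self.config = config or Config()
        self.model: object | None = None
        self._initialized = False

    def describe(self) -> str:
        # The boolean flag alone does not narrow self.model for mypy;
        # the explicit `is None` check does.
        if not self._initialized or self.model is None:
            return "vision model not initialized"
        return f"{self.config.model_path} ready"


print(Processor().describe())  # -> vision model not initialized
```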
src/reachy_mini_conversation_demo/vision/yolo_head_tracker.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import logging
|
| 3 |
-
from typing import
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
|
| 8 |
try:
|
| 9 |
from supervision import Detections
|
| 10 |
-
from ultralytics import YOLO
|
| 11 |
except ImportError as e:
|
| 12 |
raise ImportError(
|
| 13 |
-
"To use YOLO head tracker, please install the extra dependencies: pip install '.[yolo_vision]'"
|
| 14 |
) from e
|
| 15 |
from huggingface_hub import hf_hub_download
|
| 16 |
|
|
@@ -48,7 +48,7 @@ class HeadTracker:
|
|
| 48 |
logger.error(f"Failed to load YOLO model: {e}")
|
| 49 |
raise
|
| 50 |
|
| 51 |
-
def _select_best_face(self, detections: Detections) ->
|
| 52 |
"""Select the best face based on confidence and area (largest face with highest confidence).
|
| 53 |
|
| 54 |
Args:
|
|
@@ -61,6 +61,10 @@ class HeadTracker:
|
|
| 61 |
if detections.xyxy.shape[0] == 0:
|
| 62 |
return None
|
| 63 |
|
| 64 |
# Filter by confidence threshold
|
| 65 |
valid_mask = detections.confidence >= self.confidence_threshold
|
| 66 |
if not np.any(valid_mask):
|
|
@@ -78,9 +82,9 @@ class HeadTracker:
|
|
| 78 |
|
| 79 |
# Return index of best face
|
| 80 |
best_idx = valid_indices[np.argmax(scores)]
|
| 81 |
-
return best_idx
|
| 82 |
|
| 83 |
-
def _bbox_to_mp_coords(self, bbox: np.ndarray, w: int, h: int) -> np.ndarray:
|
| 84 |
"""Convert bounding box center to MediaPipe-style coordinates [-1, 1].
|
| 85 |
|
| 86 |
Args:
|
|
@@ -101,7 +105,7 @@ class HeadTracker:
|
|
| 101 |
|
| 102 |
return np.array([norm_x, norm_y], dtype=np.float32)
|
| 103 |
|
| 104 |
-
def get_head_position(self, img: np.ndarray) ->
|
| 105 |
"""Get head position from face detection.
|
| 106 |
|
| 107 |
Args:
|
|
@@ -125,9 +129,10 @@ class HeadTracker:
|
|
| 125 |
return None, None
|
| 126 |
|
| 127 |
bbox = detections.xyxy[face_idx]
|
| 128 |
-
confidence = detections.confidence[face_idx]
|
| 129 |
|
| 130 |
-
|
| 131 |
|
| 132 |
# Get face center in [-1, 1] coordinates
|
| 133 |
face_center = self._bbox_to_mp_coords(bbox, w, h)
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import logging
|
| 3 |
+
from typing import Any
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
|
| 8 |
try:
|
| 9 |
from supervision import Detections
|
| 10 |
+
from ultralytics import YOLO # type: ignore[attr-defined]
|
| 11 |
except ImportError as e:
|
| 12 |
raise ImportError(
|
| 13 |
+
"To use YOLO head tracker, please install the extra dependencies: pip install '.[yolo_vision]'",
|
| 14 |
) from e
|
| 15 |
from huggingface_hub import hf_hub_download
|
| 16 |
|
|
|
|
| 48 |
logger.error(f"Failed to load YOLO model: {e}")
|
| 49 |
raise
|
| 50 |
|
| 51 |
+
def _select_best_face(self, detections: Detections) -> int | None:
|
| 52 |
"""Select the best face based on confidence and area (largest face with highest confidence).
|
| 53 |
|
| 54 |
Args:
|
|
|
|
| 61 |
if detections.xyxy.shape[0] == 0:
|
| 62 |
return None
|
| 63 |
|
| 64 |
+
# Check if confidence is available
|
| 65 |
+
if detections.confidence is None:
|
| 66 |
+
return None
|
| 67 |
+
|
| 68 |
# Filter by confidence threshold
|
| 69 |
valid_mask = detections.confidence >= self.confidence_threshold
|
| 70 |
if not np.any(valid_mask):
|
|
|
|
| 82 |
|
| 83 |
# Return index of best face
|
| 84 |
best_idx = valid_indices[np.argmax(scores)]
|
| 85 |
+
return int(best_idx)
|
| 86 |
|
| 87 |
+
def _bbox_to_mp_coords(self, bbox: np.ndarray[Any, Any], w: int, h: int) -> np.ndarray[Any, Any]:
|
| 88 |
"""Convert bounding box center to MediaPipe-style coordinates [-1, 1].
|
| 89 |
|
| 90 |
Args:
|
|
|
|
| 105 |
|
| 106 |
return np.array([norm_x, norm_y], dtype=np.float32)
|
| 107 |
|
| 108 |
+
def get_head_position(self, img: np.ndarray[Any, Any]) -> tuple[np.ndarray[Any, Any] | None, float | None]:
|
| 109 |
"""Get head position from face detection.
|
| 110 |
|
| 111 |
Args:
|
|
|
|
| 129 |
return None, None
|
| 130 |
|
| 131 |
bbox = detections.xyxy[face_idx]
|
|
|
|
| 132 |
|
| 133 |
+
if detections.confidence is not None:
|
| 134 |
+
confidence = detections.confidence[face_idx]
|
| 135 |
+
logger.debug(f"Face detected with confidence: {confidence:.2f}")
|
| 136 |
|
| 137 |
# Get face center in [-1, 1] coordinates
|
| 138 |
face_center = self._bbox_to_mp_coords(bbox, w, h)
|
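The confidence handling above follows the same narrowing discipline: `Detections.confidence` is Optional in supervision's API, so it is None-checked before any comparison, and `np.argmax` returns a NumPy integer that gets an explicit `int()` to match the declared `int | None` return. A stand-alone reduction of that logic:

```python
import numpy as np


def select_best(confidences: np.ndarray | None, threshold: float = 0.5) -> int | None:
    """Pick the index of the highest confidence above threshold, else None."""
    if confidences is None:  # narrow the Optional before any array math
        return None
    valid = confidences >= threshold
    if not np.any(valid):
        return None
    # Mask out invalid entries, then cast: np.argmax yields np.intp, not int.
    return int(np.argmax(np.where(valid, confidences, -np.inf)))


print(select_best(np.array([0.2, 0.9, 0.7])))  # -> 1
print(select_best(None))  # -> None
```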
tests/audio/test_head_wobbler.py
CHANGED
|
@@ -4,7 +4,8 @@ import math
|
|
| 4 |
import time
|
| 5 |
import base64
|
| 6 |
import threading
|
| 7 |
-
from typing import
|
|
|
|
| 8 |
|
| 9 |
import numpy as np
|
| 10 |
|
|
@@ -31,10 +32,10 @@ def _wait_for(predicate: Callable[[], bool], timeout: float = 0.6) -> bool:
|
|
| 31 |
return False
|
| 32 |
|
| 33 |
|
| 34 |
-
def _start_wobbler() ->
|
| 35 |
-
captured:
|
| 36 |
|
| 37 |
-
def capture(offsets:
|
| 38 |
captured.append((time.time(), offsets))
|
| 39 |
|
| 40 |
wobbler = HeadWobbler(set_speech_offsets=capture)
|
|
@@ -74,7 +75,7 @@ def test_reset_allows_future_offsets() -> None:
|
|
| 74 |
wobbler.stop()
|
| 75 |
|
| 76 |
|
| 77 |
-
def test_reset_during_inflight_chunk_keeps_worker(monkeypatch) -> None:
|
| 78 |
"""Simulate reset during chunk processing to ensure the worker survives."""
|
| 79 |
wobbler, captured = _start_wobbler()
|
| 80 |
ready = threading.Event()
|
|
|
|
| 4 |
import time
|
| 5 |
import base64
|
| 6 |
import threading
|
| 7 |
+
from typing import Any
|
| 8 |
+
from collections.abc import Callable
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
|
|
|
|
| 32 |
return False
|
| 33 |
|
| 34 |
|
| 35 |
+
def _start_wobbler() -> tuple[HeadWobbler, list[tuple[float, tuple[float, float, float, float, float, float]]]]:
|
| 36 |
+
captured: list[tuple[float, tuple[float, float, float, float, float, float]]] = []
|
| 37 |
|
| 38 |
+
def capture(offsets: tuple[float, float, float, float, float, float]) -> None:
|
| 39 |
captured.append((time.time(), offsets))
|
| 40 |
|
| 41 |
wobbler = HeadWobbler(set_speech_offsets=capture)
|
|
|
|
| 75 |
wobbler.stop()
|
| 76 |
|
| 77 |
|
| 78 |
+
def test_reset_during_inflight_chunk_keeps_worker(monkeypatch: Any) -> None:
|
| 79 |
"""Simulate reset during chunk processing to ensure the worker survives."""
|
| 80 |
wobbler, captured = _start_wobbler()
|
| 81 |
ready = threading.Event()
|
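The test typing above boils down to giving the capture callback and its log precise shapes, with `Callable` imported from `collections.abc` rather than `typing`. A reduced version of the fixture (the `Offsets` alias is introduced here for readability; the test spells the tuple out):

```python
import time
from collections.abc import Callable

Offsets = tuple[float, float, float, float, float, float]


def make_recorder() -> tuple[Callable[[Offsets], None], list[tuple[float, Offsets]]]:
    """Build a capture callback plus the (timestamp, offsets) list it appends to."""
    captured: list[tuple[float, Offsets]] = []

    def capture(offsets: Offsets) -> None:
        captured.append((time.time(), offsets))

    return capture, captured


capture, log = make_recorder()
capture((0.0, 0.0, 0.0, 0.0, 0.0, 1.0))
print(len(log))  # -> 1
```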
uv.lock
CHANGED
|
@@ -962,7 +962,7 @@ name = "exceptiongroup"
|
|
| 962 |
version = "1.3.0"
|
| 963 |
source = { registry = "https://pypi.org/simple" }
|
| 964 |
dependencies = [
|
| 965 |
-
{ name = "typing-extensions", marker = "python_full_version < '3.
|
| 966 |
]
|
| 967 |
sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
|
| 968 |
wheels = [
|
|
@@ -2320,6 +2320,60 @@ wheels = [
|
|
| 2320 |
{ url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
|
| 2321 |
]
|
| 2322 |
|
| 2323 |
[[package]]
|
| 2324 |
name = "networkx"
|
| 2325 |
version = "3.4.2"
|
|
@@ -2493,7 +2547,7 @@ name = "nvidia-cudnn-cu12"
|
|
| 2493 |
version = "9.10.2.21"
|
| 2494 |
source = { registry = "https://pypi.org/simple" }
|
| 2495 |
dependencies = [
|
| 2496 |
-
{ name = "nvidia-cublas-cu12" },
|
| 2497 |
]
|
| 2498 |
wheels = [
|
| 2499 |
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
|
|
@@ -2504,7 +2558,7 @@ name = "nvidia-cufft-cu12"
|
|
| 2504 |
version = "11.3.3.83"
|
| 2505 |
source = { registry = "https://pypi.org/simple" }
|
| 2506 |
dependencies = [
|
| 2507 |
-
{ name = "nvidia-nvjitlink-cu12" },
|
| 2508 |
]
|
| 2509 |
wheels = [
|
| 2510 |
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
|
|
@@ -2531,9 +2585,9 @@ name = "nvidia-cusolver-cu12"
|
|
| 2531 |
version = "11.7.3.90"
|
| 2532 |
source = { registry = "https://pypi.org/simple" }
|
| 2533 |
dependencies = [
|
| 2534 |
-
{ name = "nvidia-cublas-cu12" },
|
| 2535 |
-
{ name = "nvidia-cusparse-cu12" },
|
| 2536 |
-
{ name = "nvidia-nvjitlink-cu12" },
|
| 2537 |
]
|
| 2538 |
wheels = [
|
| 2539 |
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
|
|
@@ -2544,7 +2598,7 @@ name = "nvidia-cusparse-cu12"
|
|
| 2544 |
version = "12.5.8.93"
|
| 2545 |
source = { registry = "https://pypi.org/simple" }
|
| 2546 |
dependencies = [
|
| 2547 |
-
{ name = "nvidia-nvjitlink-cu12" },
|
| 2548 |
]
|
| 2549 |
wheels = [
|
| 2550 |
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
|
|
@@ -2799,6 +2853,15 @@ wheels = [
|
|
| 2799 |
{ url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" },
|
| 2800 |
]
|
| 2801 |
|
| 2802 |
[[package]]
|
| 2803 |
name = "pillow"
|
| 2804 |
version = "11.3.0"
|
|
@@ -3600,6 +3663,7 @@ yolo-vision = [
|
|
| 3600 |
|
| 3601 |
[package.dev-dependencies]
|
| 3602 |
dev = [
|
|
|
|
| 3603 |
{ name = "pytest" },
|
| 3604 |
{ name = "ruff" },
|
| 3605 |
]
|
|
@@ -3630,6 +3694,7 @@ provides-extras = ["local-vision", "yolo-vision", "mediapipe-vision", "all-visio
|
|
| 3630 |
|
| 3631 |
[package.metadata.requires-dev]
|
| 3632 |
dev = [
|
|
|
|
| 3633 |
{ name = "pytest" },
|
| 3634 |
{ name = "ruff", specifier = "==0.12.0" },
|
| 3635 |
]
|
|
|
|
| 962 |
version = "1.3.0"
|
| 963 |
source = { registry = "https://pypi.org/simple" }
|
| 964 |
dependencies = [
|
| 965 |
+
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
|
| 966 |
]
|
| 967 |
sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
|
| 968 |
wheels = [
|
|
|
|
| 2320 |
{ url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
|
| 2321 |
]
|
| 2322 |
|
| 2323 |
+
[[package]]
|
| 2324 |
+
name = "mypy"
|
| 2325 |
+
version = "1.18.2"
|
| 2326 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2327 |
+
dependencies = [
|
| 2328 |
+
{ name = "mypy-extensions" },
|
| 2329 |
+
{ name = "pathspec" },
|
| 2330 |
+
{ name = "tomli", marker = "python_full_version < '3.11'" },
|
| 2331 |
+
{ name = "typing-extensions" },
|
| 2332 |
+
]
|
| 2333 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" }
|
| 2334 |
+
wheels = [
|
| 2335 |
+
{ url = "https://files.pythonhosted.org/packages/03/6f/657961a0743cff32e6c0611b63ff1c1970a0b482ace35b069203bf705187/mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c", size = 12807973, upload-time = "2025-09-19T00:10:35.282Z" },
|
| 2336 |
+
{ url = "https://files.pythonhosted.org/packages/10/e9/420822d4f661f13ca8900f5fa239b40ee3be8b62b32f3357df9a3045a08b/mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e", size = 11896527, upload-time = "2025-09-19T00:10:55.791Z" },
|
| 2337 |
+
{ url = "https://files.pythonhosted.org/packages/aa/73/a05b2bbaa7005f4642fcfe40fb73f2b4fb6bb44229bd585b5878e9a87ef8/mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b", size = 12507004, upload-time = "2025-09-19T00:11:05.411Z" },
|
| 2338 |
+
{ url = "https://files.pythonhosted.org/packages/4f/01/f6e4b9f0d031c11ccbd6f17da26564f3a0f3c4155af344006434b0a05a9d/mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66", size = 13245947, upload-time = "2025-09-19T00:10:46.923Z" },
|
| 2339 |
+
{ url = "https://files.pythonhosted.org/packages/d7/97/19727e7499bfa1ae0773d06afd30ac66a58ed7437d940c70548634b24185/mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428", size = 13499217, upload-time = "2025-09-19T00:09:39.472Z" },
|
| 2340 |
+
{ url = "https://files.pythonhosted.org/packages/9f/4f/90dc8c15c1441bf31cf0f9918bb077e452618708199e530f4cbd5cede6ff/mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed", size = 9766753, upload-time = "2025-09-19T00:10:49.161Z" },
|
| 2341 |
+
{ url = "https://files.pythonhosted.org/packages/88/87/cafd3ae563f88f94eec33f35ff722d043e09832ea8530ef149ec1efbaf08/mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f", size = 12731198, upload-time = "2025-09-19T00:09:44.857Z" },
|
| 2342 |
+
{ url = "https://files.pythonhosted.org/packages/0f/e0/1e96c3d4266a06d4b0197ace5356d67d937d8358e2ee3ffac71faa843724/mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341", size = 11817879, upload-time = "2025-09-19T00:09:47.131Z" },
|
| 2343 |
+
{ url = "https://files.pythonhosted.org/packages/72/ef/0c9ba89eb03453e76bdac5a78b08260a848c7bfc5d6603634774d9cd9525/mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d", size = 12427292, upload-time = "2025-09-19T00:10:22.472Z" },
|
| 2344 |
+
{ url = "https://files.pythonhosted.org/packages/1a/52/ec4a061dd599eb8179d5411d99775bec2a20542505988f40fc2fee781068/mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86", size = 13163750, upload-time = "2025-09-19T00:09:51.472Z" },
|
| 2345 |
+
{ url = "https://files.pythonhosted.org/packages/c4/5f/2cf2ceb3b36372d51568f2208c021870fe7834cf3186b653ac6446511839/mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37", size = 13351827, upload-time = "2025-09-19T00:09:58.311Z" },
|
| 2346 |
+
{ url = "https://files.pythonhosted.org/packages/c8/7d/2697b930179e7277529eaaec1513f8de622818696857f689e4a5432e5e27/mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8", size = 9757983, upload-time = "2025-09-19T00:10:09.071Z" },
|
| 2347 |
+
{ url = "https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" },
|
| 2348 |
+
{ url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" },
|
| 2349 |
+
{ url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" },
|
| 2350 |
+
{ url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" },
|
| 2351 |
+
{ url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" },
|
| 2352 |
+
{ url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" },
|
| 2353 |
+
{ url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" },
|
| 2354 |
+
{ url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" },
|
| 2355 |
+
{ url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" },
|
| 2356 |
+
{ url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" },
|
| 2357 |
+
{ url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" },
|
| 2358 |
+
{ url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" },
|
| 2359 |
+
{ url = "https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" },
|
| 2360 |
+
{ url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" },
|
| 2361 |
+
{ url = "https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" },
|
| 2362 |
+
{ url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" },
|
| 2363 |
+
{ url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" },
|
| 2364 |
+
{ url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" },
|
| 2365 |
+
{ url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" },
|
| 2366 |
+
]
|
| 2367 |
+
|
| 2368 |
+
[[package]]
|
| 2369 |
+
name = "mypy-extensions"
|
| 2370 |
+
version = "1.1.0"
|
| 2371 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2372 |
+
sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
|
| 2373 |
+
wheels = [
|
| 2374 |
+
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
|
| 2375 |
+
]
|
| 2376 |
+
|
| 2377 |
[[package]]
|
| 2378 |
name = "networkx"
|
| 2379 |
version = "3.4.2"
|
|
|
|
| 2547 |
version = "9.10.2.21"
|
| 2548 |
source = { registry = "https://pypi.org/simple" }
|
| 2549 |
dependencies = [
|
| 2550 |
+
{ name = "nvidia-cublas-cu12", marker = "sys_platform != 'win32'" },
|
| 2551 |
]
|
| 2552 |
wheels = [
|
| 2553 |
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
|
|
|
|
| 2558 |
version = "11.3.3.83"
|
| 2559 |
source = { registry = "https://pypi.org/simple" }
|
| 2560 |
dependencies = [
|
| 2561 |
+
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" },
|
| 2562 |
]
|
| 2563 |
wheels = [
|
| 2564 |
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
|
|
|
|
| 2585 |
version = "11.7.3.90"
|
| 2586 |
source = { registry = "https://pypi.org/simple" }
|
| 2587 |
dependencies = [
|
| 2588 |
+
{ name = "nvidia-cublas-cu12", marker = "sys_platform != 'win32'" },
|
| 2589 |
+
{ name = "nvidia-cusparse-cu12", marker = "sys_platform != 'win32'" },
|
| 2590 |
+
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" },
|
| 2591 |
]
|
| 2592 |
wheels = [
|
| 2593 |
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
|
|
|
|
| 2598 |
version = "12.5.8.93"
|
| 2599 |
source = { registry = "https://pypi.org/simple" }
|
| 2600 |
dependencies = [
|
| 2601 |
+
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" },
|
| 2602 |
]
|
| 2603 |
wheels = [
|
| 2604 |
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
|
|
|
|
| 2853 |
{ url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" },
|
| 2854 |
]
|
| 2855 |
|
| 2856 |
+
[[package]]
|
| 2857 |
+
name = "pathspec"
|
| 2858 |
+
version = "0.12.1"
|
| 2859 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2860 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
|
| 2861 |
+
wheels = [
|
| 2862 |
+
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
|
| 2863 |
+
]
|
| 2864 |
+
|
| 2865 |
[[package]]
|
| 2866 |
name = "pillow"
|
| 2867 |
version = "11.3.0"
|
|
|
|
| 3663 |
|
| 3664 |
[package.dev-dependencies]
|
| 3665 |
dev = [
|
| 3666 |
+
{ name = "mypy" },
|
| 3667 |
{ name = "pytest" },
|
| 3668 |
{ name = "ruff" },
|
| 3669 |
]
|
|
|
|
| 3694 |
|
| 3695 |
[package.metadata.requires-dev]
|
| 3696 |
dev = [
|
| 3697 |
+
{ name = "mypy", specifier = ">=1.18.2" },
|
| 3698 |
{ name = "pytest" },
|
| 3699 |
{ name = "ruff", specifier = "==0.12.0" },
|
| 3700 |
]
|
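The lock changes are largely mechanical: the mypy entry (with its mypy-extensions and pathspec dependencies) is new, and the `nvidia-*` entries appear to have gained `sys_platform != 'win32'` markers from re-resolution. For readers unfamiliar with PEP 508 markers, a short illustration of how such a marker evaluates, using the third-party `packaging` library (not a dependency of this project):

```python
from packaging.markers import Marker

# The same marker syntax uv records in the lock file.
marker = Marker("sys_platform != 'win32'")
print(marker.evaluate())  # True on Linux/macOS, False on Windows
```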