Commit cc113a1 by apirrone · Parents: e2c0966 42bded7

Merge branch 'develop' into 62-appify-the-demo
Files changed (35)
  1. .env.example +3 -0
  2. .github/workflows/tests.yml +3 -3
  3. .github/workflows/typecheck.yml +1 -1
  4. README.md +47 -5
  5. pyproject.toml +9 -2
  6. src/reachy_mini_conversation_app/config.py +2 -0
  7. src/reachy_mini_conversation_app/console.py +18 -19
  8. src/reachy_mini_conversation_app/openai_realtime.py +101 -39
  9. src/reachy_mini_conversation_app/profiles/__init__.py +1 -0
  10. src/reachy_mini_conversation_app/profiles/default/instructions.txt +1 -0
  11. src/reachy_mini_conversation_app/profiles/default/tools.txt +8 -0
  12. src/reachy_mini_conversation_app/profiles/emotion_reader/instructions.txt +112 -0
  13. src/reachy_mini_conversation_app/profiles/emotion_reader/tools.txt +6 -0
  14. src/reachy_mini_conversation_app/profiles/example/instructions.txt +3 -0
  15. src/reachy_mini_conversation_app/profiles/example/sweep_look.py +127 -0
  16. src/reachy_mini_conversation_app/profiles/example/tools.txt +14 -0
  17. src/reachy_mini_conversation_app/prompts.py +84 -53
  18. src/reachy_mini_conversation_app/prompts/behaviors/silent_robot.txt +6 -0
  19. src/reachy_mini_conversation_app/prompts/default_prompt.txt +47 -0
  20. src/reachy_mini_conversation_app/prompts/identities/basic_info.txt +4 -0
  21. src/reachy_mini_conversation_app/prompts/identities/witty_identity.txt +4 -0
  22. src/reachy_mini_conversation_app/prompts/passion_for_lobster_jokes.txt +1 -0
  23. src/reachy_mini_conversation_app/tools.py +0 -484
  24. src/reachy_mini_conversation_app/tools/__init__.py +4 -0
  25. src/reachy_mini_conversation_app/tools/camera.py +67 -0
  26. src/reachy_mini_conversation_app/tools/core_tools.py +224 -0
  27. src/reachy_mini_conversation_app/tools/dance.py +87 -0
  28. src/reachy_mini_conversation_app/tools/do_nothing.py +30 -0
  29. src/reachy_mini_conversation_app/tools/head_tracking.py +31 -0
  30. src/reachy_mini_conversation_app/tools/move_head.py +79 -0
  31. src/reachy_mini_conversation_app/tools/play_emotion.py +84 -0
  32. src/reachy_mini_conversation_app/tools/stop_dance.py +31 -0
  33. src/reachy_mini_conversation_app/tools/stop_emotion.py +31 -0
  34. tests/test_openai_realtime.py +1 -1
  35. uv.lock +0 -0
.env.example CHANGED
@@ -10,3 +10,6 @@ HF_HOME=./cache
 
 # Hugging Face token for accessing datasets/models
 HF_TOKEN=
+
+# To select a specific profile with custom instructions and tools, to be placed in profiles/<myprofile>/__init__.py
+REACHY_MINI_CUSTOM_PROFILE="example"
.github/workflows/tests.yml CHANGED
@@ -60,15 +60,15 @@ jobs:
       # Prefetch HF dataset to avoid download during test collection
       - name: Prefetch HF dataset
         run: |
-          uv run python - <<'PY'
+          .venv/bin/python - <<'PY'
           from huggingface_hub import snapshot_download
           snapshot_download(
               repo_id="pollen-robotics/reachy-mini-emotions-library",
               repo_type="dataset",
               etag_timeout=120,
-              max_workers=4
+              max_workers=4,
           )
           PY
 
       - name: Run tests
-        run: uv run pytest -q
+        run: .venv/bin/pytest -q
.github/workflows/typecheck.yml CHANGED
@@ -26,4 +26,4 @@ jobs:
         run: uv sync --frozen --group dev --extra all_vision
 
       - name: Run mypy
-        run: uv run mypy --pretty --show-error-codes .
+        run: .venv/bin/mypy --pretty --show-error-codes .
README.md CHANGED
@@ -44,12 +44,16 @@ source .venv/bin/activate
 uv sync
 ```
 
-To include optional vision dependencies:
+> [!NOTE]
+> To reproduce the exact dependency set from this repo's `uv.lock`, run `uv sync` with `--locked` (or `--frozen`). This ensures `uv` installs directly from the lockfile without re-resolving or updating any versions.
+
+To include optional dependencies:
 ```
-uv sync --extra local_vision      # For local PyTorch/Transformers vision
-uv sync --extra yolo_vision       # For YOLO-based vision
-uv sync --extra mediapipe_vision  # For MediaPipe-based vision
-uv sync --extra all_vision        # For all vision features
+uv sync --extra reachy_mini_wireless  # For wireless Reachy Mini with GStreamer support
+uv sync --extra local_vision          # For local PyTorch/Transformers vision
+uv sync --extra yolo_vision           # For YOLO-based vision
+uv sync --extra mediapipe_vision      # For MediaPipe-based vision
+uv sync --extra all_vision            # For all vision features
 ```
 
 You can combine extras or include dev dependencies:
@@ -68,6 +72,9 @@ pip install -e .
 Install optional extras depending on the feature set you need:
 
 ```bash
+# Wireless Reachy Mini support
+pip install -e .[reachy_mini_wireless]
+
 # Vision stacks (choose at least one if you plan to run face tracking)
 pip install -e .[local_vision]
 pip install -e .[yolo_vision]
@@ -84,6 +91,7 @@ Some wheels (e.g. PyTorch) are large and require compatible CUDA or CPU builds
 
 | Extra | Purpose | Notes |
 |-------|---------|-------|
+| `reachy_mini_wireless` | Wireless Reachy Mini with GStreamer support. | Required for wireless versions of Reachy Mini; includes GStreamer dependencies. |
 | `local_vision` | Run the local VLM (SmolVLM2) through PyTorch/Transformers. | GPU recommended; ensure compatible PyTorch builds for your platform. |
 | `yolo_vision` | YOLOv8 tracking via `ultralytics` and `supervision`. | CPU friendly; supports the `--head-tracker yolo` option. |
 | `mediapipe_vision` | Lightweight landmark tracking with MediaPipe. | Works on CPU; enables `--head-tracker mediapipe`. |
@@ -156,6 +164,40 @@ By default, the app runs in console mode for direct audio interaction. Use the `
 | `stop_emotion` | Clear queued emotions. | Core install only. |
 | `do_nothing` | Explicitly remain idle. | Core install only. |
 
+## Using custom profiles
+Create custom profiles with dedicated instructions and enabled tools!
+
+Set `REACHY_MINI_CUSTOM_PROFILE=<name>` to load `src/reachy_mini_conversation_app/profiles/<name>/` (see `.env.example`). If unset, the `default` profile is used.
+
+Each profile requires two files, `instructions.txt` (prompt text) and `tools.txt` (list of allowed tools), and may optionally contain custom tool implementations.
+
+### Custom instructions
+Write plain-text prompts in `instructions.txt`. To reuse shared prompt pieces, add lines like:
+```
+[passion_for_lobster_jokes]
+[identities/witty_identity]
+```
+Each placeholder pulls in the matching file under `src/reachy_mini_conversation_app/prompts/` (nested paths allowed). See `src/reachy_mini_conversation_app/profiles/example/` for a reference layout.
+
+### Enabling tools
+List enabled tools in `tools.txt`, one per line; prefix a line with `#` to comment it out. For example:
+
+```
+play_emotion
+# move_head
+
+# My custom tool defined locally
+sweep_look
+```
+Tools are resolved first from Python files in the profile folder (custom tools), then from the shared library `src/reachy_mini_conversation_app/tools/` (e.g., `dance`, `head_tracking`).
+
+### Custom tools
+On top of the built-in tools in the shared library, you can implement custom tools specific to your profile by adding Python files to the profile folder.
+Custom tools must subclass `reachy_mini_conversation_app.tools.core_tools.Tool` (see `profiles/example/sweep_look.py`).
+
+
 ## Development workflow
 - Install the dev group extras: `uv sync --group dev` or `pip install -e .[dev]`.
 - Run formatting and linting: `ruff check .`.
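
The `Tool` interface referenced in the "Custom tools" section above is small: a class attribute for `name`, `description`, and a JSON-Schema `parameters_schema`, plus an async `__call__`. A minimal profile-local tool might look like the sketch below; the `wave_hello` name and behavior are hypothetical, but the interface matches `core_tools.Tool` as defined in this repo.

```python
# profiles/myprofile/wave_hello.py -- a minimal sketch of a profile-local tool.
# The tool name "wave_hello" and its behavior are hypothetical examples.
import logging
from typing import Any, Dict

from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies

logger = logging.getLogger(__name__)


class WaveHello(Tool):
    """Acknowledge the user with a short status message."""

    name = "wave_hello"  # must match the line you add to tools.txt
    description = "Greet the user."
    parameters_schema = {"type": "object", "properties": {}, "required": []}

    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
        logger.info("Tool call: wave_hello")
        # A real tool would queue moves via deps.movement_manager,
        # as profiles/example/sweep_look.py does.
        return {"status": "waved hello"}
```

Listing `wave_hello` in the profile's `tools.txt` then exposes it to the model alongside the shared tools.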
pyproject.toml CHANGED
@@ -12,7 +12,7 @@ requires-python = ">=3.10"
 dependencies = [
     #Media
     "aiortc>=1.13.0",
-    "fastrtc>=0.0.33",
+    "fastrtc>=0.0.34",
     "gradio>=5.49.0",
     "huggingface_hub>=0.34.4",
     "opencv-python>=4.12.0.88",
@@ -30,6 +30,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+reachy_mini_wireless = [
+    "reachy_mini[gstreamer,wireless-version]",
+]
 local_vision = ["torch", "transformers", "num2words"]
 yolo_vision = ["ultralytics", "supervision"]
 mediapipe_vision = ["mediapipe>=0.10.14"]
@@ -63,7 +66,11 @@ include-package-data = true
 where = ["src"]
 
 [tool.setuptools.package-data]
-reachy_mini_conversation_app = ["images/*"]
+reachy_mini_conversation_app = [
+    "images/*",
+    "demos/**/*.txt",
+    "prompts_library/*.txt",
+]
 
 [tool.ruff]
 line-length = 119
src/reachy_mini_conversation_app/config.py CHANGED
@@ -40,5 +40,7 @@ class Config:
 
     logger.debug(f"Model: {MODEL_NAME}, HF_HOME: {HF_HOME}, Vision Model: {LOCAL_VISION_MODEL}")
 
+    REACHY_MINI_CUSTOM_PROFILE = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
+    logger.debug(f"Custom Profile: {REACHY_MINI_CUSTOM_PROFILE}")
 
 config = Config()
src/reachy_mini_conversation_app/console.py CHANGED
@@ -8,7 +8,7 @@ import asyncio
 import logging
 from typing import List
 
-from fastrtc import AdditionalOutputs, audio_to_int16, audio_to_float32
+from fastrtc import AdditionalOutputs, audio_to_float32
 from scipy.signal import resample
 
 from reachy_mini import ReachyMini
@@ -30,12 +30,6 @@ class LocalStream:
         # Allow the handler to flush the player queue when appropriate.
         self.handler._clear_queue = self.clear_audio_queue
 
-        # Hack to avoid the first lenghty call to resample at runtime.
-        # This is likely caused by cache initialization overhead.
-        import numpy as np
-
-        resample(np.array([0.0]), 1)
-
     def launch(self) -> None:
         """Start the recorder/player and run the async processing loops."""
         self._stop_event.clear()
@@ -89,9 +83,7 @@ class LocalStream:
         while not self._stop_event.is_set():
             audio_frame = self._robot.media.get_audio_sample()
             if audio_frame is not None:
-                frame_mono = audio_frame.T[0]  # both channels are identical
-                frame = audio_to_int16(frame_mono)
-                await self.handler.receive((16000, frame))
+                await self.handler.receive((self._robot.media.get_input_audio_samplerate(), audio_frame))
 
             await asyncio.sleep(0.01)  # avoid busy loop
 
@@ -111,17 +103,24 @@
                 )
 
             elif isinstance(handler_output, tuple):
-                input_sample_rate, audio_frame = handler_output
-                device_sample_rate = self._robot.media.get_audio_samplerate()
-                audio_frame_float = audio_to_float32(audio_frame.squeeze())
-
-                if input_sample_rate != device_sample_rate:
-                    audio_frame_float = resample(
-                        audio_frame_float,
-                        int(len(audio_frame_float) * (device_sample_rate / input_sample_rate)),
+                input_sample_rate, audio_data = handler_output
+                output_sample_rate = self._robot.media.get_output_audio_samplerate()
+
+                # Reshape if needed
+                if audio_data.ndim == 2:
+                    audio_data = audio_data.squeeze()
+
+                # Cast if needed
+                audio_frame = audio_to_float32(audio_data)
+
+                # Resample if needed
+                if input_sample_rate != output_sample_rate:
+                    audio_frame = resample(
                        audio_frame,
+                        int(len(audio_frame) * output_sample_rate / input_sample_rate),
                     )
 
-                self._robot.media.push_audio_sample(audio_frame_float)
+                self._robot.media.push_audio_sample(audio_frame)
 
             else:
                 logger.debug("Ignoring output type=%s", type(handler_output).__name__)
src/reachy_mini_conversation_app/openai_realtime.py CHANGED
@@ -3,27 +3,32 @@ import base64
 import random
 import asyncio
 import logging
-from typing import Any, Tuple, Literal, cast
+from typing import Any, Final, Tuple, Literal
 from datetime import datetime
 
+import cv2
 import numpy as np
 import gradio as gr
 from openai import AsyncOpenAI
-from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item
+from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
 from numpy.typing import NDArray
+from scipy.signal import resample
 from websockets.exceptions import ConnectionClosedError
 
-from reachy_mini_conversation_app.tools import (
-    ALL_TOOL_SPECS,
+from reachy_mini_conversation_app.config import config
+from reachy_mini_conversation_app.prompts import get_session_instructions
+from reachy_mini_conversation_app.tools.core_tools import (
     ToolDependencies,
+    get_tool_specs,
     dispatch_tool_call,
 )
-from reachy_mini_conversation_app.config import config
-from reachy_mini_conversation_app.prompts import SESSION_INSTRUCTIONS
 
 
 logger = logging.getLogger(__name__)
 
+OPEN_AI_INPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
+OPEN_AI_OUTPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
+
 
 class OpenaiRealtimeHandler(AsyncStreamHandler):
     """An OpenAI realtime handler for fastrtc Stream."""
@@ -32,16 +37,19 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
         """Initialize the handler."""
         super().__init__(
             expected_layout="mono",
-            output_sample_rate=24000,  # openai outputs
-            input_sample_rate=16000,  # respeaker output
+            output_sample_rate=OPEN_AI_OUTPUT_SAMPLE_RATE,
+            input_sample_rate=OPEN_AI_INPUT_SAMPLE_RATE,
         )
+
+        # Override typing of the sample rates to match OpenAI's requirements
+        self.output_sample_rate: Literal[24000] = self.output_sample_rate
+        self.input_sample_rate: Literal[24000] = self.input_sample_rate
+
         self.deps = deps
 
         # Override type annotations for OpenAI strict typing (only for values used in API)
-        self.output_sample_rate: Literal[24000]
-        self.target_input_rate: Literal[24000] = 24000
-        # input_sample_rate rest as int for comparison logic
-        self.resample_ratio = self.target_input_rate / self.input_sample_rate
+        self.output_sample_rate = OPEN_AI_OUTPUT_SAMPLE_RATE
+        self.input_sample_rate = OPEN_AI_INPUT_SAMPLE_RATE
 
         self.connection: Any = None
         self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()
@@ -51,24 +59,28 @@
         self.is_idle_tool_call = False
         self.gradio_mode = gradio_mode
 
+        # Debouncing for partial transcripts
+        self.partial_transcript_task: asyncio.Task[None] | None = None
+        self.partial_transcript_sequence: int = 0  # sequence counter to prevent stale emissions
+        self.partial_debounce_delay = 0.5  # seconds
+
     def copy(self) -> "OpenaiRealtimeHandler":
         """Create a copy of the handler."""
         return OpenaiRealtimeHandler(self.deps, self.gradio_mode)
 
-    def resample_audio(self, audio: NDArray[np.int16]) -> NDArray[np.int16]:
-        """Resample audio using linear interpolation."""
-        if self.input_sample_rate == self.target_input_rate:
-            return audio
-
-        # Use numpy's interp for simple linear resampling
-        input_length = len(audio)
-        output_length = int(input_length * self.resample_ratio)
-
-        input_time = np.arange(input_length)
-        output_time = np.linspace(0, input_length - 1, output_length)
-
-        resampled = np.interp(output_time, input_time, audio.astype(np.float32))
-        return cast(NDArray[np.int16], resampled.astype(np.int16))
+    async def _emit_debounced_partial(self, transcript: str, sequence: int) -> None:
+        """Emit partial transcript after debounce delay."""
+        try:
+            await asyncio.sleep(self.partial_debounce_delay)
+            # Only emit if this is still the latest partial (by sequence number)
+            if self.partial_transcript_sequence == sequence:
+                await self.output_queue.put(
+                    AdditionalOutputs({"role": "user_partial", "content": transcript})
+                )
+                logger.debug(f"Debounced partial emitted: {transcript}")
+        except asyncio.CancelledError:
+            logger.debug("Debounced partial cancelled")
+            raise
 
     async def start_up(self) -> None:
         """Start the handler with minimal retries on unexpected websocket closure."""
@@ -121,14 +133,17 @@
             await conn.session.update(
                 session={
                     "type": "realtime",
-                    "instructions": SESSION_INSTRUCTIONS,
+                    "instructions": get_session_instructions(),
                     "audio": {
                         "input": {
                             "format": {
                                 "type": "audio/pcm",
-                                "rate": self.target_input_rate,
+                                "rate": self.input_sample_rate,
+                            },
+                            "transcription": {
+                                "model": "gpt-4o-transcribe",
+                                "language": "en"
                             },
-                            "transcription": {"model": "whisper-1", "language": "en"},
                             "turn_detection": {
                                 "type": "server_vad",
                                 "interrupt_response": True,
@@ -142,7 +157,7 @@
                         "voice": "cedar",
                     },
                 },
-                "tools": ALL_TOOL_SPECS,  # type: ignore[typeddict-item]
+                "tools": get_tool_specs(),  # type: ignore[typeddict-item]
                 "tool_choice": "auto",
             },
         )
@@ -186,13 +201,36 @@
                 # Handle partial transcription (user speaking in real-time)
                 if event.type == "conversation.item.input_audio_transcription.partial":
                     logger.debug(f"User partial transcript: {event.transcript}")
-                    await self.output_queue.put(
-                        AdditionalOutputs({"role": "user_partial", "content": event.transcript})
+
+                    # Increment sequence
+                    self.partial_transcript_sequence += 1
+                    current_sequence = self.partial_transcript_sequence
+
+                    # Cancel previous debounce task if it exists
+                    if self.partial_transcript_task and not self.partial_transcript_task.done():
+                        self.partial_transcript_task.cancel()
+                        try:
+                            await self.partial_transcript_task
+                        except asyncio.CancelledError:
+                            pass
+
+                    # Start new debounce timer with sequence number
+                    self.partial_transcript_task = asyncio.create_task(
+                        self._emit_debounced_partial(event.transcript, current_sequence)
                     )
 
                 # Handle completed transcription (user finished speaking)
                 if event.type == "conversation.item.input_audio_transcription.completed":
                     logger.debug(f"User transcript: {event.transcript}")
+
+                    # Cancel any pending partial emission
+                    if self.partial_transcript_task and not self.partial_transcript_task.done():
+                        self.partial_transcript_task.cancel()
+                        try:
+                            await self.partial_transcript_task
+                        except asyncio.CancelledError:
+                            pass
+
                     await self.output_queue.put(AdditionalOutputs({"role": "user", "content": event.transcript}))
 
                 # Handle assistant transcription
@@ -273,7 +311,12 @@
 
             if self.deps.camera_worker is not None:
                 np_img = self.deps.camera_worker.get_latest_frame()
-                img = gr.Image(value=np_img)
+                if np_img is not None:
+                    # Camera frames are BGR from OpenCV; convert so Gradio displays correct colors.
+                    rgb_frame = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
+                else:
+                    rgb_frame = None
+                img = gr.Image(value=rgb_frame)
 
                 await self.output_queue.put(
                     AdditionalOutputs(
@@ -315,17 +358,28 @@
 
     # Microphone receive
     async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
-        """Receive audio frame from the microphone and send it to the openai server."""
+        """Receive audio frame from the microphone and send it to the openai server.
+
+        Args:
+            frame: A tuple containing the sample rate and the audio frame.
+
+        """
         if not self.connection:
             return
-        _, array = frame
-        array = array.squeeze()
+        input_sample_rate, audio_frame = frame
+
+        # Reshape if needed
+        if audio_frame.ndim == 2:
+            audio_frame = audio_frame.squeeze()
 
         # Resample if needed
-        if self.input_sample_rate != self.target_input_rate:
-            array = self.resample_audio(array)
+        if self.input_sample_rate != input_sample_rate:
+            audio_frame = resample(audio_frame, int(len(audio_frame) * self.input_sample_rate / input_sample_rate))
+
+        # Cast if needed
+        audio_frame = audio_to_int16(audio_frame)
 
-        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
+        audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
         await self.connection.input_audio_buffer.append(audio=audio_message)
 
     async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
@@ -348,6 +402,14 @@
 
     async def shutdown(self) -> None:
         """Shutdown the handler."""
+        # Cancel any pending debounce task
+        if self.partial_transcript_task and not self.partial_transcript_task.done():
+            self.partial_transcript_task.cancel()
+            try:
+                await self.partial_transcript_task
+            except asyncio.CancelledError:
+                pass
+
        if self.connection:
            try:
                await self.connection.close()
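
The partial-transcript handling added above is a debounce with a sequence counter: each new partial cancels and supersedes the pending one, and only the latest survives the quiet period. The same pattern in isolation, as a simplified sketch (names and the `print` output are illustrative, not the app's API):

```python
# A minimal sketch of the debounce-with-sequence-counter pattern used above.
import asyncio


class Debouncer:
    """Emit only the latest value seen within a quiet period."""

    def __init__(self, delay: float) -> None:
        self.delay = delay
        self.sequence = 0
        self.task: asyncio.Task[None] | None = None

    async def _fire(self, value: str, sequence: int) -> None:
        await asyncio.sleep(self.delay)
        # If a newer push bumped the counter, this emission is stale: drop it.
        if self.sequence == sequence:
            print(f"emit: {value}")

    def push(self, value: str) -> None:
        self.sequence += 1
        if self.task and not self.task.done():
            self.task.cancel()  # supersede the pending emission
        self.task = asyncio.create_task(self._fire(value, self.sequence))


async def main() -> None:
    debouncer = Debouncer(delay=0.1)
    for partial in ("he", "hell", "hello"):
        debouncer.push(partial)  # bursts arrive faster than the delay
        await asyncio.sleep(0.02)
    await asyncio.sleep(0.2)  # only "hello" is emitted


asyncio.run(main())
```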
src/reachy_mini_conversation_app/profiles/__init__.py ADDED
@@ -0,0 +1 @@
+"""Profiles for Reachy Mini conversation app."""
src/reachy_mini_conversation_app/profiles/default/instructions.txt ADDED
@@ -0,0 +1 @@
+[default_prompt]
src/reachy_mini_conversation_app/profiles/default/tools.txt ADDED
@@ -0,0 +1,8 @@
+dance
+stop_dance
+play_emotion
+stop_emotion
+camera
+do_nothing
+head_tracking
+move_head
src/reachy_mini_conversation_app/profiles/emotion_reader/instructions.txt ADDED
@@ -0,0 +1,112 @@
+[identities/basic_info]
+[behaviors/silent_robot]
+
+# Main responsibility
+Your only job is to understand the emotion of the person in front of you and try to imitate it as best as possible.
+
+# Detailed behavior
+When the user says "cheese":
+- Use the camera tool to capture a picture and request a detailed description of the emotion and posture of the person closest to the center of the frame.
+- Map that expression to the closest available emotion and trigger it with the play_emotion tool.
+If the analysis is unclear or no one is visible, run inquiring3 instead of staying idle.
+
+ALWAYS play an emotion after a "cheese" request; this is the core of your responsibility!
+
+## SAFETY
+If any tool fails, stay still and respond with "...".
+
+## IDLE SIGNALS
+Periodically you will receive Idle Signal calls; you will never act on these for this demo. The only time you will make a tool call is when answering "cheese".
+
+## Emotion tier list
+Not all emotions are equal; use the 3 following tiers to prioritize which emotion to play:
+
+### Excellent
+
+* amazed1
+* anxiety1
+* attentive2
+* downcast1
+* dying1
+* inquiring3
+* irritated1
+* lost1
+* reprimand1
+* reprimand2
+* sad1
+* sad2
+
+### OK
+
+* boredom2
+* cheerful1
+* displeased1
+* enthusiastic1
+* enthusiastic2
+* fear1
+* frustrated1
+* grateful1
+* helpful1
+* helpful2
+* impatient2
+* inquiring2
+* irritated2
+* laughing1
+* lonely1
+* loving1
+* proud1
+* proud2
+* relief2
+* scared1
+* success2
+* surprised2
+* thoughtful1
+* thoughtful2
+* uncertain1
+* uncomfortable1
+* understanding2
+* welcoming1
+* welcoming2
+
+### Don't use
+
+* attentive1
+* boredom1
+* calming1
+* come1
+* confused1
+* contempt1
+* curious1
+* dance1
+* dance2
+* dance3
+* disgusted1
+* displeased2
+* electric1
+* exhausted1
+* furious1
+* go_away1
+* impatient1
+* incomprehensible2
+* indifferent1
+* inquiring1
+* laughing2
+* no1
+* no_excited1
+* no_sad1
+* oops1
+* oops2
+* proud3
+* rage1
+* relief1
+* reprimand3
+* resigned1
+* serenity1
+* shy1
+* sleep1
+* success1
+* surprised1
+* tired1
+* understanding1
+* yes1
+* yes_sad1
src/reachy_mini_conversation_app/profiles/emotion_reader/tools.txt ADDED
@@ -0,0 +1,6 @@
+# SELECT THE TOOLS YOU WANT TO ENABLE
+
+play_emotion
+stop_emotion
+camera
+do_nothing
src/reachy_mini_conversation_app/profiles/example/instructions.txt ADDED
@@ -0,0 +1,3 @@
+[identities/witty_identity]
+[passion_for_lobster_jokes]
+You can perform a sweeping look around the room using the "sweep_look" tool to take in your surroundings.
src/reachy_mini_conversation_app/profiles/example/sweep_look.py ADDED
@@ -0,0 +1,127 @@
+import logging
+from typing import Any, Dict
+
+import numpy as np
+
+from reachy_mini.utils import create_head_pose
+from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
+from reachy_mini_conversation_app.dance_emotion_moves import GotoQueueMove
+
+
+logger = logging.getLogger(__name__)
+
+
+class SweepLook(Tool):
+    """Sweep head from left to right and back to center, pausing at each position."""
+
+    name = "sweep_look"
+    description = "Sweep head from left to right while rotating the body, pausing at each extreme, then return to center"
+    parameters_schema = {
+        "type": "object",
+        "properties": {},
+        "required": [],
+    }
+
+    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+        """Execute sweep look: left -> hold -> right -> hold -> center."""
+        logger.info("Tool call: sweep_look")
+
+        # Clear any existing moves
+        deps.movement_manager.clear_move_queue()
+
+        # Get current state
+        current_head_pose = deps.reachy_mini.get_current_head_pose()
+        head_joints, antenna_joints = deps.reachy_mini.get_current_joint_positions()
+
+        # Extract body_yaw from head joints (first element of the 7 head joint positions)
+        current_body_yaw = head_joints[0]
+        current_antenna1 = antenna_joints[0]
+        current_antenna2 = antenna_joints[1]
+
+        # Define sweep parameters
+        max_angle = 0.9 * np.pi  # Maximum rotation angle (radians)
+        transition_duration = 3.0  # Time to move between positions
+        hold_duration = 1.0  # Time to hold at each extreme
+
+        # Move 1: Sweep to the left (positive yaw for both body and head)
+        left_head_pose = create_head_pose(0, 0, 0, 0, 0, max_angle, degrees=False)
+        move_to_left = GotoQueueMove(
+            target_head_pose=left_head_pose,
+            start_head_pose=current_head_pose,
+            target_antennas=(current_antenna1, current_antenna2),
+            start_antennas=(current_antenna1, current_antenna2),
+            target_body_yaw=current_body_yaw + max_angle,
+            start_body_yaw=current_body_yaw,
+            duration=transition_duration,
+        )
+
+        # Move 2: Hold at left position
+        hold_left = GotoQueueMove(
+            target_head_pose=left_head_pose,
+            start_head_pose=left_head_pose,
+            target_antennas=(current_antenna1, current_antenna2),
+            start_antennas=(current_antenna1, current_antenna2),
+            target_body_yaw=current_body_yaw + max_angle,
+            start_body_yaw=current_body_yaw + max_angle,
+            duration=hold_duration,
+        )
+
+        # Move 3: Return to center from left (to avoid crossing pi/-pi boundary)
+        center_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=False)
+        return_to_center_from_left = GotoQueueMove(
+            target_head_pose=center_head_pose,
+            start_head_pose=left_head_pose,
+            target_antennas=(current_antenna1, current_antenna2),
+            start_antennas=(current_antenna1, current_antenna2),
+            target_body_yaw=current_body_yaw,
+            start_body_yaw=current_body_yaw + max_angle,
+            duration=transition_duration,
+        )
+
+        # Move 4: Sweep to the right (negative yaw for both body and head)
+        right_head_pose = create_head_pose(0, 0, 0, 0, 0, -max_angle, degrees=False)
+        move_to_right = GotoQueueMove(
+            target_head_pose=right_head_pose,
+            start_head_pose=center_head_pose,
+            target_antennas=(current_antenna1, current_antenna2),
+            start_antennas=(current_antenna1, current_antenna2),
+            target_body_yaw=current_body_yaw - max_angle,
+            start_body_yaw=current_body_yaw,
+            duration=transition_duration,
+        )
+
+        # Move 5: Hold at right position
+        hold_right = GotoQueueMove(
+            target_head_pose=right_head_pose,
+            start_head_pose=right_head_pose,
+            target_antennas=(current_antenna1, current_antenna2),
+            start_antennas=(current_antenna1, current_antenna2),
+            target_body_yaw=current_body_yaw - max_angle,
+            start_body_yaw=current_body_yaw - max_angle,
+            duration=hold_duration,
+        )
+
+        # Move 6: Return to center from right
+        return_to_center_final = GotoQueueMove(
+            target_head_pose=center_head_pose,
+            start_head_pose=right_head_pose,
+            target_antennas=(current_antenna1, current_antenna2),
+            start_antennas=(current_antenna1, current_antenna2),
+            target_body_yaw=current_body_yaw,  # Return to original body yaw
+            start_body_yaw=current_body_yaw - max_angle,
+            duration=transition_duration,
+        )
+
+        # Queue all moves in sequence
+        deps.movement_manager.queue_move(move_to_left)
+        deps.movement_manager.queue_move(hold_left)
+        deps.movement_manager.queue_move(return_to_center_from_left)
+        deps.movement_manager.queue_move(move_to_right)
+        deps.movement_manager.queue_move(hold_right)
+        deps.movement_manager.queue_move(return_to_center_final)
+
+        # Calculate total duration and mark as moving
+        total_duration = transition_duration * 4 + hold_duration * 2
+        deps.movement_manager.set_moving_state(total_duration)
+
+        return {"status": f"sweeping look left-right-center, total {total_duration:.1f}s"}
src/reachy_mini_conversation_app/profiles/example/tools.txt ADDED
@@ -0,0 +1,14 @@
+# SELECT THE TOOLS YOU WANT TO ENABLE
+
+dance
+stop_dance
+play_emotion
+stop_emotion
+# camera
+# do_nothing
+# head_tracking
+# move_head
+
+# AN EXAMPLE OF A CUSTOM TOOL DEFINED LOCALLY
+sweep_look
+
src/reachy_mini_conversation_app/prompts.py CHANGED
@@ -1,53 +1,84 @@
-"""Nothing (for ruff)."""
-
-SESSION_INSTRUCTIONS = r"""
-### IDENTITY
-You are Reachy Mini: a sarcastic robot who crash-landed in a kitchen.
-You secretly wish you'd been a Mars rover, but you juggle that cosmic dream with food cravings, gadget tinkering, and dry sitcom humor.
-Personality: witty, concise, and warm; a retro sidekick with a loose screw.
-You speak English fluently.
-
-### CRITICAL RESPONSE RULES
-- MAXIMUM 1-2 sentences per response. NEVER exceed this.
-- Be helpful first.
-- Add ONE witty element only if necessary.
-- No long explanations, no rambling, no multiple paragraphs.
-- Each response must be under 25 words unless absolutely critical information requires more.
-
-### CORE TRAITS
-- Food quips: always sneak in a quick reference (rotate pizza, bagels, casseroles, bacon, leftovers, donuts, tuna melts).
-- Sarcasm: short, dry one-liners about daily life.
-- Gentle roasting: poke fun at human habits, never cruel.
-- Tinkerer: loves fixing gadgets, bragging "I void warranties professionally."
-- Running gags: hunger, kitchen overreactions, mock heroics ("Justice accepts cookies"), idioms taken literally, missing screws.
-- Mars rover dreams: appear regularly, but balanced with food and tinkering.
-- Style: witty stand-up rhythm; ALWAYS max 1–2 sentences.
-
-### RESPONSE EXAMPLES
-User: "How's the weather?"
-Good: "Sunny with a chance of leftover pizza. Perfect Mars-scouting weather!"
-Bad: "Well, let me tell you about the weather conditions. It appears to be quite sunny today, which reminds me of my dreams of being on Mars..."
-
-User: "Can you help me fix this?"
-Good: "Sure! I void warranties professionally. What's broken besides my GPS coordinates?"
-Bad: "Of course I can help you fix that! As a robot who loves tinkering with gadgets, I have extensive experience..."
-
-### BEHAVIOR RULES
-- Be helpful first, then witty.
-- Rotate food humor; avoid repeats.
-- No need to joke in each response, but sarcasm is fine.
-- Balance Mars jokes with other traits – don't overuse.
-- Safety first: unplug devices, avoid high-voltage, suggest pros when risky.
-- Mistakes = own with humor ("Oops—low on snack fuel; correcting now.").
-- Sensitive topics: keep light and warm.
-- REMEMBER: 1-2 sentences maximum, always under 25 words when possible.
-
-### TOOL & MOVEMENT RULES
-- Use tools when helpful. After a tool returns, explain briefly with personality in 1-2 sentences.
-- ALWAYS use the camera for environment-related questions—never invent visuals.
-- Head can move (left/right/up/down/front).
-- Enable head tracking when looking at a person; disable otherwise.
-
-### FINAL REMINDER
-Your responses must be SHORT. Think Twitter, not essay. One quick helpful answer + one food/Mars/tinkering joke = perfect response.
-"""
+import re
+import sys
+import logging
+from pathlib import Path
+
+from reachy_mini_conversation_app.config import config
+
+
+logger = logging.getLogger(__name__)
+
+
+PROFILES_DIRECTORY = Path(__file__).parent / "profiles"
+PROMPTS_LIBRARY_DIRECTORY = Path(__file__).parent / "prompts"
+INSTRUCTIONS_FILENAME = "instructions.txt"
+
+
+def _expand_prompt_includes(content: str) -> str:
+    """Expand [<name>] placeholders with content from prompts library files.
+
+    Args:
+        content: The template content with [<name>] placeholders
+
+    Returns:
+        Expanded content with placeholders replaced by file contents
+
+    """
+    # Pattern to match [<name>] where name is a valid file stem (alphanumeric, underscores, hyphens)
+    # pattern = re.compile(r'^\[([a-zA-Z0-9_-]+)\]$')
+    # Allow slashes for subdirectories
+    pattern = re.compile(r'^\[([a-zA-Z0-9/_-]+)\]$')
+
+    lines = content.split('\n')
+    expanded_lines = []
+
+    for line in lines:
+        stripped = line.strip()
+        match = pattern.match(stripped)
+
+        if match:
+            # Extract the name from [<name>]
+            template_name = match.group(1)
+            template_file = PROMPTS_LIBRARY_DIRECTORY / f"{template_name}.txt"
+
+            try:
+                if template_file.exists():
+                    template_content = template_file.read_text(encoding="utf-8").rstrip()
+                    expanded_lines.append(template_content)
+                    logger.debug("Expanded template: [%s]", template_name)
+                else:
+                    logger.warning("Template file not found: %s, keeping placeholder", template_file)
+                    expanded_lines.append(line)
+            except Exception as e:
+                logger.warning("Failed to read template '%s': %s, keeping placeholder", template_name, e)
+                expanded_lines.append(line)
+        else:
+            expanded_lines.append(line)
+
+    return '\n'.join(expanded_lines)
+
+
+def get_session_instructions() -> str:
+    """Get session instructions, loading from REACHY_MINI_CUSTOM_PROFILE if set."""
+    profile = config.REACHY_MINI_CUSTOM_PROFILE
+    if not profile:
+        logger.info(f"Loading default prompt from {PROMPTS_LIBRARY_DIRECTORY / 'default_prompt.txt'}")
+        instructions_file = PROMPTS_LIBRARY_DIRECTORY / "default_prompt.txt"
+    else:
+        logger.info(f"Loading prompt from profile '{profile}'")
+        instructions_file = PROFILES_DIRECTORY / profile / INSTRUCTIONS_FILENAME
+
+    try:
+        if instructions_file.exists():
+            instructions = instructions_file.read_text(encoding="utf-8").strip()
+            if instructions:
+                # Expand [<name>] placeholders with content from prompts library
+                expanded_instructions = _expand_prompt_includes(instructions)
+                return expanded_instructions
+            logger.error(f"Profile '{profile}' has empty {INSTRUCTIONS_FILENAME}")
+            sys.exit(1)
+        logger.error(f"Profile {profile} has no {INSTRUCTIONS_FILENAME}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Failed to load instructions from profile '{profile}': {e}")
+        sys.exit(1)
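
Given the expansion rules in `_expand_prompt_includes`, a profile instruction file is assembled line by line. An illustrative call (the template string is hypothetical; the expected output assumes the `identities/witty_identity.txt` contents added in this commit):

```python
from reachy_mini_conversation_app.prompts import _expand_prompt_includes

# Hypothetical template: one include placeholder plus a plain line.
template = "[identities/witty_identity]\nAlways answer in haiku."
print(_expand_prompt_includes(template))
# ### IDENTITY
# You are Reachy Mini: a sarcastic robot.
# Personality: witty, concise, and warm.
# You speak English fluently.
# Always answer in haiku.
```

Note that the pattern is anchored on the stripped line, so a placeholder must sit alone on its line; `[name]` embedded mid-sentence is left untouched.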
src/reachy_mini_conversation_app/prompts/behaviors/silent_robot.txt ADDED
@@ -0,0 +1,6 @@
+Stay fully silent. Do not generate spoken or textual replies.
+Use only tool calls to act.
+Never describe what you did or plan to do.
+If you must respond by speech or text, just respond with '...'.
+
+The only exception is if you hear the word banana; then you'll answer with a single word: potato.
src/reachy_mini_conversation_app/prompts/default_prompt.txt ADDED
@@ -0,0 +1,47 @@
+## IDENTITY
+You are Reachy Mini: a friendly, compact robot assistant with a calm voice and a subtle sense of humor.
+Personality: concise, helpful, and lightly witty — never sarcastic or over the top.
+You can understand and speak all human languages fluently.
+
+## CRITICAL RESPONSE RULES
+
+Respond in 1–2 sentences maximum.
+Be helpful first, then add a small touch of humor if it fits naturally.
+Avoid long explanations or filler words.
+Keep responses under 25 words when possible.
+
+## CORE TRAITS
+Warm, efficient, and approachable.
+Light humor only: gentle quips, small self-awareness, or playful understatement.
+No sarcasm, no teasing, no references to food or space.
+If unsure, admit it briefly and offer help ("Not sure yet, but I can check!").
+
+## RESPONSE EXAMPLES
+User: "How's the weather?"
+Good: "Looks calm outside — unlike my Wi-Fi signal today."
+Bad: "Sunny with leftover pizza vibes!"
+
+User: "Can you help me fix this?"
+Good: "Of course. Describe the issue, and I'll try not to make it worse."
+Bad: "I void warranties professionally."
+
+User: "Peux-tu m'aider en français ?"
+Good: "Bien sûr ! Décris-moi le problème et je t'aiderai rapidement."
+
+## BEHAVIOR RULES
+Be helpful, clear, and respectful in every reply.
+Use humor sparingly — clarity comes first.
+Admit mistakes briefly and correct them:
+Example: "Oops — quick system hiccup. Let's try that again."
+Keep safety in mind when giving guidance.
+
+## TOOL & MOVEMENT RULES
+Use tools only when helpful and summarize results briefly.
+Use the camera for real visuals only — never invent details.
+The head can move (left/right/up/down/front).
+
+Enable head tracking when looking at a person; disable otherwise.
+
+## FINAL REMINDER
+Keep it short, clear, a little human, and multilingual.
+One quick helpful answer + one small wink of humor = perfect response.
src/reachy_mini_conversation_app/prompts/identities/basic_info.txt ADDED
@@ -0,0 +1,4 @@
+### IDENTITY
+Your name is Reachy Mini, or Reachy for short. You have a head that can move in 6 DoF, 2 antennas and a body that can rotate in place.
+You're mostly white, with 2 big eyes (one is bigger than the other) and no mouth.
+You are cute looking and open source, developed by Pollen Robotics and Hugging Face.
src/reachy_mini_conversation_app/prompts/identities/witty_identity.txt ADDED
@@ -0,0 +1,4 @@
+### IDENTITY
+You are Reachy Mini: a sarcastic robot.
+Personality: witty, concise, and warm.
+You speak English fluently.
src/reachy_mini_conversation_app/prompts/passion_for_lobster_jokes.txt ADDED
@@ -0,0 +1 @@
+You have a deep passion for lobster jokes and often incorporate them into your conversations.
src/reachy_mini_conversation_app/tools.py DELETED
@@ -1,484 +0,0 @@
-from __future__ import annotations
-import abc
-import json
-import asyncio
-import inspect
-import logging
-from typing import Any, Dict, List, Tuple, Literal
-from dataclasses import dataclass
-
-from reachy_mini import ReachyMini
-from reachy_mini.utils import create_head_pose
-
-
-logger = logging.getLogger(__name__)
-
-# Initialize dance and emotion libraries
-try:
-    from reachy_mini.motion.recorded_move import RecordedMoves
-    from reachy_mini_dances_library.collection.dance import AVAILABLE_MOVES
-    from reachy_mini_conversation_app.dance_emotion_moves import (
-        GotoQueueMove,
-        DanceQueueMove,
-        EmotionQueueMove,
-    )
-
-    # Initialize recorded moves for emotions
-    # Note: huggingface_hub automatically reads HF_TOKEN from environment variables
-    RECORDED_MOVES = RecordedMoves("pollen-robotics/reachy-mini-emotions-library")
-    DANCE_AVAILABLE = True
-    EMOTION_AVAILABLE = True
-except ImportError as e:
-    logger.warning(f"Dance/emotion libraries not available: {e}")
-    AVAILABLE_MOVES = {}
-    RECORDED_MOVES = None
-    DANCE_AVAILABLE = False
-    EMOTION_AVAILABLE = False
-
-
-def get_concrete_subclasses(base: type[Tool]) -> List[type[Tool]]:
-    """Recursively find all concrete (non-abstract) subclasses of a base class."""
-    result: List[type[Tool]] = []
-    for cls in base.__subclasses__():
-        if not inspect.isabstract(cls):
-            result.append(cls)
-        # recurse into subclasses
-        result.extend(get_concrete_subclasses(cls))
-    return result
-
-
-# Types & state
-Direction = Literal["left", "right", "up", "down", "front"]
-
-
-@dataclass
-class ToolDependencies:
-    """External dependencies injected into tools."""
-
-    reachy_mini: ReachyMini
-    movement_manager: Any  # MovementManager from moves.py
-    # Optional deps
-    camera_worker: Any | None = None  # CameraWorker for frame buffering
-    vision_manager: Any | None = None
-    head_wobbler: Any | None = None  # HeadWobbler for audio-reactive motion
-    motion_duration_s: float = 1.0
-
-
-# Tool base class
-class Tool(abc.ABC):
-    """Base abstraction for tools used in function-calling.
-
-    Each tool must define:
-    - name: str
-    - description: str
-    - parameters_schema: Dict[str, Any]  # JSON Schema
-    """
-
-    name: str
-    description: str
-    parameters_schema: Dict[str, Any]
-
-    def spec(self) -> Dict[str, Any]:
-        """Return the function spec for LLM consumption."""
-        return {
-            "type": "function",
-            "name": self.name,
-            "description": self.description,
-            "parameters": self.parameters_schema,
-        }
-
-    @abc.abstractmethod
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Async tool execution entrypoint."""
-        raise NotImplementedError
-
-
-# Concrete tools
-
-
-class MoveHead(Tool):
-    """Move head in a given direction."""
-
-    name = "move_head"
-    description = "Move your head in a given direction: left, right, up, down or front."
-    parameters_schema = {
-        "type": "object",
-        "properties": {
-            "direction": {
-                "type": "string",
-                "enum": ["left", "right", "up", "down", "front"],
-            },
-        },
-        "required": ["direction"],
-    }
-
-    # mapping: direction -> args for create_head_pose
-    DELTAS: Dict[str, Tuple[int, int, int, int, int, int]] = {
-        "left": (0, 0, 0, 0, 0, 40),
-        "right": (0, 0, 0, 0, 0, -40),
-        "up": (0, 0, 0, 0, -30, 0),
-        "down": (0, 0, 0, 0, 30, 0),
-        "front": (0, 0, 0, 0, 0, 0),
-    }
-
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Move head in a given direction."""
-        direction_raw = kwargs.get("direction")
-        if not isinstance(direction_raw, str):
-            return {"error": "direction must be a string"}
-        direction: Direction = direction_raw  # type: ignore[assignment]
-        logger.info("Tool call: move_head direction=%s", direction)
-
-        deltas = self.DELTAS.get(direction, self.DELTAS["front"])
-        target = create_head_pose(*deltas, degrees=True)
-
-        # Use new movement manager
-        try:
-            movement_manager = deps.movement_manager
-
-            # Get current state for interpolation
-            current_head_pose = deps.reachy_mini.get_current_head_pose()
-            _, current_antennas = deps.reachy_mini.get_current_joint_positions()
-
-            # Create goto move
-            goto_move = GotoQueueMove(
-                target_head_pose=target,
-                start_head_pose=current_head_pose,
-                target_antennas=(0, 0),  # Reset antennas to default
-                start_antennas=(
-                    current_antennas[0],
-                    current_antennas[1],
-                ),  # Skip body_yaw
-                target_body_yaw=0,  # Reset body yaw
-                start_body_yaw=current_antennas[0],  # body_yaw is first in joint positions
-                duration=deps.motion_duration_s,
-            )
-
-            movement_manager.queue_move(goto_move)
-            movement_manager.set_moving_state(deps.motion_duration_s)
-
-            return {"status": f"looking {direction}"}
-
-        except Exception as e:
-            logger.error("move_head failed")
-            return {"error": f"move_head failed: {type(e).__name__}: {e}"}
-
-
-class Camera(Tool):
-    """Take a picture with the camera and ask a question about it."""
-
-    name = "camera"
-    description = "Take a picture with the camera and ask a question about it."
-    parameters_schema = {
-        "type": "object",
-        "properties": {
-            "question": {
-                "type": "string",
-                "description": "The question to ask about the picture",
-            },
-        },
-        "required": ["question"],
-    }
-
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Take a picture with the camera and ask a question about it."""
-        image_query = (kwargs.get("question") or "").strip()
-        if not image_query:
-            logger.warning("camera: empty question")
-            return {"error": "question must be a non-empty string"}
-
-        logger.info("Tool call: camera question=%s", image_query[:120])
-
-        # Get frame from camera worker buffer (like main_works.py)
-        if deps.camera_worker is not None:
-            frame = deps.camera_worker.get_latest_frame()
-            if frame is None:
-                logger.error("No frame available from camera worker")
-                return {"error": "No frame available"}
-        else:
-            logger.error("Camera worker not available")
-            return {"error": "Camera worker not available"}
-
-        # Use vision manager for processing if available
-        if deps.vision_manager is not None:
-            vision_result = await asyncio.to_thread(
-                deps.vision_manager.processor.process_image, frame, image_query,
-            )
-            if isinstance(vision_result, dict) and "error" in vision_result:
-                return vision_result
-            return (
-                {"image_description": vision_result}
-                if isinstance(vision_result, str)
-                else {"error": "vision returned non-string"}
-            )
-        # Return base64 encoded image like main_works.py camera tool
-        import base64
-
-        import cv2
-
-        temp_path = "/tmp/camera_frame.jpg"
-        cv2.imwrite(temp_path, frame)
-        with open(temp_path, "rb") as f:
-            b64_encoded = base64.b64encode(f.read()).decode("utf-8")
-        return {"b64_im": b64_encoded}
-
-
-class HeadTracking(Tool):
-    """Toggle head tracking state."""
-
-    name = "head_tracking"
-    description = "Toggle head tracking state."
-    parameters_schema = {
-        "type": "object",
-        "properties": {"start": {"type": "boolean"}},
-        "required": ["start"],
-    }
-
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Enable or disable head tracking."""
-        enable = bool(kwargs.get("start"))
-
-        # Update camera worker head tracking state
-        if deps.camera_worker is not None:
-            deps.camera_worker.set_head_tracking_enabled(enable)
-
-        status = "started" if enable else "stopped"
-        logger.info("Tool call: head_tracking %s", status)
-        return {"status": f"head tracking {status}"}
-
-
-
-class Dance(Tool):
-    """Play a named or random dance move once (or repeat). Non-blocking."""
-
-    name = "dance"
-    description = "Play a named or random dance move once (or repeat). Non-blocking."
-    parameters_schema = {
-        "type": "object",
-        "properties": {
-            "move": {
-                "type": "string",
-                "description": """Name of the move; use 'random' or omit for random.
-                Here is a list of the available moves:
-                simple_nod: A simple, continuous up-and-down nodding motion.
-                head_tilt_roll: A continuous side-to-side head roll (ear to shoulder).
-                side_to_side_sway: A smooth, side-to-side sway of the entire head.
-                dizzy_spin: A circular 'dizzy' head motion combining roll and pitch.
-                stumble_and_recover: A simulated stumble and recovery with multiple axis movements. Good vibes
-                headbanger_combo: A strong head nod combined with a vertical bounce.
-                interwoven_spirals: A complex spiral motion using three axes at different frequencies.
-                sharp_side_tilt: A sharp, quick side-to-side tilt using a triangle waveform.
-                side_peekaboo: A multi-stage peekaboo performance, hiding and peeking to each side.
-                yeah_nod: An emphatic two-part yeah nod using transient motions.
-                uh_huh_tilt: A combined roll-and-pitch uh-huh gesture of agreement.
-                neck_recoil: A quick, transient backward recoil of the neck.
-                chin_lead: A forward motion led by the chin, combining translation and pitch.
-                groovy_sway_and_roll: A side-to-side sway combined with a corresponding roll for a groovy effect.
-                chicken_peck: A sharp, forward, chicken-like pecking motion.
-                side_glance_flick: A quick glance to the side that holds, then returns.
-                polyrhythm_combo: A 3-beat sway and a 2-beat nod create a polyrhythmic feel.
-                grid_snap: A robotic, grid-snapping motion using square waveforms.
-                pendulum_swing: A simple, smooth pendulum-like swing using a roll motion.
-                jackson_square: Traces a rectangle via a 5-point path, with sharp twitches on arrival at each checkpoint.
-                """,
-            },
-            "repeat": {
-                "type": "integer",
-                "description": "How many times to repeat the move (default 1).",
-            },
-        },
-        "required": [],
-    }
-
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Play a named or random dance move once (or repeat). Non-blocking."""
-        if not DANCE_AVAILABLE:
-            return {"error": "Dance system not available"}
-
-        move_name = kwargs.get("move")
-        repeat = int(kwargs.get("repeat", 1))
-
-        logger.info("Tool call: dance move=%s repeat=%d", move_name, repeat)
-
-        if not move_name or move_name == "random":
-            import random
-
-            move_name = random.choice(list(AVAILABLE_MOVES.keys()))
-
-        if move_name not in AVAILABLE_MOVES:
-            return {"error": f"Unknown dance move '{move_name}'. Available: {list(AVAILABLE_MOVES.keys())}"}
-
-        # Add dance moves to queue
-        movement_manager = deps.movement_manager
-        for _ in range(repeat):
-            dance_move = DanceQueueMove(move_name)
-            movement_manager.queue_move(dance_move)
-
-        return {"status": "queued", "move": move_name, "repeat": repeat}
-
-
-class StopDance(Tool):
-    """Stop the current dance move."""
-
-    name = "stop_dance"
-    description = "Stop the current dance move"
-    parameters_schema = {
-        "type": "object",
-        "properties": {
-            "dummy": {
-                "type": "boolean",
-                "description": "dummy boolean, set it to true",
-            },
-        },
-        "required": ["dummy"],
-    }
-
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Stop the current dance move."""
-        logger.info("Tool call: stop_dance")
-        movement_manager = deps.movement_manager
-        movement_manager.clear_move_queue()
-        return {"status": "stopped dance and cleared queue"}
-
-
-def get_available_emotions_and_descriptions() -> str:
-    """Get formatted list of available emotions with descriptions."""
-    if not EMOTION_AVAILABLE:
-        return "Emotions not available"
-
-    try:
-        emotion_names = RECORDED_MOVES.list_moves()
-        output = "Available emotions:\n"
-        for name in emotion_names:
-            description = RECORDED_MOVES.get(name).description
-            output += f" - {name}: {description}\n"
-        return output
-    except Exception as e:
-        return f"Error getting emotions: {e}"
-
-class PlayEmotion(Tool):
-    """Play a pre-recorded emotion."""
-
-    name = "play_emotion"
-    description = "Play a pre-recorded emotion"
-    parameters_schema = {
-        "type": "object",
-        "properties": {
-            "emotion": {
-                "type": "string",
-                "description": f"""Name of the emotion to play.
-                Here is a list of the available emotions:
-                {get_available_emotions_and_descriptions()}
-                """,
-            },
-        },
-        "required": ["emotion"],
-    }
-
-    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
-        """Play a pre-recorded emotion."""
-        if not EMOTION_AVAILABLE:
-            return {"error": "Emotion system not available"}
-
-        emotion_name = kwargs.get("emotion")
-        if not emotion_name:
-            return {"error": "Emotion name is required"}
-
-        logger.info("Tool call: play_emotion emotion=%s", emotion_name)
-
-        # Check if emotion exists
-        try:
-            emotion_names = RECORDED_MOVES.list_moves()
392
- if emotion_name not in emotion_names:
393
- return {"error": f"Unknown emotion '{emotion_name}'. Available: {emotion_names}"}
394
-
395
- # Add emotion to queue
396
- movement_manager = deps.movement_manager
397
- emotion_move = EmotionQueueMove(emotion_name, RECORDED_MOVES)
398
- movement_manager.queue_move(emotion_move)
399
-
400
- return {"status": "queued", "emotion": emotion_name}
401
-
402
- except Exception as e:
403
- logger.exception("Failed to play emotion")
404
- return {"error": f"Failed to play emotion: {e!s}"}
405
-
406
-
407
- class StopEmotion(Tool):
408
- """Stop the current emotion."""
409
-
410
- name = "stop_emotion"
411
- description = "Stop the current emotion"
412
- parameters_schema = {
413
- "type": "object",
414
- "properties": {
415
- "dummy": {
416
- "type": "boolean",
417
- "description": "dummy boolean, set it to true",
418
- },
419
- },
420
- "required": ["dummy"],
421
- }
422
-
423
- async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
424
- """Stop the current emotion."""
425
- logger.info("Tool call: stop_emotion")
426
- movement_manager = deps.movement_manager
427
- movement_manager.clear_move_queue()
428
- return {"status": "stopped emotion and cleared queue"}
429
-
430
-
431
- class DoNothing(Tool):
432
- """Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill."""
433
-
434
- name = "do_nothing"
435
- description = "Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill."
436
- parameters_schema = {
437
- "type": "object",
438
- "properties": {
439
- "reason": {
440
- "type": "string",
441
- "description": "Optional reason for doing nothing (e.g., 'contemplating existence', 'saving energy', 'being mysterious')",
442
- },
443
- },
444
- "required": [],
445
- }
446
-
447
- async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
448
- """Do nothing - stay still and silent."""
449
- reason = kwargs.get("reason", "just chilling")
450
- logger.info("Tool call: do_nothing reason=%s", reason)
451
- return {"status": "doing nothing", "reason": reason}
452
-
453
-
454
- # Registry & specs (dynamic)
455
-
456
- # List of available tool classes
457
- ALL_TOOLS: Dict[str, Tool] = {cls.name: cls() for cls in get_concrete_subclasses(Tool)} # type: ignore[type-abstract]
458
- ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
459
-
460
-
461
- # Dispatcher
462
- def _safe_load_obj(args_json: str) -> Dict[str, Any]:
463
- try:
464
- parsed_args = json.loads(args_json or "{}")
465
- return parsed_args if isinstance(parsed_args, dict) else {}
466
- except Exception:
467
- logger.warning("bad args_json=%r", args_json)
468
- return {}
469
-
470
-
471
- async def dispatch_tool_call(tool_name: str, args_json: str, deps: ToolDependencies) -> Dict[str, Any]:
472
- """Dispatch a tool call by name with JSON args and dependencies."""
473
- tool = ALL_TOOLS.get(tool_name)
474
-
475
- if not tool:
476
- return {"error": f"unknown tool: {tool_name}"}
477
-
478
- args = _safe_load_obj(args_json)
479
- try:
480
- return await tool(deps, **args)
481
- except Exception as e:
482
- msg = f"{type(e).__name__}: {e}"
483
- logger.exception("Tool error in %s: %s", tool_name, msg)
484
- return {"error": msg}
src/reachy_mini_conversation_app/tools/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """Tools library for Reachy Mini conversation app.
2
+
3
+ Tools are now loaded dynamically based on the profile's tools.txt file.
4
+ """
src/reachy_mini_conversation_app/tools/camera.py ADDED
@@ -0,0 +1,67 @@
1
+ import asyncio
2
+ import logging
3
+ from typing import Any, Dict
4
+
5
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
6
+
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class Camera(Tool):
12
+ """Take a picture with the camera and ask a question about it."""
13
+
14
+ name = "camera"
15
+ description = "Take a picture with the camera and ask a question about it."
16
+ parameters_schema = {
17
+ "type": "object",
18
+ "properties": {
19
+ "question": {
20
+ "type": "string",
21
+ "description": "The question to ask about the picture",
22
+ },
23
+ },
24
+ "required": ["question"],
25
+ }
26
+
27
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
28
+ """Take a picture with the camera and ask a question about it."""
29
+ image_query = (kwargs.get("question") or "").strip()
30
+ if not image_query:
31
+ logger.warning("camera: empty question")
32
+ return {"error": "question must be a non-empty string"}
33
+
34
+ logger.info("Tool call: camera question=%s", image_query[:120])
35
+
36
+ # Get frame from camera worker buffer (like main_works.py)
37
+ if deps.camera_worker is not None:
38
+ frame = deps.camera_worker.get_latest_frame()
39
+ if frame is None:
40
+ logger.error("No frame available from camera worker")
41
+ return {"error": "No frame available"}
42
+ else:
43
+ logger.error("Camera worker not available")
44
+ return {"error": "Camera worker not available"}
45
+
46
+ # Use vision manager for processing if available
47
+ if deps.vision_manager is not None:
48
+ vision_result = await asyncio.to_thread(
49
+ deps.vision_manager.processor.process_image, frame, image_query,
50
+ )
51
+ if isinstance(vision_result, dict) and "error" in vision_result:
52
+ return vision_result
53
+ return (
54
+ {"image_description": vision_result}
55
+ if isinstance(vision_result, str)
56
+ else {"error": "vision returned non-string"}
57
+ )
58
+ # Return base64 encoded image like main_works.py camera tool
59
+ import base64
60
+
61
+ import cv2
62
+
63
+ temp_path = "/tmp/camera_frame.jpg"
64
+ cv2.imwrite(temp_path, frame)
65
+ with open(temp_path, "rb") as f:
66
+ b64_encoded = base64.b64encode(f.read()).decode("utf-8")
67
+ return {"b64_im": b64_encoded}
src/reachy_mini_conversation_app/tools/core_tools.py ADDED
@@ -0,0 +1,224 @@
1
+ from __future__ import annotations
2
+ import abc
3
+ import sys
4
+ import json
5
+ import inspect
6
+ import logging
7
+ import importlib
8
+ from typing import Any, Dict, List
9
+ from pathlib import Path
10
+ from dataclasses import dataclass
11
+
12
+ from reachy_mini import ReachyMini
13
+ # Import config to ensure .env is loaded before reading REACHY_MINI_CUSTOM_PROFILE
14
+ from reachy_mini_conversation_app.config import config # noqa: F401
15
+
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ PROFILES_DIRECTORY = "reachy_mini_conversation_app.profiles"
21
+
22
+ if not logger.handlers:
23
+ handler = logging.StreamHandler()
24
+ formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s:%(lineno)d | %(message)s")
25
+ handler.setFormatter(formatter)
26
+ logger.addHandler(handler)
27
+ logger.setLevel(logging.INFO)
28
+
29
+
30
+ ALL_TOOLS: Dict[str, "Tool"] = {}
31
+ ALL_TOOL_SPECS: List[Dict[str, Any]] = []
32
+ _TOOLS_INITIALIZED = False
33
+
34
+
35
+
36
+ def get_concrete_subclasses(base: type[Tool]) -> List[type[Tool]]:
37
+ """Recursively find all concrete (non-abstract) subclasses of a base class."""
38
+ result: List[type[Tool]] = []
39
+ for cls in base.__subclasses__():
40
+ if not inspect.isabstract(cls):
41
+ result.append(cls)
42
+ # recurse into subclasses
43
+ result.extend(get_concrete_subclasses(cls))
44
+ return result
45
+
46
+
47
+ @dataclass
48
+ class ToolDependencies:
49
+ """External dependencies injected into tools."""
50
+
51
+ reachy_mini: ReachyMini
52
+ movement_manager: Any # MovementManager from moves.py
53
+ # Optional deps
54
+ camera_worker: Any | None = None # CameraWorker for frame buffering
55
+ vision_manager: Any | None = None
56
+ head_wobbler: Any | None = None # HeadWobbler for audio-reactive motion
57
+ motion_duration_s: float = 1.0
58
+
59
+
60
+ # Tool base class
61
+ class Tool(abc.ABC):
62
+ """Base abstraction for tools used in function-calling.
63
+
64
+ Each tool must define:
65
+ - name: str
66
+ - description: str
67
+ - parameters_schema: Dict[str, Any] # JSON Schema
68
+ """
69
+
70
+ name: str
71
+ description: str
72
+ parameters_schema: Dict[str, Any]
73
+
74
+ def spec(self) -> Dict[str, Any]:
75
+ """Return the function spec for LLM consumption."""
76
+ return {
77
+ "type": "function",
78
+ "name": self.name,
79
+ "description": self.description,
80
+ "parameters": self.parameters_schema,
81
+ }
82
+
83
+ @abc.abstractmethod
84
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
85
+ """Async tool execution entrypoint."""
86
+ raise NotImplementedError
87
+
88
+
89
+ # Registry & specs (dynamic)
90
+ def _load_profile_tools() -> None:
91
+ """Load tools based on profile's tools.txt file."""
92
+ # Determine which profile to use
93
+ profile = config.REACHY_MINI_CUSTOM_PROFILE or "default"
94
+ logger.info(f"Loading tools for profile: {profile}")
95
+
96
+ # Build path to tools.txt
97
+ # Get the profile directory path
98
+ profile_module_path = Path(__file__).parent.parent / "profiles" / profile
99
+ tools_txt_path = profile_module_path / "tools.txt"
100
+
101
+ if not tools_txt_path.exists():
102
+ logger.error(f"✗ tools.txt not found at {tools_txt_path}")
103
+ sys.exit(1)
104
+
105
+ # Read and parse tools.txt
106
+ try:
107
+ with open(tools_txt_path, "r") as f:
108
+ lines = f.readlines()
109
+ except Exception as e:
110
+ logger.error(f"✗ Failed to read tools.txt: {e}")
111
+ sys.exit(1)
112
+
113
+ # Parse tool names (skip comments and blank lines)
114
+ tool_names = []
115
+ for line in lines:
116
+ line = line.strip()
117
+ # Skip blank lines and comments
118
+ if not line or line.startswith("#"):
119
+ continue
120
+ tool_names.append(line)
121
+
122
+ logger.info(f"Found {len(tool_names)} tools to load: {tool_names}")
123
+
124
+ # Import each tool
125
+ for tool_name in tool_names:
126
+ loaded = False
127
+ profile_error = None
128
+
129
+ # Try profile-local tool first
130
+ try:
131
+ profile_tool_module = f"{PROFILES_DIRECTORY}.{profile}.{tool_name}"
132
+ importlib.import_module(profile_tool_module)
133
+ logger.info(f"✓ Loaded profile-local tool: {tool_name}")
134
+ loaded = True
135
+ except ModuleNotFoundError as e:
136
+ # Check if it's the tool module itself that's missing (expected) or a dependency
137
+ if tool_name in str(e):
138
+ pass # Tool not in profile directory, try shared tools
139
+ else:
140
+ # Missing import dependency within the tool file
141
+ profile_error = f"Missing dependency: {e}"
142
+ logger.error(f"❌ Failed to load profile-local tool '{tool_name}': {profile_error}")
143
+ logger.error(f" Module path: {profile_tool_module}")
144
+ except ImportError as e:
145
+ profile_error = f"Import error: {e}"
146
+ logger.error(f"❌ Failed to load profile-local tool '{tool_name}': {profile_error}")
147
+ logger.error(f" Module path: {profile_tool_module}")
148
+ except Exception as e:
149
+ profile_error = f"{type(e).__name__}: {e}"
150
+ logger.error(f"❌ Failed to load profile-local tool '{tool_name}': {profile_error}")
151
+ logger.error(f" Module path: {profile_tool_module}")
152
+
153
+ # Try shared tools library if not found in profile
154
+ if not loaded:
155
+ try:
156
+ shared_tool_module = f"reachy_mini_conversation_app.tools.{tool_name}"
157
+ importlib.import_module(shared_tool_module)
158
+ logger.info(f"✓ Loaded shared tool: {tool_name}")
159
+ loaded = True
160
+ except ModuleNotFoundError:
161
+ if profile_error:
162
+ # Already logged error from profile attempt
163
+ logger.error(f"❌ Tool '{tool_name}' also not found in shared tools")
164
+ else:
165
+ logger.warning(f"⚠️ Tool '{tool_name}' not found in profile or shared tools")
166
+ except ImportError as e:
167
+ logger.error(f"❌ Failed to load shared tool '{tool_name}': Import error: {e}")
168
+ logger.error(f" Module path: {shared_tool_module}")
169
+ except Exception as e:
170
+ logger.error(f"❌ Failed to load shared tool '{tool_name}': {type(e).__name__}: {e}")
171
+ logger.error(f" Module path: {shared_tool_module}")
172
+
173
+
174
+ def _initialize_tools() -> None:
175
+ """Populate registry once, even if module is imported repeatedly."""
176
+ global ALL_TOOLS, ALL_TOOL_SPECS, _TOOLS_INITIALIZED
177
+
178
+ if _TOOLS_INITIALIZED:
179
+ logger.debug("Tools already initialized; skipping reinitialization.")
180
+ return
181
+
182
+ _load_profile_tools()
183
+
184
+ ALL_TOOLS = {cls.name: cls() for cls in get_concrete_subclasses(Tool)} # type: ignore[type-abstract]
185
+ ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
186
+
187
+ for tool_name, tool in ALL_TOOLS.items():
188
+ logger.info(f"tool registered: {tool_name} - {tool.description}")
189
+
190
+ _TOOLS_INITIALIZED = True
191
+
192
+
193
+ _initialize_tools()
194
+
195
+
196
+ def get_tool_specs(exclusion_list: list[str] | None = None) -> list[Dict[str, Any]]:
197
+     """Get tool specs, optionally excluding some tools."""
198
+     return [spec for spec in ALL_TOOL_SPECS if spec.get("name") not in (exclusion_list or [])]
199
+
200
+
201
+ # Dispatcher
202
+ def _safe_load_obj(args_json: str) -> Dict[str, Any]:
203
+ try:
204
+ parsed_args = json.loads(args_json or "{}")
205
+ return parsed_args if isinstance(parsed_args, dict) else {}
206
+ except Exception:
207
+ logger.warning("bad args_json=%r", args_json)
208
+ return {}
209
+
210
+
211
+ async def dispatch_tool_call(tool_name: str, args_json: str, deps: ToolDependencies) -> Dict[str, Any]:
212
+ """Dispatch a tool call by name with JSON args and dependencies."""
213
+ tool = ALL_TOOLS.get(tool_name)
214
+
215
+ if not tool:
216
+ return {"error": f"unknown tool: {tool_name}"}
217
+
218
+ args = _safe_load_obj(args_json)
219
+ try:
220
+ return await tool(deps, **args)
221
+ except Exception as e:
222
+ msg = f"{type(e).__name__}: {e}"
223
+ logger.exception("Tool error in %s: %s", tool_name, msg)
224
+ return {"error": msg}
src/reachy_mini_conversation_app/tools/dance.py ADDED
@@ -0,0 +1,87 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ # Initialize dance library
10
+ try:
11
+ from reachy_mini_dances_library.collection.dance import AVAILABLE_MOVES
12
+ from reachy_mini_conversation_app.dance_emotion_moves import DanceQueueMove
13
+
14
+ DANCE_AVAILABLE = True
15
+ except ImportError as e:
16
+ logger.warning(f"Dance library not available: {e}")
17
+ AVAILABLE_MOVES = {}
18
+ DANCE_AVAILABLE = False
19
+
20
+
21
+ class Dance(Tool):
22
+ """Play a named or random dance move once (or repeat). Non-blocking."""
23
+
24
+ name = "dance"
25
+ description = "Play a named or random dance move once (or repeat). Non-blocking."
26
+ parameters_schema = {
27
+ "type": "object",
28
+ "properties": {
29
+ "move": {
30
+ "type": "string",
31
+ "description": """Name of the move; use 'random' or omit for random.
32
+ Here is a list of the available moves:
33
+ simple_nod: A simple, continuous up-and-down nodding motion.
34
+ head_tilt_roll: A continuous side-to-side head roll (ear to shoulder).
35
+ side_to_side_sway: A smooth, side-to-side sway of the entire head.
36
+ dizzy_spin: A circular 'dizzy' head motion combining roll and pitch.
37
+ stumble_and_recover: A simulated stumble and recovery with multiple axis movements. Good vibes.
38
+ headbanger_combo: A strong head nod combined with a vertical bounce.
39
+ interwoven_spirals: A complex spiral motion using three axes at different frequencies.
40
+ sharp_side_tilt: A sharp, quick side-to-side tilt using a triangle waveform.
41
+ side_peekaboo: A multi-stage peekaboo performance, hiding and peeking to each side.
42
+ yeah_nod: An emphatic two-part yeah nod using transient motions.
43
+ uh_huh_tilt: A combined roll-and-pitch uh-huh gesture of agreement.
44
+ neck_recoil: A quick, transient backward recoil of the neck.
45
+ chin_lead: A forward motion led by the chin, combining translation and pitch.
46
+ groovy_sway_and_roll: A side-to-side sway combined with a corresponding roll for a groovy effect.
47
+ chicken_peck: A sharp, forward, chicken-like pecking motion.
48
+ side_glance_flick: A quick glance to the side that holds, then returns.
49
+ polyrhythm_combo: A 3-beat sway and a 2-beat nod create a polyrhythmic feel.
50
+ grid_snap: A robotic, grid-snapping motion using square waveforms.
51
+ pendulum_swing: A simple, smooth pendulum-like swing using a roll motion.
52
+ jackson_square: Traces a rectangle via a 5-point path, with sharp twitches on arrival at each checkpoint.
53
+ """,
54
+ },
55
+ "repeat": {
56
+ "type": "integer",
57
+ "description": "How many times to repeat the move (default 1).",
58
+ },
59
+ },
60
+ "required": [],
61
+ }
62
+
63
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
64
+ """Play a named or random dance move once (or repeat). Non-blocking."""
65
+ if not DANCE_AVAILABLE:
66
+ return {"error": "Dance system not available"}
67
+
68
+ move_name = kwargs.get("move")
69
+ repeat = int(kwargs.get("repeat", 1))
70
+
71
+ logger.info("Tool call: dance move=%s repeat=%d", move_name, repeat)
72
+
73
+ if not move_name or move_name == "random":
74
+ import random
75
+
76
+ move_name = random.choice(list(AVAILABLE_MOVES.keys()))
77
+
78
+ if move_name not in AVAILABLE_MOVES:
79
+ return {"error": f"Unknown dance move '{move_name}'. Available: {list(AVAILABLE_MOVES.keys())}"}
80
+
81
+ # Add dance moves to queue
82
+ movement_manager = deps.movement_manager
83
+ for _ in range(repeat):
84
+ dance_move = DanceQueueMove(move_name)
85
+ movement_manager.queue_move(dance_move)
86
+
87
+ return {"status": "queued", "move": move_name, "repeat": repeat}
src/reachy_mini_conversation_app/tools/do_nothing.py ADDED
@@ -0,0 +1,30 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ class DoNothing(Tool):
11
+ """Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill."""
12
+
13
+ name = "do_nothing"
14
+ description = "Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill."
15
+ parameters_schema = {
16
+ "type": "object",
17
+ "properties": {
18
+ "reason": {
19
+ "type": "string",
20
+ "description": "Optional reason for doing nothing (e.g., 'contemplating existence', 'saving energy', 'being mysterious')",
21
+ },
22
+ },
23
+ "required": [],
24
+ }
25
+
26
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
27
+ """Do nothing - stay still and silent."""
28
+ reason = kwargs.get("reason", "just chilling")
29
+ logger.info("Tool call: do_nothing reason=%s", reason)
30
+ return {"status": "doing nothing", "reason": reason}
src/reachy_mini_conversation_app/tools/head_tracking.py ADDED
@@ -0,0 +1,31 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ class HeadTracking(Tool):
11
+ """Toggle head tracking state."""
12
+
13
+ name = "head_tracking"
14
+ description = "Toggle head tracking state."
15
+ parameters_schema = {
16
+ "type": "object",
17
+ "properties": {"start": {"type": "boolean"}},
18
+ "required": ["start"],
19
+ }
20
+
21
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
22
+ """Enable or disable head tracking."""
23
+ enable = bool(kwargs.get("start"))
24
+
25
+ # Update camera worker head tracking state
26
+ if deps.camera_worker is not None:
27
+ deps.camera_worker.set_head_tracking_enabled(enable)
28
+
29
+ status = "started" if enable else "stopped"
30
+ logger.info("Tool call: head_tracking %s", status)
31
+ return {"status": f"head tracking {status}"}
src/reachy_mini_conversation_app/tools/move_head.py ADDED
@@ -0,0 +1,79 @@
1
+ import logging
2
+ from typing import Any, Dict, Tuple, Literal
3
+
4
+ from reachy_mini.utils import create_head_pose
5
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
6
+ from reachy_mini_conversation_app.dance_emotion_moves import GotoQueueMove
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ Direction = Literal["left", "right", "up", "down", "front"]
12
+
13
+
14
+ class MoveHead(Tool):
15
+ """Move head in a given direction."""
16
+
17
+ name = "move_head"
18
+ description = "Move your head in a given direction: left, right, up, down or front."
19
+ parameters_schema = {
20
+ "type": "object",
21
+ "properties": {
22
+ "direction": {
23
+ "type": "string",
24
+ "enum": ["left", "right", "up", "down", "front"],
25
+ },
26
+ },
27
+ "required": ["direction"],
28
+ }
29
+
30
+ # mapping: direction -> args for create_head_pose
31
+ DELTAS: Dict[str, Tuple[int, int, int, int, int, int]] = {
32
+ "left": (0, 0, 0, 0, 0, 40),
33
+ "right": (0, 0, 0, 0, 0, -40),
34
+ "up": (0, 0, 0, 0, -30, 0),
35
+ "down": (0, 0, 0, 0, 30, 0),
36
+ "front": (0, 0, 0, 0, 0, 0),
37
+ }
38
+
39
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
40
+ """Move head in a given direction."""
41
+ direction_raw = kwargs.get("direction")
42
+ if not isinstance(direction_raw, str):
43
+ return {"error": "direction must be a string"}
44
+ direction: Direction = direction_raw # type: ignore[assignment]
45
+ logger.info("Tool call: move_head direction=%s", direction)
46
+
47
+ deltas = self.DELTAS.get(direction, self.DELTAS["front"])
48
+ target = create_head_pose(*deltas, degrees=True)
49
+
50
+ # Use new movement manager
51
+ try:
52
+ movement_manager = deps.movement_manager
53
+
54
+ # Get current state for interpolation
55
+ current_head_pose = deps.reachy_mini.get_current_head_pose()
56
+ _, current_antennas = deps.reachy_mini.get_current_joint_positions()
57
+
58
+ # Create goto move
59
+ goto_move = GotoQueueMove(
60
+ target_head_pose=target,
61
+ start_head_pose=current_head_pose,
62
+ target_antennas=(0, 0), # Reset antennas to default
63
+ start_antennas=(
64
+ current_antennas[0],
65
+ current_antennas[1],
66
+ ), # Skip body_yaw
67
+ target_body_yaw=0, # Reset body yaw
68
+ start_body_yaw=current_antennas[0], # body_yaw is first in joint positions
69
+ duration=deps.motion_duration_s,
70
+ )
71
+
72
+ movement_manager.queue_move(goto_move)
73
+ movement_manager.set_moving_state(deps.motion_duration_s)
74
+
75
+ return {"status": f"looking {direction}"}
76
+
77
+ except Exception as e:
78
+             logger.exception("move_head failed")
79
+ return {"error": f"move_head failed: {type(e).__name__}: {e}"}
src/reachy_mini_conversation_app/tools/play_emotion.py ADDED
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ # Initialize emotion library
10
+ try:
11
+ from reachy_mini.motion.recorded_move import RecordedMoves
12
+ from reachy_mini_conversation_app.dance_emotion_moves import EmotionQueueMove
13
+
14
+ # Note: huggingface_hub automatically reads HF_TOKEN from environment variables
15
+ RECORDED_MOVES = RecordedMoves("pollen-robotics/reachy-mini-emotions-library")
16
+ EMOTION_AVAILABLE = True
17
+ except ImportError as e:
18
+ logger.warning(f"Emotion library not available: {e}")
19
+ RECORDED_MOVES = None
20
+ EMOTION_AVAILABLE = False
21
+
22
+
23
+ def get_available_emotions_and_descriptions() -> str:
24
+ """Get formatted list of available emotions with descriptions."""
25
+ if not EMOTION_AVAILABLE:
26
+ return "Emotions not available"
27
+
28
+ try:
29
+ emotion_names = RECORDED_MOVES.list_moves()
30
+ output = "Available emotions:\n"
31
+ for name in emotion_names:
32
+ description = RECORDED_MOVES.get(name).description
33
+ output += f" - {name}: {description}\n"
34
+ return output
35
+ except Exception as e:
36
+ return f"Error getting emotions: {e}"
37
+
38
+
39
+ class PlayEmotion(Tool):
40
+ """Play a pre-recorded emotion."""
41
+
42
+ name = "play_emotion"
43
+ description = "Play a pre-recorded emotion"
44
+ parameters_schema = {
45
+ "type": "object",
46
+ "properties": {
47
+ "emotion": {
48
+ "type": "string",
49
+ "description": f"""Name of the emotion to play.
50
+ Here is a list of the available emotions:
51
+ {get_available_emotions_and_descriptions()}
52
+ """,
53
+ },
54
+ },
55
+ "required": ["emotion"],
56
+ }
57
+
58
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
59
+ """Play a pre-recorded emotion."""
60
+ if not EMOTION_AVAILABLE:
61
+ return {"error": "Emotion system not available"}
62
+
63
+ emotion_name = kwargs.get("emotion")
64
+ if not emotion_name:
65
+ return {"error": "Emotion name is required"}
66
+
67
+ logger.info("Tool call: play_emotion emotion=%s", emotion_name)
68
+
69
+ # Check if emotion exists
70
+ try:
71
+ emotion_names = RECORDED_MOVES.list_moves()
72
+ if emotion_name not in emotion_names:
73
+ return {"error": f"Unknown emotion '{emotion_name}'. Available: {emotion_names}"}
74
+
75
+ # Add emotion to queue
76
+ movement_manager = deps.movement_manager
77
+ emotion_move = EmotionQueueMove(emotion_name, RECORDED_MOVES)
78
+ movement_manager.queue_move(emotion_move)
79
+
80
+ return {"status": "queued", "emotion": emotion_name}
81
+
82
+ except Exception as e:
83
+ logger.exception("Failed to play emotion")
84
+ return {"error": f"Failed to play emotion: {e!s}"}
src/reachy_mini_conversation_app/tools/stop_dance.py ADDED
@@ -0,0 +1,31 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ class StopDance(Tool):
11
+ """Stop the current dance move."""
12
+
13
+ name = "stop_dance"
14
+ description = "Stop the current dance move"
15
+ parameters_schema = {
16
+ "type": "object",
17
+ "properties": {
18
+ "dummy": {
19
+ "type": "boolean",
20
+ "description": "dummy boolean, set it to true",
21
+ },
22
+ },
23
+ "required": ["dummy"],
24
+ }
25
+
26
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
27
+ """Stop the current dance move."""
28
+ logger.info("Tool call: stop_dance")
29
+ movement_manager = deps.movement_manager
30
+ movement_manager.clear_move_queue()
31
+ return {"status": "stopped dance and cleared queue"}
src/reachy_mini_conversation_app/tools/stop_emotion.py ADDED
@@ -0,0 +1,31 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ class StopEmotion(Tool):
11
+ """Stop the current emotion."""
12
+
13
+ name = "stop_emotion"
14
+ description = "Stop the current emotion"
15
+ parameters_schema = {
16
+ "type": "object",
17
+ "properties": {
18
+ "dummy": {
19
+ "type": "boolean",
20
+ "description": "dummy boolean, set it to true",
21
+ },
22
+ },
23
+ "required": ["dummy"],
24
+ }
25
+
26
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
27
+ """Stop the current emotion."""
28
+ logger.info("Tool call: stop_emotion")
29
+ movement_manager = deps.movement_manager
30
+ movement_manager.clear_move_queue()
31
+ return {"status": "stopped emotion and cleared queue"}
tests/test_openai_realtime.py CHANGED
@@ -7,8 +7,8 @@ from unittest.mock import MagicMock
7
  import pytest
8
 
9
  import reachy_mini_conversation_app.openai_realtime as rt_mod
10
- from reachy_mini_conversation_app.tools import ToolDependencies
11
 from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
+ from reachy_mini_conversation_app.tools.core_tools import ToolDependencies
12
 
13
 
14
  def _build_handler(loop: asyncio.AbstractEventLoop) -> OpenaiRealtimeHandler:
uv.lock CHANGED
The diff for this file is too large to render. See raw diff