Merge branch 'develop' into 62-spinoff-personalities
Browse files
README.md
CHANGED
|
@@ -131,6 +131,7 @@ By default, the app runs in console mode for direct audio interaction. Use the `
|
|
| 131 |
| `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
|
| 132 |
| `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
|
| 133 |
| `--debug` | `False` | Enable verbose logging for troubleshooting. |
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
### Examples
|
|
@@ -146,6 +147,12 @@ By default, the app runs in console mode for direct audio interaction. Use the `
|
|
| 146 |
reachy-mini-conversation-app --local-vision
|
| 147 |
```
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
- Disable the camera pipeline (audio-only conversation):
|
| 150 |
|
| 151 |
```bash
|
|
|
|
| 131 |
| `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
|
| 132 |
| `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
|
| 133 |
| `--debug` | `False` | Enable verbose logging for troubleshooting. |
|
| 134 |
+
| `--wireless-version` | `False` | Use WebRTC backend for wireless version of the robot (GStreamer when combined with `--on-device`). Requires `reachy_mini_wireless` extra to be installed. |
|
| 135 |
|
| 136 |
|
| 137 |
### Examples
|
|
|
|
| 147 |
reachy-mini-conversation-app --local-vision
|
| 148 |
```
|
| 149 |
|
| 150 |
+
- Run with wireless support (requires `reachy_mini_wireless` extra and daemon started with `--wireless-version`):
|
| 151 |
+
|
| 152 |
+
```bash
|
| 153 |
+
reachy-mini-conversation-app --wireless-version
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
- Disable the camera pipeline (audio-only conversation):
|
| 157 |
|
| 158 |
```bash
|
pyproject.toml
CHANGED
|
@@ -26,7 +26,8 @@ dependencies = [
|
|
| 26 |
#Reachy mini
|
| 27 |
"reachy_mini_dances_library",
|
| 28 |
"reachy_mini_toolbox",
|
| 29 |
-
"reachy_mini>=1.1.
|
|
|
|
| 30 |
]
|
| 31 |
|
| 32 |
[project.optional-dependencies]
|
|
|
|
| 26 |
#Reachy mini
|
| 27 |
"reachy_mini_dances_library",
|
| 28 |
"reachy_mini_toolbox",
|
| 29 |
+
"reachy_mini>=1.1.3",
|
| 30 |
+
"eclipse-zenoh~=1.7.0",
|
| 31 |
]
|
| 32 |
|
| 33 |
[project.optional-dependencies]
|
src/reachy_mini_conversation_app/console.py
CHANGED
|
@@ -12,6 +12,7 @@ from fastrtc import AdditionalOutputs, audio_to_float32
|
|
| 12 |
from scipy.signal import resample
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini
|
|
|
|
| 15 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 16 |
|
| 17 |
|
|
@@ -75,17 +76,21 @@ class LocalStream:
|
|
| 75 |
def clear_audio_queue(self) -> None:
|
| 76 |
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 77 |
logger.info("User intervention: flushing player queue")
|
|
|
|
|
|
|
|
|
|
| 78 |
self.handler.output_queue = asyncio.Queue()
|
| 79 |
|
| 80 |
async def record_loop(self) -> None:
|
| 81 |
"""Read mic frames from the recorder and forward them to the handler."""
|
| 82 |
-
|
|
|
|
|
|
|
| 83 |
while not self._stop_event.is_set():
|
| 84 |
audio_frame = self._robot.media.get_audio_sample()
|
| 85 |
if audio_frame is not None:
|
| 86 |
-
await self.handler.receive((
|
| 87 |
-
|
| 88 |
-
await asyncio.sleep(0.01) # avoid busy loop
|
| 89 |
|
| 90 |
async def play_loop(self) -> None:
|
| 91 |
"""Fetch outputs from the handler: log text and play audio frames."""
|
|
|
|
| 12 |
from scipy.signal import resample
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini
|
| 15 |
+
from reachy_mini.media.media_manager import MediaBackend
|
| 16 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 17 |
|
| 18 |
|
|
|
|
| 76 |
def clear_audio_queue(self) -> None:
|
| 77 |
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 78 |
logger.info("User intervention: flushing player queue")
|
| 79 |
+
if self._robot.media.backend == MediaBackend.GSTREAMER:
|
| 80 |
+
# Directly flush gstreamer audio pipe
|
| 81 |
+
self._robot.media.audio.clear_player()
|
| 82 |
self.handler.output_queue = asyncio.Queue()
|
| 83 |
|
| 84 |
async def record_loop(self) -> None:
|
| 85 |
"""Read mic frames from the recorder and forward them to the handler."""
|
| 86 |
+
input_sample_rate = self._robot.media.get_input_audio_samplerate()
|
| 87 |
+
logger.debug(f"Audio recording started at {input_sample_rate} Hz")
|
| 88 |
+
|
| 89 |
while not self._stop_event.is_set():
|
| 90 |
audio_frame = self._robot.media.get_audio_sample()
|
| 91 |
if audio_frame is not None:
|
| 92 |
+
await self.handler.receive((input_sample_rate, audio_frame))
|
| 93 |
+
await asyncio.sleep(0) # avoid busy loop
|
|
|
|
| 94 |
|
| 95 |
async def play_loop(self) -> None:
|
| 96 |
"""Fetch outputs from the handler: log text and play audio frames."""
|
src/reachy_mini_conversation_app/main.py
CHANGED
|
@@ -56,7 +56,23 @@ def run(
|
|
| 56 |
logger.warning("Head tracking is not activated due to --no-camera.")
|
| 57 |
|
| 58 |
if robot is None:
|
| 59 |
-
robot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# Check if running in simulation mode without --gradio
|
| 62 |
if robot.client.get_status()["simulation_enabled"] and not args.gradio:
|
|
|
|
| 56 |
logger.warning("Head tracking is not activated due to --no-camera.")
|
| 57 |
|
| 58 |
if robot is None:
|
| 59 |
+
# Initialize robot with appropriate backend
|
| 60 |
+
# TODO: Implement dynamic robot connection detection
|
| 61 |
+
# Automatically detect and connect to available Reachy Mini robot(s)
|
| 62 |
+
# Priority checks (in order):
|
| 63 |
+
# 1. Reachy Lite connected directly to the host
|
| 64 |
+
# 2. Reachy Mini daemon running on localhost (same device)
|
| 65 |
+
# 3. Reachy Mini daemon on local network (same subnet)
|
| 66 |
+
|
| 67 |
+
if args.wireless_version and not args.on_device:
|
| 68 |
+
logger.info("Using WebRTC backend for fully remote wireless version")
|
| 69 |
+
robot = ReachyMini(media_backend="webrtc", localhost_only=False)
|
| 70 |
+
elif args.wireless_version and args.on_device:
|
| 71 |
+
logger.info("Using GStreamer backend for on-device wireless version")
|
| 72 |
+
robot = ReachyMini(media_backend="gstreamer")
|
| 73 |
+
else:
|
| 74 |
+
logger.info("Using default backend for lite version")
|
| 75 |
+
robot = ReachyMini(media_backend="default")
|
| 76 |
|
| 77 |
# Check if running in simulation mode without --gradio
|
| 78 |
if robot.client.get_status()["simulation_enabled"] and not args.gradio:
|
src/reachy_mini_conversation_app/openai_realtime.py
CHANGED
|
@@ -400,14 +400,19 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 400 |
|
| 401 |
# Microphone receive
|
| 402 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
| 403 |
-
"""Receive audio frame from the microphone and send it to the
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
Args:
|
| 406 |
-
frame: A tuple containing
|
| 407 |
|
| 408 |
"""
|
| 409 |
if not self.connection:
|
| 410 |
return
|
|
|
|
| 411 |
input_sample_rate, audio_frame = frame
|
| 412 |
|
| 413 |
# Reshape if needed
|
|
@@ -426,6 +431,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 426 |
# Cast if needed
|
| 427 |
audio_frame = audio_to_int16(audio_frame)
|
| 428 |
|
|
|
|
| 429 |
audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
|
| 430 |
await self.connection.input_audio_buffer.append(audio=audio_message)
|
| 431 |
|
|
|
|
| 400 |
|
| 401 |
# Microphone receive
|
| 402 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
| 403 |
+
"""Receive audio frame from the microphone and send it to the OpenAI server.
|
| 404 |
+
|
| 405 |
+
Handles both mono and stereo audio formats, converting to the expected
|
| 406 |
+
mono format for OpenAI's API. Resamples if the input sample rate differs
|
| 407 |
+
from the expected rate.
|
| 408 |
|
| 409 |
Args:
|
| 410 |
+
frame: A tuple containing (sample_rate, audio_data).
|
| 411 |
|
| 412 |
"""
|
| 413 |
if not self.connection:
|
| 414 |
return
|
| 415 |
+
|
| 416 |
input_sample_rate, audio_frame = frame
|
| 417 |
|
| 418 |
# Reshape if needed
|
|
|
|
| 431 |
# Cast if needed
|
| 432 |
audio_frame = audio_to_int16(audio_frame)
|
| 433 |
|
| 434 |
+
# Send to OpenAI
|
| 435 |
audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
|
| 436 |
await self.connection.input_audio_buffer.append(audio=audio_message)
|
| 437 |
|
src/reachy_mini_conversation_app/utils.py
CHANGED
|
@@ -25,6 +25,18 @@ def parse_args() -> argparse.Namespace:
|
|
| 25 |
)
|
| 26 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 27 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return parser.parse_known_args()
|
| 29 |
|
| 30 |
|
|
|
|
| 25 |
)
|
| 26 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 27 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
| 28 |
+
parser.add_argument(
|
| 29 |
+
"--wireless-version",
|
| 30 |
+
default=False,
|
| 31 |
+
action="store_true",
|
| 32 |
+
help="Use WebRTC backend for wireless version of the robot",
|
| 33 |
+
)
|
| 34 |
+
parser.add_argument(
|
| 35 |
+
"--on-device",
|
| 36 |
+
default=False,
|
| 37 |
+
action="store_true",
|
| 38 |
+
help="Use when conversation app is running on the same device as Reachy Mini daemon",
|
| 39 |
+
)
|
| 40 |
return parser.parse_known_args()
|
| 41 |
|
| 42 |
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|