Alina
committed on
Add wireless version support (#133)
Browse files
README.md
CHANGED
|
@@ -118,6 +118,7 @@ By default, the app runs in console mode for direct audio interaction. Use the `
|
|
| 118 |
| `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
|
| 119 |
| `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
|
| 120 |
| `--debug` | `False` | Enable verbose logging for troubleshooting. |
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
### Examples
|
|
@@ -133,6 +134,12 @@ By default, the app runs in console mode for direct audio interaction. Use the `
|
|
| 133 |
reachy-mini-conversation-app --local-vision
|
| 134 |
```
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
- Disable the camera pipeline (audio-only conversation):
|
| 137 |
|
| 138 |
```bash
|
|
|
|
| 118 |
| `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
|
| 119 |
| `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
|
| 120 |
| `--debug` | `False` | Enable verbose logging for troubleshooting. |
|
| 121 |
+
| `--wireless-version` | `False` | Use GStreamer backend for wireless version of the robot. Requires `reachy_mini_wireless` extra to be installed. |
|
| 122 |
|
| 123 |
|
| 124 |
### Examples
|
|
|
|
| 134 |
reachy-mini-conversation-app --local-vision
|
| 135 |
```
|
| 136 |
|
| 137 |
+
- Run with wireless support (requires `reachy_mini_wireless` extra and daemon started with `--wireless-version`):
|
| 138 |
+
|
| 139 |
+
```bash
|
| 140 |
+
reachy-mini-conversation-app --wireless-version
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
- Disable the camera pipeline (audio-only conversation):
|
| 144 |
|
| 145 |
```bash
|
pyproject.toml
CHANGED
|
@@ -26,9 +26,13 @@ dependencies = [
|
|
| 26 |
#Reachy mini
|
| 27 |
"reachy_mini_dances_library",
|
| 28 |
"reachy_mini_toolbox",
|
| 29 |
-
"reachy_mini
|
|
|
|
| 30 |
]
|
| 31 |
|
|
|
|
|
|
|
|
|
|
| 32 |
[project.optional-dependencies]
|
| 33 |
reachy_mini_wireless = [
|
| 34 |
"PyGObject>=3.42.2,<=3.46.0",
|
|
|
|
| 26 |
#Reachy mini
|
| 27 |
"reachy_mini_dances_library",
|
| 28 |
"reachy_mini_toolbox",
|
| 29 |
+
"reachy_mini",
|
| 30 |
+
"eclipse-zenoh~=1.7.0",
|
| 31 |
]
|
| 32 |
|
| 33 |
+
[tool.uv.sources]
|
| 34 |
+
reachy_mini = { git = "https://github.com/pollen-robotics/reachy_mini.git", branch = "487-fix-zenoh-config" }
|
| 35 |
+
|
| 36 |
[project.optional-dependencies]
|
| 37 |
reachy_mini_wireless = [
|
| 38 |
"PyGObject>=3.42.2,<=3.46.0",
|
src/reachy_mini_conversation_app/console.py
CHANGED
|
@@ -12,6 +12,7 @@ from fastrtc import AdditionalOutputs, audio_to_float32
|
|
| 12 |
from scipy.signal import resample
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini
|
|
|
|
| 15 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 16 |
|
| 17 |
|
|
@@ -75,17 +76,21 @@ class LocalStream:
|
|
| 75 |
def clear_audio_queue(self) -> None:
|
| 76 |
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 77 |
logger.info("User intervention: flushing player queue")
|
|
|
|
|
|
|
|
|
|
| 78 |
self.handler.output_queue = asyncio.Queue()
|
| 79 |
|
| 80 |
async def record_loop(self) -> None:
|
| 81 |
"""Read mic frames from the recorder and forward them to the handler."""
|
| 82 |
-
|
|
|
|
|
|
|
| 83 |
while not self._stop_event.is_set():
|
| 84 |
audio_frame = self._robot.media.get_audio_sample()
|
| 85 |
if audio_frame is not None:
|
| 86 |
-
await self.handler.receive((
|
| 87 |
-
|
| 88 |
-
await asyncio.sleep(0.01) # avoid busy loop
|
| 89 |
|
| 90 |
async def play_loop(self) -> None:
|
| 91 |
"""Fetch outputs from the handler: log text and play audio frames."""
|
|
|
|
| 12 |
from scipy.signal import resample
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini
|
| 15 |
+
from reachy_mini.media.media_manager import MediaBackend
|
| 16 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 17 |
|
| 18 |
|
|
|
|
| 76 |
def clear_audio_queue(self) -> None:
|
| 77 |
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 78 |
logger.info("User intervention: flushing player queue")
|
| 79 |
+
if self._robot.media.backend == MediaBackend.GSTREAMER:
|
| 80 |
+
# Directly flush gstreamer audio pipe
|
| 81 |
+
self._robot.media.audio.clear_player()
|
| 82 |
self.handler.output_queue = asyncio.Queue()
|
| 83 |
|
| 84 |
async def record_loop(self) -> None:
|
| 85 |
"""Read mic frames from the recorder and forward them to the handler."""
|
| 86 |
+
input_sample_rate = self._robot.media.get_input_audio_samplerate()
|
| 87 |
+
logger.debug(f"Audio recording started at {input_sample_rate} Hz")
|
| 88 |
+
|
| 89 |
while not self._stop_event.is_set():
|
| 90 |
audio_frame = self._robot.media.get_audio_sample()
|
| 91 |
if audio_frame is not None:
|
| 92 |
+
await self.handler.receive((input_sample_rate, audio_frame))
|
| 93 |
+
await asyncio.sleep(0) # avoid busy loop
|
|
|
|
| 94 |
|
| 95 |
async def play_loop(self) -> None:
|
| 96 |
"""Fetch outputs from the handler: log text and play audio frames."""
|
src/reachy_mini_conversation_app/main.py
CHANGED
|
@@ -37,7 +37,23 @@ def main() -> None:
|
|
| 37 |
if args.no_camera and args.head_tracker is not None:
|
| 38 |
logger.warning("Head tracking is not activated due to --no-camera.")
|
| 39 |
|
| 40 |
-
robot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Check if running in simulation mode without --gradio
|
| 43 |
if robot.client.get_status()["simulation_enabled"] and not args.gradio:
|
|
|
|
| 37 |
if args.no_camera and args.head_tracker is not None:
|
| 38 |
logger.warning("Head tracking is not activated due to --no-camera.")
|
| 39 |
|
| 40 |
+
# Initialize robot with appropriate backend
|
| 41 |
+
# TODO: Implement dynamic robot connection detection
|
| 42 |
+
# Automatically detect and connect to available Reachy Mini robot(s!)
|
| 43 |
+
# Priority checks (in order):
|
| 44 |
+
# 1. Reachy Lite connected directly to the host
|
| 45 |
+
# 2. Reachy Mini daemon running on localhost (same device)
|
| 46 |
+
# 3. Reachy Mini daemon on local network (same subnet)
|
| 47 |
+
|
| 48 |
+
if args.wireless_version and not args.on_device:
|
| 49 |
+
logger.info("Using WebRTC backend for fully remote wireless version")
|
| 50 |
+
robot = ReachyMini(media_backend="webrtc", localhost_only=False)
|
| 51 |
+
elif args.wireless_version and args.on_device:
|
| 52 |
+
logger.info("Using GStreamer backend for on-device wireless version")
|
| 53 |
+
robot = ReachyMini(media_backend="gstreamer")
|
| 54 |
+
else:
|
| 55 |
+
logger.info("Using default backend for lite version")
|
| 56 |
+
robot = ReachyMini(media_backend="default")
|
| 57 |
|
| 58 |
# Check if running in simulation mode without --gradio
|
| 59 |
if robot.client.get_status()["simulation_enabled"] and not args.gradio:
|
src/reachy_mini_conversation_app/openai_realtime.py
CHANGED
|
@@ -340,14 +340,19 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 340 |
|
| 341 |
# Microphone receive
|
| 342 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
| 343 |
-
"""Receive audio frame from the microphone and send it to the
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
Args:
|
| 346 |
-
frame: A tuple containing
|
| 347 |
|
| 348 |
"""
|
| 349 |
if not self.connection:
|
| 350 |
return
|
|
|
|
| 351 |
input_sample_rate, audio_frame = frame
|
| 352 |
|
| 353 |
#Reshape if needed
|
|
@@ -369,6 +374,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 369 |
# Cast if needed
|
| 370 |
audio_frame = audio_to_int16(audio_frame)
|
| 371 |
|
|
|
|
| 372 |
audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
|
| 373 |
await self.connection.input_audio_buffer.append(audio=audio_message)
|
| 374 |
|
|
|
|
| 340 |
|
| 341 |
# Microphone receive
|
| 342 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
| 343 |
+
"""Receive audio frame from the microphone and send it to the OpenAI server.
|
| 344 |
+
|
| 345 |
+
Handles both mono and stereo audio formats, converting to the expected
|
| 346 |
+
mono format for OpenAI's API. Resamples if the input sample rate differs
|
| 347 |
+
from the expected rate.
|
| 348 |
|
| 349 |
Args:
|
| 350 |
+
frame: A tuple containing (sample_rate, audio_data).
|
| 351 |
|
| 352 |
"""
|
| 353 |
if not self.connection:
|
| 354 |
return
|
| 355 |
+
|
| 356 |
input_sample_rate, audio_frame = frame
|
| 357 |
|
| 358 |
#Reshape if needed
|
|
|
|
| 374 |
# Cast if needed
|
| 375 |
audio_frame = audio_to_int16(audio_frame)
|
| 376 |
|
| 377 |
+
# Send to OpenAI
|
| 378 |
audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
|
| 379 |
await self.connection.input_audio_buffer.append(audio=audio_message)
|
| 380 |
|
src/reachy_mini_conversation_app/utils.py
CHANGED
|
@@ -25,6 +25,18 @@ def parse_args() -> argparse.Namespace:
|
|
| 25 |
)
|
| 26 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 27 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return parser.parse_args()
|
| 29 |
|
| 30 |
|
|
|
|
| 25 |
)
|
| 26 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 27 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
| 28 |
+
parser.add_argument(
|
| 29 |
+
"--wireless-version",
|
| 30 |
+
default=False,
|
| 31 |
+
action="store_true",
|
| 32 |
+
help="Use WebRTC backend for wireless version of the robot",
|
| 33 |
+
)
|
| 34 |
+
parser.add_argument(
|
| 35 |
+
"--on-device",
|
| 36 |
+
default=False,
|
| 37 |
+
action="store_true",
|
| 38 |
+
help="Use when conversation app is running on the same device as Reachy Mini daemon",
|
| 39 |
+
)
|
| 40 |
return parser.parse_args()
|
| 41 |
|
| 42 |
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|