Spaces:

pollen-robotics
/

reachy_mini_conversation_app

Running

App Files Files Community

Alina committed 7 days ago

Commit

6502183

unverified ·

1 Parent(s): fdb5f7f

Add wireless version support (#133)

Browse files

Files changed (7) hide show

README.md +7 -0
pyproject.toml +5 -1
src/reachy_mini_conversation_app/console.py +9 -4
src/reachy_mini_conversation_app/main.py +17 -1
src/reachy_mini_conversation_app/openai_realtime.py +8 -2
src/reachy_mini_conversation_app/utils.py +12 -0
uv.lock +0 -0

README.md CHANGED Viewed

@@ -118,6 +118,7 @@ By default, the app runs in console mode for direct audio interaction. Use the `
 | `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
 | `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
 | `--debug` | `False` | Enable verbose logging for troubleshooting. |
 ### Examples
@@ -133,6 +134,12 @@ By default, the app runs in console mode for direct audio interaction. Use the `
   reachy-mini-conversation-app --local-vision
   ```
 - Disable the camera pipeline (audio-only conversation):
   ```bash

 | `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
 | `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
 | `--debug` | `False` | Enable verbose logging for troubleshooting. |
+| `--wireless-version` | `False` | Connect to the wireless version of the robot. Uses the WebRTC backend by default, or the GStreamer backend when combined with `--on-device`. Requires `reachy_mini_wireless` extra to be installed. |
 ### Examples
   reachy-mini-conversation-app --local-vision
   ```
+- Run with wireless support (requires `reachy_mini_wireless` extra and daemon started with `--wireless-version`):
+  ```bash
+  reachy-mini-conversation-app --wireless-version
+  ```
 - Disable the camera pipeline (audio-only conversation):
   ```bash

pyproject.toml CHANGED Viewed

@@ -26,9 +26,13 @@ dependencies = [
     #Reachy mini
     "reachy_mini_dances_library",
     "reachy_mini_toolbox",
-    "reachy_mini>=1.1.2",
 ]
 [project.optional-dependencies]
 reachy_mini_wireless = [
   "PyGObject>=3.42.2,<=3.46.0",

     #Reachy mini
     "reachy_mini_dances_library",
     "reachy_mini_toolbox",
+    "reachy_mini",
+    "eclipse-zenoh~=1.7.0",
 ]
+[tool.uv.sources]
+reachy_mini = { git = "https://github.com/pollen-robotics/reachy_mini.git", branch = "487-fix-zenoh-config" }
 [project.optional-dependencies]
 reachy_mini_wireless = [
   "PyGObject>=3.42.2,<=3.46.0",

src/reachy_mini_conversation_app/console.py CHANGED Viewed

@@ -12,6 +12,7 @@ from fastrtc import AdditionalOutputs, audio_to_float32
 from scipy.signal import resample
 from reachy_mini import ReachyMini
 from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
@@ -75,17 +76,21 @@ class LocalStream:
     def clear_audio_queue(self) -> None:
         """Flush the player's appsrc to drop any queued audio immediately."""
         logger.info("User intervention: flushing player queue")
         self.handler.output_queue = asyncio.Queue()
     async def record_loop(self) -> None:
         """Read mic frames from the recorder and forward them to the handler."""
-        logger.info("Starting receive loop")
         while not self._stop_event.is_set():
             audio_frame = self._robot.media.get_audio_sample()
             if audio_frame is not None:
-                await self.handler.receive((self._robot.media.get_input_audio_samplerate(), audio_frame))
-            await asyncio.sleep(0.01)  # avoid busy loop
     async def play_loop(self) -> None:
         """Fetch outputs from the handler: log text and play audio frames."""

 from scipy.signal import resample
 from reachy_mini import ReachyMini
+from reachy_mini.media.media_manager import MediaBackend
 from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
     def clear_audio_queue(self) -> None:
         """Flush the player's appsrc to drop any queued audio immediately."""
         logger.info("User intervention: flushing player queue")
+        if self._robot.media.backend == MediaBackend.GSTREAMER:
+            # Directly flush gstreamer audio pipe
+            self._robot.media.audio.clear_player()
         self.handler.output_queue = asyncio.Queue()
     async def record_loop(self) -> None:
         """Read mic frames from the recorder and forward them to the handler."""
+        input_sample_rate = self._robot.media.get_input_audio_samplerate()
+        logger.debug(f"Audio recording started at {input_sample_rate} Hz")
         while not self._stop_event.is_set():
             audio_frame = self._robot.media.get_audio_sample()
             if audio_frame is not None:
+                await self.handler.receive((input_sample_rate, audio_frame))
+            await asyncio.sleep(0)  # yield control to the event loop so other tasks can run
     async def play_loop(self) -> None:
         """Fetch outputs from the handler: log text and play audio frames."""

src/reachy_mini_conversation_app/main.py CHANGED Viewed

@@ -37,7 +37,23 @@ def main() -> None:
     if args.no_camera and args.head_tracker is not None:
         logger.warning("Head tracking is not activated due to --no-camera.")
-    robot = ReachyMini()
     # Check if running in simulation mode without --gradio
     if robot.client.get_status()["simulation_enabled"] and not args.gradio:

     if args.no_camera and args.head_tracker is not None:
         logger.warning("Head tracking is not activated due to --no-camera.")
+    # Initialize robot with appropriate backend
+    # TODO: Implement dynamic robot connection detection
+    # Automatically detect and connect to available Reachy Mini robot(s)
+    # Priority checks (in order):
+    #   1. Reachy Lite connected directly to the host
+    #   2. Reachy Mini daemon running on localhost (same device)
+    #   3. Reachy Mini daemon on local network (same subnet)
+    if args.wireless_version and not args.on_device:
+        logger.info("Using WebRTC backend for fully remote wireless version")
+        robot = ReachyMini(media_backend="webrtc", localhost_only=False)
+    elif args.wireless_version and args.on_device:
+        logger.info("Using GStreamer backend for on-device wireless version")
+        robot = ReachyMini(media_backend="gstreamer")
+    else:
+        logger.info("Using default backend for lite version")
+        robot = ReachyMini(media_backend="default")
     # Check if running in simulation mode without --gradio
     if robot.client.get_status()["simulation_enabled"] and not args.gradio:

src/reachy_mini_conversation_app/openai_realtime.py CHANGED Viewed

@@ -340,14 +340,19 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
     # Microphone receive
     async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
-        """Receive audio frame from the microphone and send it to the openai server.
         Args:
-            frame: A tuple containing the sample rate and the audio frame.
         """
         if not self.connection:
             return
         input_sample_rate, audio_frame = frame
         #Reshape if needed
@@ -369,6 +374,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
         # Cast if needed
         audio_frame = audio_to_int16(audio_frame)
         audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
         await self.connection.input_audio_buffer.append(audio=audio_message)

     # Microphone receive
     async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
+        """Receive audio frame from the microphone and send it to the OpenAI server.
+        Handles both mono and stereo audio formats, converting to the expected
+        mono format for OpenAI's API. Resamples if the input sample rate differs
+        from the expected rate.
         Args:
+            frame: A tuple containing (sample_rate, audio_data).
         """
         if not self.connection:
             return
         input_sample_rate, audio_frame = frame
         #Reshape if needed
         # Cast if needed
         audio_frame = audio_to_int16(audio_frame)
+        # Send to OpenAI
         audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
         await self.connection.input_audio_buffer.append(audio=audio_message)

src/reachy_mini_conversation_app/utils.py CHANGED Viewed

@@ -25,6 +25,18 @@ def parse_args() -> argparse.Namespace:
     )
     parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
     parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
     return parser.parse_args()

     )
     parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
     parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
+    parser.add_argument(
+        "--wireless-version",
+        default=False,
+        action="store_true",
+        help="Use WebRTC backend for wireless version of the robot",
+    )
+    parser.add_argument(
+        "--on-device",
+        default=False,
+        action="store_true",
+        help="Use when conversation app is running on the same device as Reachy Mini daemon",
+    )
     return parser.parse_args()

uv.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff