Merge branch 'develop' into 62-spinoff-personalities
Browse files
README.md
CHANGED
|
@@ -131,6 +131,7 @@ By default, the app runs in console mode for direct audio interaction. Use the `
|
|
| 131 |
| `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
|
| 132 |
| `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
|
| 133 |
| `--debug` | `False` | Enable verbose logging for troubleshooting. |
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
### Examples
|
|
@@ -146,6 +147,12 @@ By default, the app runs in console mode for direct audio interaction. Use the `
|
|
| 146 |
reachy-mini-conversation-app --local-vision
|
| 147 |
```
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
- Disable the camera pipeline (audio-only conversation):
|
| 150 |
|
| 151 |
```bash
|
|
|
|
| 131 |
| `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
|
| 132 |
| `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
|
| 133 |
| `--debug` | `False` | Enable verbose logging for troubleshooting. |
|
| 134 |
+
| `--wireless-version` | `False` | Use WebRTC backend for wireless version of the robot (GStreamer when combined with `--on-device`). Requires `reachy_mini_wireless` extra to be installed. |
|
| 135 |
|
| 136 |
|
| 137 |
### Examples
|
|
|
|
| 147 |
reachy-mini-conversation-app --local-vision
|
| 148 |
```
|
| 149 |
|
| 150 |
+
- Run with wireless support (requires `reachy_mini_wireless` extra and daemon started with `--wireless-version`):
|
| 151 |
+
|
| 152 |
+
```bash
|
| 153 |
+
reachy-mini-conversation-app --wireless-version
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
- Disable the camera pipeline (audio-only conversation):
|
| 157 |
|
| 158 |
```bash
|
pyproject.toml
CHANGED
|
@@ -26,7 +26,8 @@ dependencies = [
|
|
| 26 |
#Reachy mini
|
| 27 |
"reachy_mini_dances_library",
|
| 28 |
"reachy_mini_toolbox",
|
| 29 |
-
"reachy_mini>=1.1.
|
|
|
|
| 30 |
]
|
| 31 |
|
| 32 |
[project.optional-dependencies]
|
|
|
|
| 26 |
#Reachy mini
|
| 27 |
"reachy_mini_dances_library",
|
| 28 |
"reachy_mini_toolbox",
|
| 29 |
+
"reachy_mini>=1.1.3",
|
| 30 |
+
"eclipse-zenoh~=1.7.0",
|
| 31 |
]
|
| 32 |
|
| 33 |
[project.optional-dependencies]
|
src/reachy_mini_conversation_app/console.py
CHANGED
|
@@ -12,6 +12,7 @@ from fastrtc import AdditionalOutputs, audio_to_float32
|
|
| 12 |
from scipy.signal import resample
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini
|
|
|
|
| 15 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 16 |
|
| 17 |
|
|
@@ -75,17 +76,21 @@ class LocalStream:
|
|
| 75 |
def clear_audio_queue(self) -> None:
|
| 76 |
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 77 |
logger.info("User intervention: flushing player queue")
|
|
|
|
|
|
|
|
|
|
| 78 |
self.handler.output_queue = asyncio.Queue()
|
| 79 |
|
| 80 |
async def record_loop(self) -> None:
|
| 81 |
"""Read mic frames from the recorder and forward them to the handler."""
|
| 82 |
-
|
|
|
|
|
|
|
| 83 |
while not self._stop_event.is_set():
|
| 84 |
audio_frame = self._robot.media.get_audio_sample()
|
| 85 |
if audio_frame is not None:
|
| 86 |
-
await self.handler.receive((
|
| 87 |
-
|
| 88 |
-
await asyncio.sleep(0.01) # avoid busy loop
|
| 89 |
|
| 90 |
async def play_loop(self) -> None:
|
| 91 |
"""Fetch outputs from the handler: log text and play audio frames."""
|
|
|
|
| 12 |
from scipy.signal import resample
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini
|
| 15 |
+
from reachy_mini.media.media_manager import MediaBackend
|
| 16 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 17 |
|
| 18 |
|
|
|
|
| 76 |
def clear_audio_queue(self) -> None:
|
| 77 |
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 78 |
logger.info("User intervention: flushing player queue")
|
| 79 |
+
if self._robot.media.backend == MediaBackend.GSTREAMER:
|
| 80 |
+
# Directly flush gstreamer audio pipe
|
| 81 |
+
self._robot.media.audio.clear_player()
|
| 82 |
self.handler.output_queue = asyncio.Queue()
|
| 83 |
|
| 84 |
async def record_loop(self) -> None:
|
| 85 |
"""Read mic frames from the recorder and forward them to the handler."""
|
| 86 |
+
input_sample_rate = self._robot.media.get_input_audio_samplerate()
|
| 87 |
+
logger.debug(f"Audio recording started at {input_sample_rate} Hz")
|
| 88 |
+
|
| 89 |
while not self._stop_event.is_set():
|
| 90 |
audio_frame = self._robot.media.get_audio_sample()
|
| 91 |
if audio_frame is not None:
|
| 92 |
+
await self.handler.receive((input_sample_rate, audio_frame))
|
| 93 |
+
await asyncio.sleep(0) # avoid busy loop
|
|
|
|
| 94 |
|
| 95 |
async def play_loop(self) -> None:
|
| 96 |
"""Fetch outputs from the handler: log text and play audio frames."""
|
src/reachy_mini_conversation_app/main.py
CHANGED
|
@@ -56,7 +56,23 @@ def run(
|
|
| 56 |
logger.warning("Head tracking is not activated due to --no-camera.")
|
| 57 |
|
| 58 |
if robot is None:
|
| 59 |
-
robot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# Check if running in simulation mode without --gradio
|
| 62 |
if robot.client.get_status()["simulation_enabled"] and not args.gradio:
|
|
|
|
| 56 |
logger.warning("Head tracking is not activated due to --no-camera.")
|
| 57 |
|
| 58 |
if robot is None:
|
| 59 |
+
# Initialize robot with appropriate backend
|
| 60 |
+
# TODO: Implement dynamic robot connection detection
|
| 61 |
+
# Automatically detect and connect to available Reachy Mini robot(s)
|
| 62 |
+
# Priority checks (in order):
|
| 63 |
+
# 1. Reachy Lite connected directly to the host
|
| 64 |
+
# 2. Reachy Mini daemon running on localhost (same device)
|
| 65 |
+
# 3. Reachy Mini daemon on local network (same subnet)
|
| 66 |
+
|
| 67 |
+
if args.wireless_version and not args.on_device:
|
| 68 |
+
logger.info("Using WebRTC backend for fully remote wireless version")
|
| 69 |
+
robot = ReachyMini(media_backend="webrtc", localhost_only=False)
|
| 70 |
+
elif args.wireless_version and args.on_device:
|
| 71 |
+
logger.info("Using GStreamer backend for on-device wireless version")
|
| 72 |
+
robot = ReachyMini(media_backend="gstreamer")
|
| 73 |
+
else:
|
| 74 |
+
logger.info("Using default backend for lite version")
|
| 75 |
+
robot = ReachyMini(media_backend="default")
|
| 76 |
|
| 77 |
# Check if running in simulation mode without --gradio
|
| 78 |
if robot.client.get_status()["simulation_enabled"] and not args.gradio:
|
src/reachy_mini_conversation_app/openai_realtime.py
CHANGED
|
@@ -400,14 +400,19 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 400 |
|
| 401 |
# Microphone receive
|
| 402 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
| 403 |
-
"""Receive audio frame from the microphone and send it to the
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
Args:
|
| 406 |
-
frame: A tuple containing
|
| 407 |
|
| 408 |
"""
|
| 409 |
if not self.connection:
|
| 410 |
return
|
|
|
|
| 411 |
input_sample_rate, audio_frame = frame
|
| 412 |
|
| 413 |
# Reshape if needed
|
|
@@ -426,6 +431,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 426 |
# Cast if needed
|
| 427 |
audio_frame = audio_to_int16(audio_frame)
|
| 428 |
|
|
|
|
| 429 |
audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
|
| 430 |
await self.connection.input_audio_buffer.append(audio=audio_message)
|
| 431 |
|
|
|
|
| 400 |
|
| 401 |
# Microphone receive
|
| 402 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
| 403 |
+
"""Receive audio frame from the microphone and send it to the OpenAI server.
|
| 404 |
+
|
| 405 |
+
Handles both mono and stereo audio formats, converting to the expected
|
| 406 |
+
mono format for OpenAI's API. Resamples if the input sample rate differs
|
| 407 |
+
from the expected rate.
|
| 408 |
|
| 409 |
Args:
|
| 410 |
+
frame: A tuple containing (sample_rate, audio_data).
|
| 411 |
|
| 412 |
"""
|
| 413 |
if not self.connection:
|
| 414 |
return
|
| 415 |
+
|
| 416 |
input_sample_rate, audio_frame = frame
|
| 417 |
|
| 418 |
# Reshape if needed
|
|
|
|
| 431 |
# Cast if needed
|
| 432 |
audio_frame = audio_to_int16(audio_frame)
|
| 433 |
|
| 434 |
+
# Send to OpenAI
|
| 435 |
audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
|
| 436 |
await self.connection.input_audio_buffer.append(audio=audio_message)
|
| 437 |
|
src/reachy_mini_conversation_app/utils.py
CHANGED
|
@@ -25,6 +25,18 @@ def parse_args() -> argparse.Namespace:
|
|
| 25 |
)
|
| 26 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 27 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return parser.parse_known_args()
|
| 29 |
|
| 30 |
|
|
|
|
| 25 |
)
|
| 26 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 27 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
| 28 |
+
parser.add_argument(
|
| 29 |
+
"--wireless-version",
|
| 30 |
+
default=False,
|
| 31 |
+
action="store_true",
|
| 32 |
+
help="Use WebRTC backend for wireless version of the robot",
|
| 33 |
+
)
|
| 34 |
+
parser.add_argument(
|
| 35 |
+
"--on-device",
|
| 36 |
+
default=False,
|
| 37 |
+
action="store_true",
|
| 38 |
+
help="Use when conversation app is running on the same device as Reachy Mini daemon",
|
| 39 |
+
)
|
| 40 |
return parser.parse_known_args()
|
| 41 |
|
| 42 |
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|