Alina committed on
Commit
6502183
·
unverified ·
1 Parent(s): fdb5f7f

Add wireless version support (#133)

Browse files
README.md CHANGED
@@ -118,6 +118,7 @@ By default, the app runs in console mode for direct audio interaction. Use the `
118
  | `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
119
  | `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
120
  | `--debug` | `False` | Enable verbose logging for troubleshooting. |
 
121
 
122
 
123
  ### Examples
@@ -133,6 +134,12 @@ By default, the app runs in console mode for direct audio interaction. Use the `
133
  reachy-mini-conversation-app --local-vision
134
  ```
135
 
 
 
 
 
 
 
136
  - Disable the camera pipeline (audio-only conversation):
137
 
138
  ```bash
 
118
  | `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
119
  | `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
120
  | `--debug` | `False` | Enable verbose logging for troubleshooting. |
121
+ | `--wireless-version` | `False` | Use the wireless version of the robot (WebRTC backend by default, GStreamer backend when combined with `--on-device`). Requires `reachy_mini_wireless` extra to be installed. |
122
 
123
 
124
  ### Examples
 
134
  reachy-mini-conversation-app --local-vision
135
  ```
136
 
137
+ - Run with wireless support (requires `reachy_mini_wireless` extra and daemon started with `--wireless-version`):
138
+
139
+ ```bash
140
+ reachy-mini-conversation-app --wireless-version
141
+ ```
142
+
143
  - Disable the camera pipeline (audio-only conversation):
144
 
145
  ```bash
pyproject.toml CHANGED
@@ -26,9 +26,13 @@ dependencies = [
26
  #Reachy mini
27
  "reachy_mini_dances_library",
28
  "reachy_mini_toolbox",
29
- "reachy_mini>=1.1.2",
 
30
  ]
31
 
 
 
 
32
  [project.optional-dependencies]
33
  reachy_mini_wireless = [
34
  "PyGObject>=3.42.2,<=3.46.0",
 
26
  #Reachy mini
27
  "reachy_mini_dances_library",
28
  "reachy_mini_toolbox",
29
+ "reachy_mini",
30
+ "eclipse-zenoh~=1.7.0",
31
  ]
32
 
33
+ [tool.uv.sources]
34
+ reachy_mini = { git = "https://github.com/pollen-robotics/reachy_mini.git", branch = "487-fix-zenoh-config" }
35
+
36
  [project.optional-dependencies]
37
  reachy_mini_wireless = [
38
  "PyGObject>=3.42.2,<=3.46.0",
src/reachy_mini_conversation_app/console.py CHANGED
@@ -12,6 +12,7 @@ from fastrtc import AdditionalOutputs, audio_to_float32
12
  from scipy.signal import resample
13
 
14
  from reachy_mini import ReachyMini
 
15
  from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
16
 
17
 
@@ -75,17 +76,21 @@ class LocalStream:
75
  def clear_audio_queue(self) -> None:
76
  """Flush the player's appsrc to drop any queued audio immediately."""
77
  logger.info("User intervention: flushing player queue")
 
 
 
78
  self.handler.output_queue = asyncio.Queue()
79
 
80
  async def record_loop(self) -> None:
81
  """Read mic frames from the recorder and forward them to the handler."""
82
- logger.info("Starting receive loop")
 
 
83
  while not self._stop_event.is_set():
84
  audio_frame = self._robot.media.get_audio_sample()
85
  if audio_frame is not None:
86
- await self.handler.receive((self._robot.media.get_input_audio_samplerate(), audio_frame))
87
-
88
- await asyncio.sleep(0.01) # avoid busy loop
89
 
90
  async def play_loop(self) -> None:
91
  """Fetch outputs from the handler: log text and play audio frames."""
 
12
  from scipy.signal import resample
13
 
14
  from reachy_mini import ReachyMini
15
+ from reachy_mini.media.media_manager import MediaBackend
16
  from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
17
 
18
 
 
76
  def clear_audio_queue(self) -> None:
77
  """Flush the player's appsrc to drop any queued audio immediately."""
78
  logger.info("User intervention: flushing player queue")
79
+ if self._robot.media.backend == MediaBackend.GSTREAMER:
80
+ # Directly flush gstreamer audio pipe
81
+ self._robot.media.audio.clear_player()
82
  self.handler.output_queue = asyncio.Queue()
83
 
84
  async def record_loop(self) -> None:
85
  """Read mic frames from the recorder and forward them to the handler."""
86
+ input_sample_rate = self._robot.media.get_input_audio_samplerate()
87
+ logger.debug(f"Audio recording started at {input_sample_rate} Hz")
88
+
89
  while not self._stop_event.is_set():
90
  audio_frame = self._robot.media.get_audio_sample()
91
  if audio_frame is not None:
92
+ await self.handler.receive((input_sample_rate, audio_frame))
93
+ await asyncio.sleep(0) # avoid busy loop
 
94
 
95
  async def play_loop(self) -> None:
96
  """Fetch outputs from the handler: log text and play audio frames."""
src/reachy_mini_conversation_app/main.py CHANGED
@@ -37,7 +37,23 @@ def main() -> None:
37
  if args.no_camera and args.head_tracker is not None:
38
  logger.warning("Head tracking is not activated due to --no-camera.")
39
 
40
- robot = ReachyMini()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # Check if running in simulation mode without --gradio
43
  if robot.client.get_status()["simulation_enabled"] and not args.gradio:
 
37
  if args.no_camera and args.head_tracker is not None:
38
  logger.warning("Head tracking is not activated due to --no-camera.")
39
 
40
+ # Initialize robot with appropriate backend
41
+ # TODO: Implement dynamic robot connection detection
42
+ # Automatically detect and connect to available Reachy Mini robot(s)
43
+ # Priority checks (in order):
44
+ # 1. Reachy Lite connected directly to the host
45
+ # 2. Reachy Mini daemon running on localhost (same device)
46
+ # 3. Reachy Mini daemon on local network (same subnet)
47
+
48
+ if args.wireless_version and not args.on_device:
49
+ logger.info("Using WebRTC backend for fully remote wireless version")
50
+ robot = ReachyMini(media_backend="webrtc", localhost_only=False)
51
+ elif args.wireless_version and args.on_device:
52
+ logger.info("Using GStreamer backend for on-device wireless version")
53
+ robot = ReachyMini(media_backend="gstreamer")
54
+ else:
55
+ logger.info("Using default backend for lite version")
56
+ robot = ReachyMini(media_backend="default")
57
 
58
  # Check if running in simulation mode without --gradio
59
  if robot.client.get_status()["simulation_enabled"] and not args.gradio:
src/reachy_mini_conversation_app/openai_realtime.py CHANGED
@@ -340,14 +340,19 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
340
 
341
  # Microphone receive
342
  async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
343
- """Receive audio frame from the microphone and send it to the openai server.
 
 
 
 
344
 
345
  Args:
346
- frame: A tuple containing the sample rate and the audio frame.
347
 
348
  """
349
  if not self.connection:
350
  return
 
351
  input_sample_rate, audio_frame = frame
352
 
353
  #Reshape if needed
@@ -369,6 +374,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
369
  # Cast if needed
370
  audio_frame = audio_to_int16(audio_frame)
371
 
 
372
  audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
373
  await self.connection.input_audio_buffer.append(audio=audio_message)
374
 
 
340
 
341
  # Microphone receive
342
  async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
343
+ """Receive audio frame from the microphone and send it to the OpenAI server.
344
+
345
+ Handles both mono and stereo audio formats, converting to the expected
346
+ mono format for OpenAI's API. Resamples if the input sample rate differs
347
+ from the expected rate.
348
 
349
  Args:
350
+ frame: A tuple containing (sample_rate, audio_data).
351
 
352
  """
353
  if not self.connection:
354
  return
355
+
356
  input_sample_rate, audio_frame = frame
357
 
358
  #Reshape if needed
 
374
  # Cast if needed
375
  audio_frame = audio_to_int16(audio_frame)
376
 
377
+ # Send to OpenAI
378
  audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
379
  await self.connection.input_audio_buffer.append(audio=audio_message)
380
 
src/reachy_mini_conversation_app/utils.py CHANGED
@@ -25,6 +25,18 @@ def parse_args() -> argparse.Namespace:
25
  )
26
  parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
27
  parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
 
 
 
 
 
 
 
 
 
 
 
 
28
  return parser.parse_args()
29
 
30
 
 
25
  )
26
  parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
27
  parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
28
+ parser.add_argument(
29
+ "--wireless-version",
30
+ default=False,
31
+ action="store_true",
32
+ help="Use WebRTC backend for wireless version of the robot",
33
+ )
34
+ parser.add_argument(
35
+ "--on-device",
36
+ default=False,
37
+ action="store_true",
38
+ help="Use when conversation app is running on the same device as Reachy Mini daemon",
39
+ )
40
  return parser.parse_args()
41
 
42
 
uv.lock CHANGED
The diff for this file is too large to render. See raw diff