Commit 1d985fe · Alina Lozovskaya committed · 1 Parent(s): 69601f9

Apply Ruff for better formatting

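Note: this commit is a mechanical reformat. Long statements are wrapped to the formatter's line length, trailing whitespace on blank lines is stripped, inline comments get the standard two-space gap, the unused `field` import is dropped from audio_sway.py, and the f-string in vision.py switches to single quotes inside the double-quoted f-string (which also keeps it valid on Python versions before 3.12, where reusing the outer quote inside an f-string is a syntax error). Output like this typically comes from `ruff format` plus `ruff check --fix` for the import removal, though the exact invocation is not recorded in the commit.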
src/reachy_mini_conversation_demo/audio_sway.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import asyncio
 import base64
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Callable, Optional, Tuple
 
 import numpy as np
src/reachy_mini_conversation_demo/main.py CHANGED
@@ -100,7 +100,9 @@ elif HEAD_TRACKING and SIM:
 else:
     logger.warning("Head tracking disabled")
 
-movement_manager = MovementManager(current_robot=current_robot, head_tracker=head_tracker, camera=camera)
+movement_manager = MovementManager(
+    current_robot=current_robot, head_tracker=head_tracker, camera=camera
+)
 robot_is_speaking = asyncio.Event()
 speaking_queue = asyncio.Queue()
 
@@ -410,7 +412,7 @@ async def control_mic_loop():
             audio_sync.on_response_completed()
             await asyncio.sleep(0)
             continue
-
+
         await asyncio.sleep(block_time)
 
 
@@ -423,7 +425,7 @@ async def main():
     recorder.record()
     player = GstPlayer()
     player.play()
-
+
     movement_manager.set_neutral()
     logger.info("Starting main audio loop. You can start to speak")
 
@@ -432,12 +434,16 @@ async def main():
         asyncio.create_task(emit_loop(player, openai), name="emit"),
         asyncio.create_task(receive_loop(recorder, openai), name="recv"),
         asyncio.create_task(control_mic_loop(), name="mic-mute"),
-        asyncio.create_task(movement_manager.enable(stop_event=stop_event), name="move"),
+        asyncio.create_task(
+            movement_manager.enable(stop_event=stop_event), name="move"
+        ),
     ]
 
     if vision_manager:
         tasks.append(
-            asyncio.create_task(vision_manager.enable(stop_event=stop_event), name="vision"),
+            asyncio.create_task(
+                vision_manager.enable(stop_event=stop_event), name="vision"
+            ),
         )
 
     try:
@@ -445,7 +451,7 @@ async def main():
     except asyncio.CancelledError:
         logger.info("Shutting down")
         stop_event.set()
-
+
         if camera:
             camera.release()
 
@@ -453,9 +459,10 @@ async def main():
         movement_manager.set_neutral()
         recorder.stop()
         player.stop()
-
+
         current_robot.client.disconnect()
         logger.info("Stopped, robot disconected")
 
+
 if __name__ == "__main__":
     asyncio.run(main())
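The hunks above only re-wrap the `asyncio.create_task(...)` calls, but they show the demo's shutdown pattern: every subsystem loop is a named task watching a shared `stop_event`. A minimal, self-contained sketch of that pattern, with hypothetical `worker` coroutines standing in for the demo's movement and vision loops:

import asyncio
import threading


async def worker(name: str, stop_event: threading.Event) -> None:
    # Poll the shared stop flag between units of work, the way the
    # demo's MovementManager.enable() loop does.
    while not stop_event.is_set():
        await asyncio.sleep(0.05)
    print(f"{name} exited")


async def main() -> None:
    stop_event = threading.Event()
    tasks = [
        asyncio.create_task(worker("move", stop_event), name="move"),
        asyncio.create_task(worker("vision", stop_event), name="vision"),
    ]
    try:
        await asyncio.sleep(0.2)  # stand-in for the main audio loop
        raise asyncio.CancelledError  # simulate Ctrl+C
    except asyncio.CancelledError:
        stop_event.set()  # ask every loop to wind down cooperatively
        await asyncio.gather(*tasks)


asyncio.run(main())

Using a threading.Event (rather than asyncio.Event) lets the same flag be set from non-async code, at the cost of polling it instead of awaiting it.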
src/reachy_mini_conversation_demo/movement.py CHANGED
@@ -15,7 +15,12 @@ logger = logging.getLogger(__name__)
 
 
 class MovementManager:
-    def __init__(self, current_robot: ReachyMini, head_tracker: HeadTracker | None, camera: cv2.VideoCapture| None):
+    def __init__(
+        self,
+        current_robot: ReachyMini,
+        head_tracker: HeadTracker | None,
+        camera: cv2.VideoCapture | None,
+    ):
         self.current_robot = current_robot
         self.head_tracker = head_tracker
         self.camera = camera
@@ -25,21 +30,20 @@ class MovementManager:
         self.moving_start = time.monotonic()
         self.moving_for = 0.0
         self.speech_head_offsets = [0.0] * 6
-        self.movement_loop_sleep = 0.05 # seconds
+        self.movement_loop_sleep = 0.05  # seconds
 
     def set_offsets(self, offsets: list[float]) -> None:
         """Used by AudioSync callback to update speech offsets"""
         self.speech_head_offsets = list(offsets)
 
     def set_neutral(self) -> None:
-        """Set neutral robot position """
+        """Set neutral robot position"""
         self.speech_head_offsets = [0.0] * 6
         self.current_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
         self.current_robot.set_target(head=self.current_head_pose, antennas=(0.0, 0.0))
 
     def reset_head_pose(self) -> None:
         self.current_head_pose = np.eye(4)
-
 
     async def enable(self, stop_event: threading.Event) -> None:
         logger.info("Starting head movement loop")
@@ -56,7 +60,9 @@ class MovementManager:
                 logger.warning("Camera read failed")
                 last_log_ts = current_time
             else:
-                eye_center, _ = self.head_tracker.get_head_position(im) # as [-1, 1]
+                eye_center, _ = self.head_tracker.get_head_position(
+                    im
+                )  # as [-1, 1]
 
             if eye_center is not None:
                 # Rescale target position into IMAGE_SIZE coordinates
@@ -68,22 +74,20 @@ class MovementManager:
                # Bounds checking
                 eye_center = np.clip(eye_center, [0, 0], [w - 1, h - 1])
 
-                current_head_pose = (
-                    self.current_robot.look_at_image(
-                        *eye_center, duration=0.0, apply=False
-                    )
+                current_head_pose = self.current_robot.look_at_image(
+                    *eye_center, duration=0.0, apply=False
                 )
 
                 self.current_head_pose = current_head_pose
             # Pose calculation
             try:
-                current_x, current_y, current_z = self.current_head_pose[
-                    :3, 3
-                ]
+                current_x, current_y, current_z = self.current_head_pose[:3, 3]
 
-                current_roll, current_pitch, current_yaw = scipy.spatial.transform.Rotation.from_matrix(
-                    self.current_head_pose[:3, :3]
-                ).as_euler("xyz", degrees=False)
+                current_roll, current_pitch, current_yaw = (
+                    scipy.spatial.transform.Rotation.from_matrix(
+                        self.current_head_pose[:3, :3]
+                    ).as_euler("xyz", degrees=False)
+                )
 
                 if debug_frame_count % 50 == 0:
                     logger.debug(
@@ -102,15 +106,11 @@ class MovementManager:
             except Exception as e:
                 logger.exception("Invalid pose; resetting")
                 self.reset_head_pose()
-                current_x, current_y, current_z = self.current_head_pose[
-                    :3, 3
-                ]
+                current_x, current_y, current_z = self.current_head_pose[:3, 3]
                 current_roll = current_pitch = current_yaw = 0.0
 
             # Movement check
-            is_moving = (
-                time.monotonic() - self.moving_start < self.moving_for
-            )
+            is_moving = time.monotonic() - self.moving_start < self.moving_for
 
             if debug_frame_count % 50 == 0:
                 logger.debug(f"Robot moving: {is_moving}")
@@ -133,9 +133,7 @@ class MovementManager:
                    logger.debug(
                        "Final head pose with offsets: %s", head_pose[:3, 3]
                    )
-                    logger.debug(
-                        "Speech offsets: %s", self.speech_head_offsets
-                    )
+                    logger.debug("Speech offsets: %s", self.speech_head_offsets)
 
             self.current_robot.set_target(head=head_pose, antennas=(0.0, 0.0))
 
@@ -146,5 +144,5 @@ class MovementManager:
                 logger.debug("Failed to set robot target: %s", e)
 
             await asyncio.sleep(self.movement_loop_sleep)
-
+
         logger.info("Exited head movement loop")
src/reachy_mini_conversation_demo/vision.py CHANGED
@@ -239,9 +239,10 @@ class VisionManager:
             if current_time - self._last_processed_time >= self.vision_interval:
                 success, frame = await asyncio.to_thread(self.camera.read)
                 if success and frame is not None:
-
-                    description = await asyncio.to_thread(lambda: self.processor.process_image(
-                        frame, "Briefly describe what you see in one sentence.")
+                    description = await asyncio.to_thread(
+                        lambda: self.processor.process_image(
+                            frame, "Briefly describe what you see in one sentence."
+                        )
                     )
 
                     # Only update if we got a valid response
@@ -259,7 +260,7 @@
 
             except Exception as e:
                 logger.exception("Vision processing loop error")
-                await asyncio.sleep(5.0) # Longer sleep on error
+                await asyncio.sleep(5.0)  # Longer sleep on error
 
         logger.info(f"Vision loop finished")
 
@@ -275,8 +276,10 @@
             success, frame = self.camera.read()
             if not success or frame is None:
                 return {"error": "Failed to capture image from camera"}
-
-            description = await asyncio.to_thread(lambda: self.processor.process_image(frame, prompt))
+
+            description = await asyncio.to_thread(
+                lambda: self.processor.process_image(frame, prompt)
+            )
 
             return {
                 "description": description,
@@ -287,8 +290,7 @@
         except Exception as e:
             logger.exception("Failed to process current frame")
             return {"error": f"Frame processing failed: {str(e)}"}
-
-
+
     async def get_status(self) -> Dict[str, Any]:
         """Get comprehensive status information"""
         return {
@@ -303,18 +305,14 @@
     }
 
 
-
-def init_camera(camera_index = 0, simulation=True):
-
+def init_camera(camera_index=0, simulation=True):
     api_preference = cv2.CAP_AVFOUNDATION if sys.platform == "darwin" else 0
 
     if simulation:
         # Default build-in camera in SIM
         # TODO: please, test on Linux and Windows
        # TODO simulation in find_camera
-        camera = cv2.VideoCapture(
-            0, api_preference
-        )
+        camera = cv2.VideoCapture(0, api_preference)
     else:
         # TODO handle macos in find_camera
         if sys.platform == "darwin":
@@ -328,7 +326,6 @@ def init_camera(camera_index = 0, simulation=True):
 def init_vision(camera: cv2.VideoCapture) -> VisionManager:
     model_id = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
 
-
     cache_dir = os.path.expandvars(os.getenv("HF_HOME", "$HOME/.cache/huggingface"))
 
     try:
@@ -362,7 +359,7 @@ def init_vision(camera: cv2.VideoCapture) -> VisionManager:
 
     device_info = vision_manager.processor.get_model_info()
     logger.info(
-        f"Vision processing enabled: {device_info["model_path"]} on {device_info["device"]} memory: {device_info.get("gpu_memory", "N/A")}",
+        f"Vision processing enabled: {device_info['model_path']} on {device_info['device']} memory: {device_info.get('gpu_memory', 'N/A')}",
     )
 
     return vision_manager
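The to_thread hunks above re-wrap a pattern worth noting: blocking VLM inference is pushed onto a worker thread so the audio event loop stays responsive. A minimal sketch, with a hypothetical slow_describe() standing in for processor.process_image():

import asyncio
import time


def slow_describe(frame: bytes, prompt: str) -> str:
    time.sleep(0.5)  # stands in for blocking model inference
    return f"{len(frame)}-byte frame, prompt: {prompt!r}"


async def main() -> None:
    frame = b"\x00" * 1024  # hypothetical captured frame
    # The lambda freezes the call and its arguments into a
    # zero-argument callable, as in the diffs above.
    description = await asyncio.to_thread(
        lambda: slow_describe(frame, "Briefly describe what you see.")
    )
    print(description)


asyncio.run(main())

Since asyncio.to_thread() forwards positional and keyword arguments itself, `await asyncio.to_thread(slow_describe, frame, prompt)` would behave the same here; the lambda is a stylistic choice carried over from the original code.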