Spaces: pollen-robotics/reachy_mini_conversation_app

Running

App Files Community

Alina Lozovskaya committed on Oct 14

Commit

7054b54

1 Parent(s): 58b3ac3

Improve variable names

Browse files

Files changed (3) hide show

src/reachy_mini_conversation_demo/console.py +16 -13
src/reachy_mini_conversation_demo/openai_realtime.py +4 -4
src/reachy_mini_conversation_demo/tools.py +19 -15

src/reachy_mini_conversation_demo/console.py CHANGED Viewed

@@ -78,9 +78,9 @@ class LocalStream:
         """Read mic frames from the recorder and forward them to the handler."""
         logger.info("Starting receive loop")
         while not self._stop_event.is_set():
-            data = self._robot.media.get_audio_sample()
-            if data is not None:
-                frame_mono = data.T[0]  # both channels are identical
                 frame = audio_to_int16(frame_mono)
                 await self.handler.receive((16000, frame))
                 # await asyncio.sleep(0)  # yield to event loop
@@ -90,10 +90,10 @@ class LocalStream:
     async def play_loop(self) -> None:
         """Fetch outputs from the handler: log text and play audio frames."""
         while not self._stop_event.is_set():
-            data = await self.handler.emit()
-            if isinstance(data, AdditionalOutputs):
-                for msg in data.args:
                     content = msg.get("content", "")
                     if isinstance(content, str):
                         logger.info(
@@ -102,14 +102,17 @@ class LocalStream:
                             content if len(content) < 500 else content[:500] + "…",
                         )
-            elif isinstance(data, tuple):
-                sample_rate, frame = data
                 device_sample_rate = self._robot.media.get_audio_samplerate()
-                frame = audio_to_float32(frame.squeeze())
-                if sample_rate != device_sample_rate:
-                    frame = librosa.resample(frame, orig_sr=sample_rate, target_sr=device_sample_rate)
-                self._robot.media.push_audio_sample(frame)
-            # else: ignore None/unknown outputs
             await asyncio.sleep(0)  # yield to event loop

         """Read mic frames from the recorder and forward them to the handler."""
         logger.info("Starting receive loop")
         while not self._stop_event.is_set():
+            audio_frame = self._robot.media.get_audio_sample()
+            if audio_frame is not None:
+                frame_mono = audio_frame.T[0]  # both channels are identical
                 frame = audio_to_int16(frame_mono)
                 await self.handler.receive((16000, frame))
                 # await asyncio.sleep(0)  # yield to event loop
     async def play_loop(self) -> None:
         """Fetch outputs from the handler: log text and play audio frames."""
         while not self._stop_event.is_set():
+            handler_output = await self.handler.emit()
+            if isinstance(handler_output, AdditionalOutputs):
+                for msg in handler_output.args:
                     content = msg.get("content", "")
                     if isinstance(content, str):
                         logger.info(
                             content if len(content) < 500 else content[:500] + "…",
                         )
+            elif isinstance(handler_output, tuple):
+                input_sample_rate, audio_frame = handler_output
                 device_sample_rate = self._robot.media.get_audio_samplerate()
+                audio_frame = audio_to_float32(audio_frame.squeeze())
+                if input_sample_rate != device_sample_rate:
+                    audio_frame = librosa.resample(
+                        audio_frame, orig_sr=input_sample_rate, target_sr=device_sample_rate
+                    )
+                self._robot.media.push_audio_sample(audio_frame)
+            else:
+                logger.debug("Ignoring output type=%s", type(handler_output).__name__)
             await asyncio.sleep(0)  # yield to event loop

src/reachy_mini_conversation_demo/openai_realtime.py CHANGED Viewed

@@ -136,11 +136,11 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
                 # 3) when args done, execute Python tool, send function_call_output, then trigger a new response
                 if event.type == "response.function_call_arguments.done":
                     call_id = getattr(event, "call_id", None)
-                    info = self._pending_calls.get(call_id)
-                    if not info:
                         continue
-                    tool_name = info["name"]
-                    args_json_str = info["args_buf"] or "{}"
                     try:
                         tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)

                 # 3) when args done, execute Python tool, send function_call_output, then trigger a new response
                 if event.type == "response.function_call_arguments.done":
                     call_id = getattr(event, "call_id", None)
+                    tool_call_info = self._pending_calls.get(call_id)
+                    if not tool_call_info:
                         continue
+                    tool_name = tool_call_info["name"]
+                    args_json_str = tool_call_info["args_buf"] or "{}"
                     try:
                         tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)

src/reachy_mini_conversation_demo/tools.py CHANGED Viewed

@@ -36,14 +36,14 @@ except ImportError as e:
     EMOTION_AVAILABLE = False
-def all_concrete_subclasses(base):
     """Recursively find all concrete (non-abstract) subclasses of a base class."""
     result = []
     for cls in base.__subclasses__():
         if not inspect.isabstract(cls):
             result.append(cls)
         # recurse into subclasses
-        result.extend(all_concrete_subclasses(cls))
     return result
@@ -157,7 +157,7 @@ class MoveHead(Tool):
             return {"status": f"looking {direction}"}
         except Exception as e:
-            logger.exception("move_head failed")
             return {"error": f"move_head failed: {type(e).__name__}: {e}"}
@@ -198,11 +198,15 @@ class Camera(Tool):
         # Use vision manager for processing if available
         if deps.vision_manager is not None:
-            result = await asyncio.to_thread(deps.vision_manager.processor.process_image, frame, image_query)
-            if isinstance(result, dict) and "error" in result:
-                return result
             return (
-                {"image_description": result} if isinstance(result, str) else {"error": "vision returned non-string"}
             )
         else:
             # Return base64 encoded image like main_works.py camera tool
@@ -341,12 +345,12 @@ def get_available_emotions_and_descriptions() -> str:
         return "Emotions not available"
     try:
-        names = RECORDED_MOVES.list_moves()
-        ret = "Available emotions:\n"
-        for name in names:
             description = RECORDED_MOVES.get(name).description
-            ret += f" - {name}: {description}\n"
-        return ret
     except Exception as e:
         return f"Error getting emotions: {e}"
@@ -448,15 +452,15 @@ class DoNothing(Tool):
 # Registry & specs (dynamic)
 # List of available tool classes
-ALL_TOOLS: Dict[str, Tool] = {cls.name: cls() for cls in all_concrete_subclasses(Tool)}
 ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
 # Dispatcher
 def _safe_load_obj(args_json: str) -> dict[str, Any]:
     try:
-        obj = json.loads(args_json or "{}")
-        return obj if isinstance(obj, dict) else {}
     except Exception:
         logger.warning("bad args_json=%r", args_json)
         return {}

     EMOTION_AVAILABLE = False
+def get_concrete_subclasses(base):
     """Recursively find all concrete (non-abstract) subclasses of a base class."""
     result = []
     for cls in base.__subclasses__():
         if not inspect.isabstract(cls):
             result.append(cls)
         # recurse into subclasses
+        result.extend(get_concrete_subclasses(cls))
     return result
             return {"status": f"looking {direction}"}
         except Exception as e:
+            logger.error("move_head failed")
             return {"error": f"move_head failed: {type(e).__name__}: {e}"}
         # Use vision manager for processing if available
         if deps.vision_manager is not None:
+            vision_result = await asyncio.to_thread(
+                deps.vision_manager.processor.process_image, frame, image_query
+            )
+            if isinstance(vision_result, dict) and "error" in vision_result:
+                return vision_result
             return (
+                {"image_description": vision_result}
+                if isinstance(vision_result, str)
+                else {"error": "vision returned non-string"}
             )
         else:
             # Return base64 encoded image like main_works.py camera tool
         return "Emotions not available"
     try:
+        emotion_names = RECORDED_MOVES.list_moves()
+        output = "Available emotions:\n"
+        for name in emotion_names:
             description = RECORDED_MOVES.get(name).description
+            output += f" - {name}: {description}\n"
+        return output
     except Exception as e:
         return f"Error getting emotions: {e}"
 # Registry & specs (dynamic)
 # List of available tool classes
+ALL_TOOLS: Dict[str, Tool] = {cls.name: cls() for cls in get_concrete_subclasses(Tool)}
 ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
 # Dispatcher
 def _safe_load_obj(args_json: str) -> dict[str, Any]:
     try:
+        parsed_args = json.loads(args_json or "{}")
+        return parsed_args if isinstance(parsed_args, dict) else {}
     except Exception:
         logger.warning("bad args_json=%r", args_json)
         return {}