apirrone
committed on
Commit
·
e2ba649
1
Parent(s):
8b59975
textbox to enter openai api key
Browse files
src/reachy_mini_conversation_app/main.py
CHANGED
|
@@ -2,13 +2,14 @@
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import sys
|
| 5 |
-
import
|
| 6 |
import threading
|
| 7 |
from typing import Any, Dict, List
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from fastrtc import Stream
|
|
|
|
| 12 |
|
| 13 |
from reachy_mini import ReachyMini, ReachyMiniApp
|
| 14 |
from reachy_mini_conversation_app.moves import MovementManager
|
|
@@ -18,6 +19,7 @@ from reachy_mini_conversation_app.utils import (
|
|
| 18 |
setup_logger,
|
| 19 |
handle_vision_stuff,
|
| 20 |
)
|
|
|
|
| 21 |
from reachy_mini_conversation_app.console import LocalStream
|
| 22 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 23 |
from reachy_mini_conversation_app.audio.head_wobbler import HeadWobbler
|
|
@@ -29,7 +31,8 @@ def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> L
|
|
| 29 |
return chatbot
|
| 30 |
|
| 31 |
|
| 32 |
-
|
|
|
|
| 33 |
"""Entrypoint for the Reachy Mini conversation demo."""
|
| 34 |
args = parse_args()
|
| 35 |
args.gradio = True # TODO Antoine - force gradio for testing appifying
|
|
@@ -84,11 +87,16 @@ def main(robot=None):
|
|
| 84 |
stream_manager: gr.Blocks | LocalStream | None = None
|
| 85 |
|
| 86 |
if args.gradio:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
stream = Stream(
|
| 88 |
handler=handler,
|
| 89 |
mode="send-receive",
|
| 90 |
modality="audio",
|
| 91 |
-
additional_inputs=[chatbot],
|
| 92 |
additional_outputs=[chatbot],
|
| 93 |
additional_outputs_handler=update_chatbot,
|
| 94 |
ui_args={"title": "Talk with Reachy Mini"},
|
|
@@ -133,9 +141,10 @@ class ReachyMiniConversationApp(ReachyMiniApp):
|
|
| 133 |
|
| 134 |
def run(self, reachy_mini: ReachyMini, stop_event: threading.Event):
|
| 135 |
"""Run the Reachy Mini conversation demo app."""
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
|
| 141 |
if __name__ == "__main__":
|
|
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import sys
|
| 5 |
+
import asyncio
|
| 6 |
import threading
|
| 7 |
from typing import Any, Dict, List
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from fastrtc import Stream
|
| 12 |
+
from gradio.utils import get_space
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini, ReachyMiniApp
|
| 15 |
from reachy_mini_conversation_app.moves import MovementManager
|
|
|
|
| 19 |
setup_logger,
|
| 20 |
handle_vision_stuff,
|
| 21 |
)
|
| 22 |
+
from reachy_mini_conversation_app.config import config
|
| 23 |
from reachy_mini_conversation_app.console import LocalStream
|
| 24 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 25 |
from reachy_mini_conversation_app.audio.head_wobbler import HeadWobbler
|
|
|
|
| 31 |
return chatbot
|
| 32 |
|
| 33 |
|
| 34 |
+
# TODO handle stop event properly
|
| 35 |
+
def main(robot=None, stop_event=None):
|
| 36 |
"""Entrypoint for the Reachy Mini conversation demo."""
|
| 37 |
args = parse_args()
|
| 38 |
args.gradio = True # TODO Antoine - force gradio for testing appifying
|
|
|
|
| 87 |
stream_manager: gr.Blocks | LocalStream | None = None
|
| 88 |
|
| 89 |
if args.gradio:
|
| 90 |
+
api_key_textbox = gr.Textbox(
|
| 91 |
+
label="API Key",
|
| 92 |
+
type="password",
|
| 93 |
+
value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
|
| 94 |
+
)
|
| 95 |
stream = Stream(
|
| 96 |
handler=handler,
|
| 97 |
mode="send-receive",
|
| 98 |
modality="audio",
|
| 99 |
+
additional_inputs=[chatbot, api_key_textbox],
|
| 100 |
additional_outputs=[chatbot],
|
| 101 |
additional_outputs_handler=update_chatbot,
|
| 102 |
ui_args={"title": "Talk with Reachy Mini"},
|
|
|
|
| 141 |
|
| 142 |
def run(self, reachy_mini: ReachyMini, stop_event: threading.Event):
|
| 143 |
"""Run the Reachy Mini conversation demo app."""
|
| 144 |
+
loop = asyncio.new_event_loop()
|
| 145 |
+
asyncio.set_event_loop(loop)
|
| 146 |
+
|
| 147 |
+
main(robot=reachy_mini, stop_event=stop_event)
|
| 148 |
|
| 149 |
|
| 150 |
if __name__ == "__main__":
|
src/reachy_mini_conversation_app/openai_realtime.py
CHANGED
|
@@ -71,7 +71,15 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 71 |
|
| 72 |
async def start_up(self) -> None:
|
| 73 |
"""Start the handler with minimal retries on unexpected websocket closure."""
|
| 74 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
max_attempts = 3
|
| 77 |
for attempt in range(1, max_attempts + 1):
|
|
@@ -81,10 +89,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 81 |
return
|
| 82 |
except ConnectionClosedError as e:
|
| 83 |
# Abrupt close (e.g., "no close frame received or sent") → retry
|
| 84 |
-
logger.warning(
|
| 85 |
-
"Realtime websocket closed unexpectedly (attempt %d/%d): %s",
|
| 86 |
-
attempt, max_attempts, e
|
| 87 |
-
)
|
| 88 |
if attempt < max_attempts:
|
| 89 |
# exponential backoff with jitter
|
| 90 |
base_delay = 2 ** (attempt - 1) # 1s, 2s, 4s, 8s, etc.
|
|
@@ -112,10 +117,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 112 |
"type": "audio/pcm",
|
| 113 |
"rate": self.target_input_rate,
|
| 114 |
},
|
| 115 |
-
"transcription": {
|
| 116 |
-
"model": "whisper-1",
|
| 117 |
-
"language": "en"
|
| 118 |
-
},
|
| 119 |
"turn_detection": {
|
| 120 |
"type": "server_vad",
|
| 121 |
"interrupt_response": True,
|
|
@@ -156,10 +158,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 156 |
logger.debug("User speech stopped - server will auto-commit with VAD")
|
| 157 |
|
| 158 |
if event.type in (
|
| 159 |
-
"response.audio.done",
|
| 160 |
-
"response.output_audio.done",
|
| 161 |
-
"response.audio.completed",
|
| 162 |
-
"response.completed",
|
| 163 |
):
|
| 164 |
logger.debug("response completed")
|
| 165 |
|
|
@@ -296,7 +298,9 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 296 |
|
| 297 |
# Only show user-facing errors, not internal state errors
|
| 298 |
if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
|
| 299 |
-
await self.output_queue.put(
|
|
|
|
|
|
|
| 300 |
|
| 301 |
# Microphone receive
|
| 302 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
|
|
|
| 71 |
|
| 72 |
async def start_up(self) -> None:
|
| 73 |
"""Start the handler with minimal retries on unexpected websocket closure."""
|
| 74 |
+
await self.wait_for_args()
|
| 75 |
+
args = list(self.latest_args)
|
| 76 |
+
textbox_api_key = args[3] if len(args[3]) > 0 else None
|
| 77 |
+
if textbox_api_key is not None:
|
| 78 |
+
openai_api_key = textbox_api_key
|
| 79 |
+
else:
|
| 80 |
+
openai_api_key = config.OPENAI_API_KEY
|
| 81 |
+
|
| 82 |
+
self.client = AsyncOpenAI(api_key=openai_api_key)
|
| 83 |
|
| 84 |
max_attempts = 3
|
| 85 |
for attempt in range(1, max_attempts + 1):
|
|
|
|
| 89 |
return
|
| 90 |
except ConnectionClosedError as e:
|
| 91 |
# Abrupt close (e.g., "no close frame received or sent") → retry
|
| 92 |
+
logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
|
|
|
|
|
|
|
|
|
|
| 93 |
if attempt < max_attempts:
|
| 94 |
# exponential backoff with jitter
|
| 95 |
base_delay = 2 ** (attempt - 1) # 1s, 2s, 4s, 8s, etc.
|
|
|
|
| 117 |
"type": "audio/pcm",
|
| 118 |
"rate": self.target_input_rate,
|
| 119 |
},
|
| 120 |
+
"transcription": {"model": "whisper-1", "language": "en"},
|
|
|
|
|
|
|
|
|
|
| 121 |
"turn_detection": {
|
| 122 |
"type": "server_vad",
|
| 123 |
"interrupt_response": True,
|
|
|
|
| 158 |
logger.debug("User speech stopped - server will auto-commit with VAD")
|
| 159 |
|
| 160 |
if event.type in (
|
| 161 |
+
"response.audio.done", # GA
|
| 162 |
+
"response.output_audio.done", # GA alias
|
| 163 |
+
"response.audio.completed", # legacy (for safety)
|
| 164 |
+
"response.completed", # text-only completion
|
| 165 |
):
|
| 166 |
logger.debug("response completed")
|
| 167 |
|
|
|
|
| 298 |
|
| 299 |
# Only show user-facing errors, not internal state errors
|
| 300 |
if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
|
| 301 |
+
await self.output_queue.put(
|
| 302 |
+
AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
|
| 303 |
+
)
|
| 304 |
|
| 305 |
# Microphone receive
|
| 306 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|