apirrone
committed on
Commit
·
e2ba649
1
Parent(s):
8b59975
textbox to enter openai api key
Browse files
src/reachy_mini_conversation_app/main.py
CHANGED
|
@@ -2,13 +2,14 @@
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import sys
|
| 5 |
-
import
|
| 6 |
import threading
|
| 7 |
from typing import Any, Dict, List
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from fastrtc import Stream
|
|
|
|
| 12 |
|
| 13 |
from reachy_mini import ReachyMini, ReachyMiniApp
|
| 14 |
from reachy_mini_conversation_app.moves import MovementManager
|
|
@@ -18,6 +19,7 @@ from reachy_mini_conversation_app.utils import (
|
|
| 18 |
setup_logger,
|
| 19 |
handle_vision_stuff,
|
| 20 |
)
|
|
|
|
| 21 |
from reachy_mini_conversation_app.console import LocalStream
|
| 22 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 23 |
from reachy_mini_conversation_app.audio.head_wobbler import HeadWobbler
|
|
@@ -29,7 +31,8 @@ def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> L
|
|
| 29 |
return chatbot
|
| 30 |
|
| 31 |
|
| 32 |
-
|
|
|
|
| 33 |
"""Entrypoint for the Reachy Mini conversation demo."""
|
| 34 |
args = parse_args()
|
| 35 |
args.gradio = True # TODO Antoine - force gradio for testing appifying
|
|
@@ -84,11 +87,16 @@ def main(robot=None):
|
|
| 84 |
stream_manager: gr.Blocks | LocalStream | None = None
|
| 85 |
|
| 86 |
if args.gradio:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
stream = Stream(
|
| 88 |
handler=handler,
|
| 89 |
mode="send-receive",
|
| 90 |
modality="audio",
|
| 91 |
-
additional_inputs=[chatbot],
|
| 92 |
additional_outputs=[chatbot],
|
| 93 |
additional_outputs_handler=update_chatbot,
|
| 94 |
ui_args={"title": "Talk with Reachy Mini"},
|
|
@@ -133,9 +141,10 @@ class ReachyMiniConversationApp(ReachyMiniApp):
|
|
| 133 |
|
| 134 |
def run(self, reachy_mini: ReachyMini, stop_event: threading.Event):
|
| 135 |
"""Run the Reachy Mini conversation demo app."""
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
|
| 141 |
if __name__ == "__main__":
|
|
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import sys
|
| 5 |
+
import asyncio
|
| 6 |
import threading
|
| 7 |
from typing import Any, Dict, List
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from fastrtc import Stream
|
| 12 |
+
from gradio.utils import get_space
|
| 13 |
|
| 14 |
from reachy_mini import ReachyMini, ReachyMiniApp
|
| 15 |
from reachy_mini_conversation_app.moves import MovementManager
|
|
|
|
| 19 |
setup_logger,
|
| 20 |
handle_vision_stuff,
|
| 21 |
)
|
| 22 |
+
from reachy_mini_conversation_app.config import config
|
| 23 |
from reachy_mini_conversation_app.console import LocalStream
|
| 24 |
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
|
| 25 |
from reachy_mini_conversation_app.audio.head_wobbler import HeadWobbler
|
|
|
|
| 31 |
return chatbot
|
| 32 |
|
| 33 |
|
| 34 |
+
# TODO handle stop event properly
|
| 35 |
+
def main(robot=None, stop_event=None):
|
| 36 |
"""Entrypoint for the Reachy Mini conversation demo."""
|
| 37 |
args = parse_args()
|
| 38 |
args.gradio = True # TODO Antoine - force gradio for testing appifying
|
|
|
|
| 87 |
stream_manager: gr.Blocks | LocalStream | None = None
|
| 88 |
|
| 89 |
if args.gradio:
|
| 90 |
+
api_key_textbox = gr.Textbox(
|
| 91 |
+
label="API Key",
|
| 92 |
+
type="password",
|
| 93 |
+
value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
|
| 94 |
+
)
|
| 95 |
stream = Stream(
|
| 96 |
handler=handler,
|
| 97 |
mode="send-receive",
|
| 98 |
modality="audio",
|
| 99 |
+
additional_inputs=[chatbot, api_key_textbox],
|
| 100 |
additional_outputs=[chatbot],
|
| 101 |
additional_outputs_handler=update_chatbot,
|
| 102 |
ui_args={"title": "Talk with Reachy Mini"},
|
|
|
|
| 141 |
|
| 142 |
def run(self, reachy_mini: ReachyMini, stop_event: threading.Event):
|
| 143 |
"""Run the Reachy Mini conversation demo app."""
|
| 144 |
+
loop = asyncio.new_event_loop()
|
| 145 |
+
asyncio.set_event_loop(loop)
|
| 146 |
+
|
| 147 |
+
main(robot=reachy_mini, stop_event=stop_event)
|
| 148 |
|
| 149 |
|
| 150 |
if __name__ == "__main__":
|
src/reachy_mini_conversation_app/openai_realtime.py
CHANGED
|
@@ -71,7 +71,15 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 71 |
|
| 72 |
async def start_up(self) -> None:
|
| 73 |
"""Start the handler with minimal retries on unexpected websocket closure."""
|
| 74 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
max_attempts = 3
|
| 77 |
for attempt in range(1, max_attempts + 1):
|
|
@@ -81,10 +89,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 81 |
return
|
| 82 |
except ConnectionClosedError as e:
|
| 83 |
# Abrupt close (e.g., "no close frame received or sent") → retry
|
| 84 |
-
logger.warning(
|
| 85 |
-
"Realtime websocket closed unexpectedly (attempt %d/%d): %s",
|
| 86 |
-
attempt, max_attempts, e
|
| 87 |
-
)
|
| 88 |
if attempt < max_attempts:
|
| 89 |
# exponential backoff with jitter
|
| 90 |
base_delay = 2 ** (attempt - 1) # 1s, 2s, 4s, 8s, etc.
|
|
@@ -112,10 +117,7 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 112 |
"type": "audio/pcm",
|
| 113 |
"rate": self.target_input_rate,
|
| 114 |
},
|
| 115 |
-
"transcription": {
|
| 116 |
-
"model": "whisper-1",
|
| 117 |
-
"language": "en"
|
| 118 |
-
},
|
| 119 |
"turn_detection": {
|
| 120 |
"type": "server_vad",
|
| 121 |
"interrupt_response": True,
|
|
@@ -156,10 +158,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 156 |
logger.debug("User speech stopped - server will auto-commit with VAD")
|
| 157 |
|
| 158 |
if event.type in (
|
| 159 |
-
"response.audio.done",
|
| 160 |
-
"response.output_audio.done",
|
| 161 |
-
"response.audio.completed",
|
| 162 |
-
"response.completed",
|
| 163 |
):
|
| 164 |
logger.debug("response completed")
|
| 165 |
|
|
@@ -296,7 +298,9 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 296 |
|
| 297 |
# Only show user-facing errors, not internal state errors
|
| 298 |
if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
|
| 299 |
-
await self.output_queue.put(
|
|
|
|
|
|
|
| 300 |
|
| 301 |
# Microphone receive
|
| 302 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|
|
|
|
| 71 |
|
| 72 |
async def start_up(self) -> None:
|
| 73 |
"""Start the handler with minimal retries on unexpected websocket closure."""
|
| 74 |
+
await self.wait_for_args()
|
| 75 |
+
args = list(self.latest_args)
|
| 76 |
+
textbox_api_key = args[3] if len(args[3]) > 0 else None
|
| 77 |
+
if textbox_api_key is not None:
|
| 78 |
+
openai_api_key = textbox_api_key
|
| 79 |
+
else:
|
| 80 |
+
openai_api_key = config.OPENAI_API_KEY
|
| 81 |
+
|
| 82 |
+
self.client = AsyncOpenAI(api_key=openai_api_key)
|
| 83 |
|
| 84 |
max_attempts = 3
|
| 85 |
for attempt in range(1, max_attempts + 1):
|
|
|
|
| 89 |
return
|
| 90 |
except ConnectionClosedError as e:
|
| 91 |
# Abrupt close (e.g., "no close frame received or sent") → retry
|
| 92 |
+
logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
|
|
|
|
|
|
|
|
|
|
| 93 |
if attempt < max_attempts:
|
| 94 |
# exponential backoff with jitter
|
| 95 |
base_delay = 2 ** (attempt - 1) # 1s, 2s, 4s, 8s, etc.
|
|
|
|
| 117 |
"type": "audio/pcm",
|
| 118 |
"rate": self.target_input_rate,
|
| 119 |
},
|
| 120 |
+
"transcription": {"model": "whisper-1", "language": "en"},
|
|
|
|
|
|
|
|
|
|
| 121 |
"turn_detection": {
|
| 122 |
"type": "server_vad",
|
| 123 |
"interrupt_response": True,
|
|
|
|
| 158 |
logger.debug("User speech stopped - server will auto-commit with VAD")
|
| 159 |
|
| 160 |
if event.type in (
|
| 161 |
+
"response.audio.done", # GA
|
| 162 |
+
"response.output_audio.done", # GA alias
|
| 163 |
+
"response.audio.completed", # legacy (for safety)
|
| 164 |
+
"response.completed", # text-only completion
|
| 165 |
):
|
| 166 |
logger.debug("response completed")
|
| 167 |
|
|
|
|
| 298 |
|
| 299 |
# Only show user-facing errors, not internal state errors
|
| 300 |
if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
|
| 301 |
+
await self.output_queue.put(
|
| 302 |
+
AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
|
| 303 |
+
)
|
| 304 |
|
| 305 |
# Microphone receive
|
| 306 |
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
|