Spaces:

fastrtc
/

llm-voice-chat-gradio

Runtime error

App Files Files Community

freddyaboulton HF Staff committed on Feb 20

Commit

a64ff02

verified ·

1 Parent(s): 473cd39

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README_gradio.md +15 -0
app.py +113 -0
requirements.txt +6 -0

README_gradio.md ADDED Viewed

	@@ -0,0 +1,15 @@

+---
+title: LLM Voice Chat (Gradio)
+emoji: 💻
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.16.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: LLM Voice by ElevenLabs (Gradio)
+tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY]
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from fastrtc import (
+    ReplyOnPause,
+    AdditionalOutputs,
+    Stream,
+    aggregate_bytes_to_16bit,
+    get_twilio_turn_credentials,
+    WebRTCError,
+    stt,
+    audio_to_bytes,
+)
+import numpy as np
+import gradio as gr
+from gradio.utils import get_space
+from groq import Groq
+from elevenlabs import ElevenLabs
+from dotenv import load_dotenv
+import time
+import os
+from fastapi import FastAPI
+load_dotenv()
+groq_client = Groq()
+tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+# See "Talk to Claude" in Cookbook for an example of how to keep
+# track of the chat history.
+def response(
+    audio: tuple[int, np.ndarray],
+    chatbot: list[dict] | None = None,
+):
+    try:
+        chatbot = chatbot or []
+        messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
+        start = time.time()
+        # text = stt(audio)
+        text = groq_client.audio.transcriptions.create(
+            file=("audio-file.mp3", audio_to_bytes(audio)),
+            model="whisper-large-v3-turbo",
+            response_format="verbose_json",
+        ).text
+        print("transcription", time.time() - start)
+        print("prompt", text)
+        chatbot.append({"role": "user", "content": text})
+        yield AdditionalOutputs(chatbot)
+        messages.append({"role": "user", "content": text})
+        response_text = (
+            groq_client.chat.completions.create(
+                model="llama-3.1-8b-instant",
+                max_tokens=512,
+                messages=messages,  # type: ignore
+            )
+            .choices[0]
+            .message.content
+        )
+        chatbot.append({"role": "assistant", "content": response_text})
+        iterator = tts_client.text_to_speech.convert_as_stream(
+            text=response_text,  # type: ignore
+            voice_id="JBFqnCBsd6RMkjVDRZzb",
+            model_id="eleven_multilingual_v2",
+            output_format="pcm_24000",
+        )
+        for chunk in aggregate_bytes_to_16bit(iterator):
+            audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
+            yield (24000, audio_array)
+        yield AdditionalOutputs(chatbot)
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise WebRTCError(traceback.format_exc())
+chatbot = gr.Chatbot(type="messages")
+stream = Stream(
+    modality="audio",
+    mode="send-receive",
+    handler=ReplyOnPause(response, input_sample_rate=16000),
+    additional_outputs_handler=lambda a, b: b,
+    additional_inputs=[chatbot],
+    additional_outputs=[chatbot],
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+    concurrency_limit=20 if get_space() else None,
+)
+for id, block in stream.ui.blocks.items():
+    if isinstance(block, gr.HTML):
+        stream.ui.blocks[id] = gr.HTML(
+            """
+                <h1 style='text-align: center'>
+                LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)
+                </h1>
+                """
+        )
+# Mount the STREAM UI to the FastAPI app
+# Because I don't want to build the UI manually
+app = FastAPI()
+gr.mount_gradio_app(app, stream.ui, path="/")
+if __name__ == "__main__":
+    import os
+    if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
+    elif mode == "PHONE":
+        stream.fastphone(host="0.0.0.0", port=7860)
+    else:
+        import uvicorn
+        uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastrtc[stopword]
+python-dotenv
+openai
+twilio
+groq
+elevenlabs