Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import soundfile as sf | |
| import tempfile | |
| from transformers import AutoModelForTextToSpeech, AutoTokenizer | |
# Hugging Face Hub identifier of the Kokoro-82M checkpoint.
# NOTE(review): the hosting page reports a runtime error. Confirm that the
# installed transformers release exports AutoModelForTextToSpeech and that
# this checkpoint can be loaded through the Auto* classes.
MODEL_NAME = "hexgrad/Kokoro-82M"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the weights, then pin them to the CPU in a separate step.
# Change "cpu" to "cuda" if running on GPU.
model = AutoModelForTextToSpeech.from_pretrained(MODEL_NAME)
model = model.to("cpu")

# Voice names offered in the UI dropdown. Only a placeholder entry for now;
# extend this list if the model turns out to provide several voices.
voices = ["default"]
def generate_speech(text, voice, speed, show_transcript):
    """Convert input text to speech and save the audio to a WAV file.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            short-circuits and the model is never invoked.
        voice: Voice name selected in the UI. Accepted for interface
            compatibility; it is not forwarded to the model.
        speed: Speed factor selected in the UI. Accepted for interface
            compatibility; it is not forwarded to the model.
        show_transcript: When truthy, ``text`` is echoed back as the
            transcript; otherwise the transcript is ``None``.

    Returns:
        A ``(wav_path, transcript)`` tuple. ``wav_path`` is the path of a
        newly created ``.wav`` file that is NOT deleted automatically, or
        ``None`` when there was nothing to synthesize.
    """
    # Nothing to speak: skip tokenization/generation instead of feeding the
    # model an empty sequence.
    if not text or not text.strip():
        return None, None

    # Follow the model's device rather than hard-coding "cpu", so moving the
    # model to "cuda" does not leave the inputs on a different device.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        speech = model.generate(**inputs)

    # tempfile.mktemp() only invents a name and is open to a race between
    # name creation and file creation. NamedTemporaryFile creates the file
    # atomically; delete=False keeps it on disk for the caller to read.
    # The handle is closed before writing so the path can be reopened on
    # every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    # NOTE(review): 22050 Hz is carried over unchanged; verify it against the
    # model's documented output sample rate, otherwise playback pitch/speed
    # will be off.
    sf.write(wav_path, speech.cpu().numpy(), 22050)

    transcript = text if show_transcript else None
    return wav_path, transcript
# Gradio UI.
# Input widgets, listed in the same order as generate_speech's parameters:
# text, voice, speed, show_transcript.
_input_components = [
    gr.Textbox(label="Input Text", lines=5, placeholder="Type here..."),
    gr.Dropdown(choices=voices, label="Select Voice", value="default"),
    gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    gr.Checkbox(label="Show Transcript", value=True),
]

# Output widgets, listed in the same order as generate_speech's return tuple:
# audio file path, transcript.
_output_components = [
    gr.Audio(label="Generated Speech"),
    gr.Textbox(label="Transcript", visible=True),
]

interface = gr.Interface(
    fn=generate_speech,
    inputs=_input_components,
    outputs=_output_components,
    title="Educational Text-to-Speech",
    description="Enter text, choose a voice, and generate speech. Use the transcript option to follow along while listening.",
    allow_flagging="never",
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()