Spaces:
Runtime error
Runtime error
| import torch | |
| from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
| import soundfile as sf | |
| import gradio as gr | |
# The processor (feature extractor + tokenizer) and the CTC model are both
# loaded from the same pre-trained checkpoint, so name it once.
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
def speech_to_text(audio):
    """Transcribe an audio file with the module-level wav2vec2 CTC model.

    Args:
        audio: Path to an audio file readable by ``soundfile``, or ``None``
            when the caller supplied no audio.

    Returns:
        The decoded transcription of the recording. An empty string is
        returned when ``audio`` is ``None`` or the file holds no samples.
    """
    if audio is None:
        return ""

    # always_2d=True yields a (frames, channels) array for mono and
    # multi-channel files alike, so the downmix below is uniform.
    samples, sample_rate = sf.read(audio, dtype="float32", always_2d=True)
    waveform = torch.from_numpy(samples).mean(dim=1)
    original_length = waveform.numel()
    if original_length == 0:
        return ""

    # The feature extractor is configured for a single sampling rate; feed it
    # audio at exactly that rate instead of whatever rate the file was saved at.
    target_rate = processor.feature_extractor.sampling_rate
    if sample_rate != target_rate:
        resampled_length = max(
            1, round(original_length * target_rate / sample_rate)
        )
        # Fourier-domain resampling: irfft trims (downsampling) or zero-pads
        # (upsampling) the spectrum to match the requested output length.
        # The rescale compensates for irfft normalising by the new length.
        spectrum = torch.fft.rfft(waveform)
        waveform = torch.fft.irfft(spectrum, n=resampled_length)
        waveform = waveform * (resampled_length / original_length)

    # Preprocess the mono waveform into model inputs
    inputs = processor(
        waveform.numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]
# Assemble the Gradio UI: an audio input handed to the callback as a file
# path, and a text box that shows the returned transcription.
audio_input = gr.Audio(type="filepath")
text_output = gr.Textbox()

iface = gr.Interface(
    fn=speech_to_text,
    inputs=audio_input,
    outputs=text_output,
    title="Chinese Speech Recognition",
    description="Upload an audio file and get the transcribed text using the wav2vec2-large-xlsr-53-chinese-zh-cn model.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()