Spaces:

micknikolic
/

speech-to-text

Runtime error

App Files Files Community

micknikolic committed on Nov 26, 2023

Commit

836722f

1 Parent(s): 23672a2

Create app.py

Browse files

Files changed (1) hide show

app.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import accelerate
+import gradio as gr
+import time
+import io
+import librosa
+import torch
+import soundfile as sf
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+#Instantiating the model object.
+model = AutoModelForSpeechSeq2Seq.from_pretrained(pretrained_model_name_or_path= "openai/whisper-large-v3",
+                                                  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                                                  use_safetensors=True).to("cuda")
+#Instantiating the processor object.
+processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path="openai/whisper-large-v3")
+#Instantiating the transformer class' pipeline object.
+pipe = pipeline(task="automatic-speech-recognition",
+                model="openai/whisper-large-v3",
+                tokenizer=processor.tokenizer,
+                feature_extractor=processor.feature_extractor,
+                max_new_tokens=128,
+                chunk_length_s=30,
+                batch_size=16,
+                return_timestamps=True,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                device="cuda")
+#Defining speech-to-text function.
+def convert(audio, state=""):
+    """
+    This function performs speech to text conversion and will be used in Gradio's Interface function.
+    Parameters:
+    - audio: audio data as a bytes-like object.
+    - state: a string representing the accumulated text from previous conversions.
+    """
+    time.sleep(3)
+    try:
+        result = pipe(audio)
+        transcribed_text = result['text']
+        state += transcribed_text + " "
+    except Exception as e:
+        return f"Error processing audio: Please start recording!", state
+    return state, state
+#Instantiating Gradio Interface.
+gr_interface = gr.Interface(
+    fn = convert,
+    title = "Automatic Speech-to-Text",
+    description = "### Record your speech and watch it get converted to text!",
+    inputs = [
+        gr.Audio(
+            label="Please Record Your Speech Here!",
+            sources="microphone",
+            type="filepath"),
+        "state"],
+    outputs = [
+        "textbox",
+        "state"
+    ],
+    theme="dark",
+    live=True
+)
+#Launching the app (share=True).
+gr_interface.launch()