Commit 6a2808e
Parent(s): fea335b
Changed the way the ASR model (whisper) is called.
app.py
CHANGED
@@ -37,29 +37,25 @@ Transcript: I need to buy a birthday
 Prediction: Present, Gift, Cake, Card
 Transcript: """
 
-# whisper model specification
-
+# whisper model specification
+asr_model = whisper.load_model("tiny")
 
 # openai.api_key = os.environ["Openai_APIkey"]
 
-def debug_inference(audio, prompt, model, temperature, state=""):
-    # load audio data
-    audio = whisper.load_audio(audio)
-    # ensure sample is in correct format for inference
-    audio = whisper.pad_or_trim(audio)
 
-    # generate a log-mel spetrogram of the audio data
-    mel = whisper.log_mel_spectrogram(audio)
-
-    _, probs = model.detect_language(mel)
 
-
-
-
-
-
+# Transcribe function
+def transcribe(audio_file):
+    print("Transcribing")
+    transcription = asr_model.transcribe(audio_file)["text"]
+    return transcription
 
-
+def debug_inference(audio, prompt, model, temperature, state=""):
+    # Transcribe with Whisper
+    print("The audio is:", audio)
+    transcript = transcribe(audio)
+
+    text = prompt + transcript + "\nPrediction: "
 
     response = openai.Completion.create(
         model=model,
@@ -82,7 +78,7 @@ def debug_inference(audio, prompt, model, temperature, state=""):
     infers = list(map(lambda x: x.replace("\n", ""), temp))
     #infered = list(map(lambda x: x.split(','), infers))
 
-    return
+    return transcript, state, infers, text
 
 # get audio from microphone
 gr.Interface(
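
For context on the new call path: whisper's high-level transcribe() replaces the manual load_audio / pad_or_trim / log_mel_spectrogram / detect_language pipeline that this commit removes. A minimal runnable sketch, assuming the openai-whisper package and ffmpeg are installed; the sample file name is hypothetical:

import whisper

# Load the "tiny" checkpoint once at module scope, as the commit does.
asr_model = whisper.load_model("tiny")

def transcribe(audio_file):
    # transcribe() performs audio loading, padding/trimming, the log-mel
    # spectrogram, and language detection internally, so none of the old
    # manual preprocessing steps are needed.
    return asr_model.transcribe(audio_file)["text"]

print(transcribe("sample.wav"))  # hypothetical input file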
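
The changed return statement matters for the interface wiring: Gradio passes one returned value to each output component, so the bare return (which yielded None) becomes the four values transcript, state, infers, text. A hedged sketch of how they could map onto components, assuming Gradio 3.x-style APIs; the Space's actual gr.Interface arguments are not shown in this diff, and the stub below stands in for the real function:

import gradio as gr

def debug_inference(audio, prompt, model, temperature, state=""):
    # Stub standing in for the function in app.py; returns the same
    # four-value shape: transcript, state, infers, text.
    return "transcript", state, ["Present", "Gift"], "full prompt"

gr.Interface(
    fn=debug_inference,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),  # "get audio from microphone"
        gr.Textbox(label="Prompt"),            # assumed input
        gr.Textbox(label="Model"),             # assumed input
        gr.Slider(0, 1, label="Temperature"),  # assumed input
        gr.State(),
    ],
    # One output component per returned value.
    outputs=[
        gr.Textbox(label="Transcript"),
        gr.State(),
        gr.JSON(label="Inferences"),
        gr.Textbox(label="Prompt sent to the model"),
    ],
).launch()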