Commit 943c80d (1 parent: 94b130f): added examples

app.py CHANGED
@@ -3,12 +3,12 @@
 This script calls the model from openai api to predict the next few words.
 '''
 import os
-os.system("pip install --upgrade pip")
+# os.system("pip install --upgrade pip")
 from pprint import pprint
-os.system("pip install git+https://github.com/openai/whisper.git")
+# os.system("pip install git+https://github.com/openai/whisper.git")
 import sys
-print("Sys: ", sys.executable)
-os.system("pip install openai")
+# print("Sys: ", sys.executable)
+# os.system("pip install openai")
 import openai
 import gradio as gr
 import whisper
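This hunk comments out the runtime pip installs rather than deleting them. On Spaces these dependencies normally belong in a requirements.txt; if an in-process fallback were ever wanted, a minimal sketch could look like the following (the ensure helper is an illustrative assumption, not part of the commit):

# Sketch (assumption): install a package at startup only when its module is
# missing, instead of unconditionally shelling out to pip on every launch.
import importlib.util
import subprocess
import sys

def ensure(module, pip_spec=None):
    # Install pip_spec (default: the module name) only if module is absent.
    if importlib.util.find_spec(module) is None:
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_spec or module])

ensure("openai")
ensure("whisper", "git+https://github.com/openai/whisper.git")

This avoids reinstalling on every restart when the packages are already present.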
@@ -18,38 +18,39 @@ from transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
 import time
 
-
+EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word.
 
-
-
-
-
-
+The predictions follow a few rules:
+1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
+2) The predictions do not repeat themselves.
+3) The predictions focus on suggesting nouns, adjectives, and verbs.
+4) The predictions are related to the context in the transcript.
 
-
-
-
-
-
-
-
-
-
-
+EXAMPLES:
+Transcript: Tomorrow night we're going out to
+Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
+Transcript: I would like to order a cheeseburger with a side of
+Prediction: French fries, Milkshake, Apple slices, Side salad, Extra ketchup
+Transcript: My friend Savanah is
+Prediction: An electrical engineer, A marine biologist, A classical musician
+Transcript: I need to buy a birthday
+Prediction: Present, Gift, Cake, Card
+Transcript: """
 
 # whisper model specification
 model = whisper.load_model("tiny")
 
-openai.api_key = os.environ["Openai_APIkey"]
-
+# openai.api_key = os.environ["Openai_APIkey"]
+
 def debug_inference(audio, prompt, model, temperature, state=""):
+    breakpoint()
     # load audio data
     audio = whisper.load_audio(audio)
     # ensure sample is in correct format for inference
     audio = whisper.pad_or_trim(audio)
 
     # generate a log-mel spectrogram of the audio data
-    mel = whisper.log_mel_spectrogram(audio)
+    mel = whisper.log_mel_spectrogram(audio)
 
     _, probs = model.detect_language(mel)
 
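The hunk ends right after language detection; the decoding step that presumably follows is outside the diff context. A minimal sketch of how a whisper pipeline typically continues from this point, assuming the function goes on to decode the same mel spectrogram (the file name reuses the example audio from this commit):

# Sketch (assumption): continue from the detected language to a transcription.
import whisper

model = whisper.load_model("tiny")
audio = whisper.pad_or_trim(whisper.load_audio("example_in-the-mood-to-eat.m4a"))
mel = whisper.log_mel_spectrogram(audio).to(model.device)

_, probs = model.detect_language(mel)
language = max(probs, key=probs.get)           # most probable language code

options = whisper.DecodingOptions(fp16=False)  # fp16=False keeps CPU decoding happy
result = whisper.decode(model, mel, options)
print(language, result.text)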
@@ -94,4 +95,5 @@ gr.Interface(
         "state"
     ],
     outputs=["textbox","state","textbox", "textbox"],
-
+    examples=[["example_in-the-mood-to-eat.m4a", EXAMPLE_PROMPT, "text-ada-001", 0.8, ""],["","","",0.9,""]],
+    live=False).launch()
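The new EXAMPLE_PROMPT ends with "Transcript: ", so a live transcript can be appended directly and the model asked to continue on a "Prediction:" line. The completion call itself is not in this diff; below is a hedged sketch using the legacy openai-python Completion API implied by the openai.api_key style above ("text-ada-001" and temperature 0.8 come from the examples row; max_tokens and stop are illustrative assumptions):

# Sketch (assumption): feed the few-shot prompt plus the live transcript to the
# completion endpoint and return the suggested next words.
import openai

def predict_next_words(transcript, prompt, model_name="text-ada-001", temperature=0.8):
    response = openai.Completion.create(
        model=model_name,
        prompt=prompt + transcript + "\nPrediction:",
        temperature=temperature,
        max_tokens=30,   # illustrative cap on suggestion length
        stop="\n",       # illustrative: one line of suggestions
    )
    return response["choices"][0]["text"].strip()

# usage, with EXAMPLE_PROMPT as defined in app.py above:
# predict_next_words("I need to buy a birthday ", EXAMPLE_PROMPT)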
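For reference, each row of examples= fills the interface inputs positionally (audio file, prompt, model name, temperature, state). A stripped-down sketch of the same wiring, assuming gradio 3.x-era widgets (the input components are guesses, since the diff shows only the outputs and the examples rows):

# Sketch (assumption): minimal self-contained version of the gr.Interface call,
# showing how an examples row maps onto the inputs in order.
import gradio as gr

EXAMPLE_PROMPT = "This is a tool ... Transcript: "  # abridged stand-in for the commit's prompt

def debug_inference(audio, prompt, model, temperature, state=""):
    # placeholder body: the real app transcribes with whisper, then completes with openai
    return "prediction", state, "language", "transcript"

gr.Interface(
    fn=debug_inference,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),  # assumed widget
        gr.Textbox(value=EXAMPLE_PROMPT),                # prompt
        gr.Dropdown(["text-ada-001"]),                   # model name
        gr.Slider(0.0, 1.0, value=0.8),                  # temperature
        "state",
    ],
    outputs=["textbox", "state", "textbox", "textbox"],
    examples=[["example_in-the-mood-to-eat.m4a", EXAMPLE_PROMPT, "text-ada-001", 0.8, ""]],
    live=False,
).launch()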