whisper_fileStream

Running

App Files Community

Firefly777a committed on Mar 8, 2023

Commit

8bdb719

1 Parent(s): 2cc8a36

Large modification to the prompt, added rules.

Browse files

Files changed (1) hide show

app.py +22 -28

app.py CHANGED Viewed

@@ -38,10 +38,6 @@ model = whisper.load_model("tiny")
 def inference(audio, state=""):
-    #time.sleep(2)
-    #text = p(audio)["text"]
-    #state += text + " "
     # load audio data
     audio = whisper.load_audio(audio)
     # ensure sample is in correct format for inference
@@ -58,20 +54,26 @@ def inference(audio, state=""):
     result = whisper.decode(model, mel, options)
     print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))
-    PROMPT = """The following is an incomplete transcript of a brief conversation.
-    Predict the next few words int he transcript to complete the sentence.
-    A few examples of transcripts and predictions are provided below:
-    Transcript: Tomorrow night we're going out to
-    Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
-    Transcript: I would like to order a cheeseburger with a side of
-    Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup
-    Transcript: My friend Savanah is
-    Prediction: An elecrical engineer, A marine biologist, A classical musician
-    Transcript: I need to buy a birthday
-    Prediction: Present, Gift, Cake, Card
-    Given these examples, predict the next few words in the following sentence:
     """
-    text = PROMPT + result.text
     openai.api_key = os.environ["Openai_APIkey"]
@@ -107,17 +109,9 @@ def inference(audio, state=""):
 # get audio from microphone
 gr.Interface(
-        fn=inference,
-    inputs=[
-        gr.inputs.Audio(source="microphone", type="filepath"),
-        "state"
-    ],
-    outputs=[
-        "textbox",
-        "state",
-        "textbox"
-    ],
     live=True).launch()

 def inference(audio, state=""):
     # load audio data
     audio = whisper.load_audio(audio)
     # ensure sample is in correct format for inference
     result = whisper.decode(model, mel, options)
     print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))
+    PROMPT = """This is a tool for helping someone with memory issues remember the next word.
+The predictions follow a few rules:
+1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
+2) The predictions do not repeat themselves.
+3) The predictions focus on suggesting nouns, adjectives, and verbs.
+4) The predictions are related to the context in the transcript.
+EXAMPLES:
+Transcript: Tomorrow night we're going out to
+Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
+Transcript: I would like to order a cheeseburger with a side of
+Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup
+Transcript: My friend Savanah is
+Prediction: An elecrical engineer, A marine biologist, A classical musician
+Transcript: I need to buy a birthday
+Prediction: Present, Gift, Cake, Card
+Transcript:
     """
+    text = PROMPT + result.text + "\nPrediction: "
     openai.api_key = os.environ["Openai_APIkey"]
 # get audio from microphone
 gr.Interface(
+    fn=inference,
+    inputs=[gr.inputs.Audio(source="microphone", type="filepath"), "state"],
+    outputs=["textbox","state","textbox"],
     live=True).launch()