Commit 943c80d (1 parent: 94b130f): added examples

app.py CHANGED
@@ -3,12 +3,12 @@
 This script calls the model from openai api to predict the next few words.
 '''
 import os
-os.system("pip install --upgrade pip")
+# os.system("pip install --upgrade pip")
 from pprint import pprint
-os.system("pip install git+https://github.com/openai/whisper.git")
+# os.system("pip install git+https://github.com/openai/whisper.git")
 import sys
-print("Sys: ", sys.executable)
-os.system("pip install openai")
+# print("Sys: ", sys.executable)
+# os.system("pip install openai")
 import openai
 import gradio as gr
 import whisper
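This hunk comments out the runtime pip installs rather than deleting them. On Spaces these dependencies normally belong in a requirements.txt; if an in-process fallback were ever wanted, a minimal sketch could look like the following (the ensure helper is an illustrative assumption, not part of the commit):

# Sketch (assumption): install a package at startup only when its module is
# missing, instead of unconditionally shelling out to pip on every launch.
import importlib.util
import subprocess
import sys

def ensure(module, pip_spec=None):
    # Install pip_spec (default: the module name) only if module is absent.
    if importlib.util.find_spec(module) is None:
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_spec or module])

ensure("openai")
ensure("whisper", "git+https://github.com/openai/whisper.git")

This avoids reinstalling on every restart when the packages are already present.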
@@ -18,38 +18,39 @@ from transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
 import time
 
-
+EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word.
 
-
-
-
-
-
+The predictions follow a few rules:
+1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
+2) The predictions do not repeat themselves.
+3) The predictions focus on suggesting nouns, adjectives, and verbs.
+4) The predictions are related to the context in the transcript.
 
-
-
-
-
-
-
-
-
-
-
+EXAMPLES:
+Transcript: Tomorrow night we're going out to
+Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
+Transcript: I would like to order a cheeseburger with a side of
+Prediction: French fries, Milkshake, Apple slices, Side salad, Extra ketchup
+Transcript: My friend Savanah is
+Prediction: An electrical engineer, A marine biologist, A classical musician
+Transcript: I need to buy a birthday
+Prediction: Present, Gift, Cake, Card
+Transcript: """
 
 # whisper model specification
 model = whisper.load_model("tiny")
 
-openai.api_key = os.environ["Openai_APIkey"]
-
+# openai.api_key = os.environ["Openai_APIkey"]
+
 def debug_inference(audio, prompt, model, temperature, state=""):
+    breakpoint()
     # load audio data
     audio = whisper.load_audio(audio)
     # ensure sample is in correct format for inference
     audio = whisper.pad_or_trim(audio)
 
     # generate a log-mel spectrogram of the audio data
-    mel = whisper.log_mel_spectrogram(audio)
+    mel = whisper.log_mel_spectrogram(audio)
 
     _, probs = model.detect_language(mel)
 
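The hunk ends right after language detection; the decoding step that presumably follows is outside the diff context. A minimal sketch of how a whisper pipeline typically continues from this point, assuming the function goes on to decode the same mel spectrogram (the file name reuses the example audio from this commit):

# Sketch (assumption): continue from the detected language to a transcription.
import whisper

model = whisper.load_model("tiny")
audio = whisper.pad_or_trim(whisper.load_audio("example_in-the-mood-to-eat.m4a"))
mel = whisper.log_mel_spectrogram(audio).to(model.device)

_, probs = model.detect_language(mel)
language = max(probs, key=probs.get)           # most probable language code

options = whisper.DecodingOptions(fp16=False)  # fp16=False keeps CPU decoding happy
result = whisper.decode(model, mel, options)
print(language, result.text)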
@@ -94,4 +95,5 @@ gr.Interface(
         "state"
     ],
     outputs=["textbox","state","textbox", "textbox"],
-
+    examples=[["example_in-the-mood-to-eat.m4a", EXAMPLE_PROMPT, "text-ada-001", 0.8, ""],["","","",0.9,""]],
+    live=False).launch()
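The new EXAMPLE_PROMPT ends with "Transcript: ", so a live transcript can be appended directly and the model asked to continue on a "Prediction:" line. The completion call itself is not in this diff; below is a hedged sketch using the legacy openai-python Completion API implied by the openai.api_key style above ("text-ada-001" and temperature 0.8 come from the examples row; max_tokens and stop are illustrative assumptions):

# Sketch (assumption): feed the few-shot prompt plus the live transcript to the
# completion endpoint and return the suggested next words.
import openai

def predict_next_words(transcript, prompt, model_name="text-ada-001", temperature=0.8):
    response = openai.Completion.create(
        model=model_name,
        prompt=prompt + transcript + "\nPrediction:",
        temperature=temperature,
        max_tokens=30,   # illustrative cap on suggestion length
        stop="\n",       # illustrative: one line of suggestions
    )
    return response["choices"][0]["text"].strip()

# usage, with EXAMPLE_PROMPT as defined in app.py above:
# predict_next_words("I need to buy a birthday ", EXAMPLE_PROMPT)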
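For reference, each row of examples= fills the interface inputs positionally (audio file, prompt, model name, temperature, state). A stripped-down sketch of the same wiring, assuming gradio 3.x-era widgets (the input components are guesses, since the diff shows only the outputs and the examples rows):

# Sketch (assumption): minimal self-contained version of the gr.Interface call,
# showing how an examples row maps onto the inputs in order.
import gradio as gr

EXAMPLE_PROMPT = "This is a tool ... Transcript: "  # abridged stand-in for the commit's prompt

def debug_inference(audio, prompt, model, temperature, state=""):
    # placeholder body: the real app transcribes with whisper, then completes with openai
    return "prediction", state, "language", "transcript"

gr.Interface(
    fn=debug_inference,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),  # assumed widget
        gr.Textbox(value=EXAMPLE_PROMPT),                # prompt
        gr.Dropdown(["text-ada-001"]),                   # model name
        gr.Slider(0.0, 1.0, value=0.8),                  # temperature
        "state",
    ],
    outputs=["textbox", "state", "textbox", "textbox"],
    examples=[["example_in-the-mood-to-eat.m4a", EXAMPLE_PROMPT, "text-ada-001", 0.8, ""]],
    live=False,
).launch()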