Spaces:
Running
Running
'''
This script transcribes microphone audio with OpenAI's Whisper model, then calls
the text-davinci-003 model from the OpenAI API to predict the next few words.
'''
import os
import subprocess
import sys
import time
from pprint import pprint


def _pip_install(*args):
    """Run ``pip install`` with *args* under the interpreter executing this script.

    Invoking pip as ``sys.executable -m pip`` with an argument list avoids the
    shell and targets the same interpreter that performs the imports below.
    The call is best-effort, like the ``os.system`` calls it replaces: a failed
    install is not raised here and only surfaces at the later ``import``.
    """
    subprocess.run([sys.executable, "-m", "pip", "install", *args], check=False)


# Runtime dependency bootstrap. The installs must run before the third-party
# imports further down, so those imports cannot sit at the very top of the file.
_pip_install("--upgrade", "pip")
_pip_install("git+https://github.com/openai/whisper.git")
print("Sys: ", sys.executable)
_pip_install("openai")

import openai  # noqa: E402
import gradio as gr  # noqa: E402
import whisper  # noqa: E402
from transformers import pipeline  # noqa: E402
import torch  # noqa: E402
from transformers import AutoModelForCausalLM  # noqa: E402
from transformers import AutoTokenizer  # noqa: E402

# Disabled code (not executed): a local GPT-2 based predictor.
# import streaming.py
# from next_word_prediction import GPT2
# gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
# tokenizer = AutoTokenizer.from_pretrained("gpt2")
# generator = pipeline('text-generation', model='gpt2')

# Whisper checkpoint used for speech-to-text; loaded once at import time and
# shared by every call to inference().
model = whisper.load_model("tiny")
# Few-shot prompt placed ahead of the live transcript. It ends with an open
# "Transcript: " slot that the spoken text is appended to.
# NOTE(review): the example predictions contain typos ("Frnech", "katsup",
# "elecrical"); they are kept as written because changing the prompt changes
# what the model is asked -- confirm whether they should be corrected.
_PROMPT = """This is a tool for helping someone with memory issues remember the next word.
The predictions follow a few rules:
1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
2) The predictions do not repeat themselves.
3) The predictions focus on suggesting nouns, adjectives, and verbs.
4) The predictions are related to the context in the transcript.
EXAMPLES:
Transcript: Tomorrow night we're going out to
Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
Transcript: I would like to order a cheeseburger with a side of
Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup
Transcript: My friend Savanah is
Prediction: An elecrical engineer, A marine biologist, A classical musician
Transcript: I need to buy a birthday
Prediction: Present, Gift, Cake, Card
Transcript: """

# Number of alternative completions requested for each transcript.
_NUM_PREDICTIONS = 5


def _transcribe(audio_path):
    """Return the text Whisper decodes from the recording at *audio_path*."""
    samples = whisper.load_audio(audio_path)
    # Pad or trim the samples to the length Whisper expects.
    samples = whisper.pad_or_trim(samples)
    mel = whisper.log_mel_spectrogram(samples).to(model.device)
    # No language is set in the options, so whisper.decode is left to pick it.
    # The earlier separate model.detect_language() call was dropped because
    # its result was never used.
    options = whisper.DecodingOptions(fp16=False)
    return whisper.decode(model, mel, options).text


def _predict_continuations(transcript):
    """Ask the OpenAI completion endpoint for continuations of *transcript*.

    Raises:
        KeyError: if the ``Openai_APIkey`` environment variable is not set.
    """
    openai.api_key = os.environ["Openai_APIkey"]
    # NOTE(review): openai.Completion is the pre-1.0 client interface; confirm
    # that the installed openai package still provides it.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=_PROMPT + transcript + "\nPrediction: ",
        temperature=0.9,
        max_tokens=8,
        n=_NUM_PREDICTIONS,
    )
    # Iterate over what was actually returned instead of assuming a count, and
    # drop newlines so each suggestion stays on one line.
    return [choice["text"].replace("\n", "") for choice in response["choices"]]


def inference(audio, state=""):
    """Transcribe a recording and suggest ways to continue the sentence.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when there is
            no recording.
        state: Value of the interface's "state" component; returned unchanged.

    Returns:
        A ``(transcript, state, predictions)`` tuple, where ``predictions`` is
        a list of completion strings. The list is empty when there is no audio
        or the transcript is blank.

    Raises:
        KeyError: if the ``Openai_APIkey`` environment variable is not set.
    """
    # A live interface can call this without a recording; there is nothing to
    # load in that case.
    if not audio:
        return "", state, []

    transcript = _transcribe(audio)
    print("Transcript from whisper: ", transcript)

    # Skip the API request when nothing was recognised.
    if not transcript.strip():
        return transcript, state, []

    predictions = _predict_continuations(transcript)
    print("Predictions: ", predictions)
    return transcript, state, predictions
# Build the live demo: microphone recordings reach inference() as file paths,
# together with the "state" component, and the function's three return values
# fill the transcript textbox, the state, and the predictions textbox.
demo = gr.Interface(
    fn=inference,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath"), "state"],
    outputs=["textbox", "state", "textbox"],
    live=True,
)
demo.launch()