'''
This script transcribes speech with Whisper and then calls a completion model from the OpenAI API to predict the next few words.
'''
import os
os.system("pip install --upgrade pip")
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import sys
print("Sys: ", sys.executable)
os.system("pip install openai")
import openai
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
# PROMPT = """This is a tool for helping someone with memory issues remember the next word.
# The predictions follow a few rules:
# 1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
# 2) The predictions do not repeat themselves.
# 3) The predictions focus on suggesting nouns, adjectives, and verbs.
# 4) The predictions are related to the context in the transcript.
# EXAMPLES:
# Transcript: Tomorrow night we're going out to
# Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
# Transcript: I would like to order a cheeseburger with a side of
# Prediction: French fries, Milkshake, Apple slices, Side salad, Extra ketchup
# Transcript: My friend Savanah is
# Prediction: An electrical engineer, A marine biologist, A classical musician
# Transcript: I need to buy a birthday
# Prediction: Present, Gift, Cake, Card
# Transcript: """
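# The few-shot prompt above is kept for reference; at runtime the prompt is supplied through the Gradio textbox below.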
# whisper model specification
whisper_model = whisper.load_model("tiny")
openai.api_key = os.environ["Openai_APIkey"]
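# debug_inference: transcribe the recorded audio with Whisper, then ask the
# selected OpenAI completion model for five short continuations of the transcript.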
def debug_inference(audio, prompt, model, temperature, state=""):
    # load audio data
    audio = whisper.load_audio(audio)
    # ensure sample is in correct format for inference
    audio = whisper.pad_or_trim(audio)
    # generate a log-mel spectrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
    # detect the spoken language (the probabilities are computed but not used further)
    _, probs = whisper_model.detect_language(mel)
    # decode audio data
    options = whisper.DecodingOptions(fp16=False)
    # transcribe speech to text
    result = whisper.decode(whisper_model, mel, options)
    print("Whisper result before the GPT model: ", result, ".text ", result.text, "and the data type: ", type(result.text))
    # build the completion prompt from the user prompt and the transcript
    text = prompt + result.text + "\nPrediction: "
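    # ask the selected completion model for five candidate continuations
    # (n=5 completions of at most 8 tokens each)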
    response = openai.Completion.create(
        model=model,
        prompt=text,
        temperature=temperature,
        max_tokens=8,
        n=5)
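    # collect the five completion texts and strip newlines before returning them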
    infers = []
    temp = []
    infered = []
    for i in range(5):
        print("print1 ", response['choices'][i]['text'])
        temp.append(response['choices'][i]['text'])
    print("print2: infers ", infers)
    print("print3: Responses ", response)
    print("Object type of response: ", type(response))
    #infered = list(map(lambda x: x.split(',')[0], infers))
    #print("Infered type is: ", type(infered))
    infers = list(map(lambda x: x.replace("\n", ""), temp))
    #infered = list(map(lambda x: x.split(','), infers))
    return result.text, state, infers, text
# get audio from microphone
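# Wire up the Gradio UI: microphone audio, the prompt textbox, the model dropdown,
# and the temperature slider feed debug_inference; the outputs show the transcript,
# the session state, the five predictions, and the full prompt sent to the API.
# (Note: "gpt-3.5-turbo" is a chat model and is not served by the Completion endpoint,
# so the text-* completion models are the reliable choices in this dropdown.)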
gr.Interface(
    fn=debug_inference,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath"),
            gr.inputs.Textbox(lines=15, placeholder="Enter a prompt here"),
            gr.inputs.Dropdown(["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"], label="Model"),
            gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.8, step=0.1, label="Temperature"),
            "state"
            ],
    outputs=["textbox", "state", "textbox", "textbox"],
    live=True).launch()