'''
This script transcribes microphone audio with Whisper and then calls the OpenAI
text-ada-001 completion model to predict a few words that may come next.
'''
import os

# Install the OpenAI and Whisper packages at startup (Hugging Face Spaces workaround).
os.system("pip install git+https://github.com/openai/openai-python.git")
os.system("pip install git+https://github.com/openai/whisper.git")

import openai
import gradio as gr
import whisper
from transformers import pipeline
# Earlier experiments with a local GPT-2 next-word predictor, kept for reference:
# import streaming.py
# from next_word_prediction import GPT2
# gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
# tokenizer = AutoTokenizer.from_pretrained("gpt2")
# GPT-2 text-generation pipeline (loaded here but not used in the inference function below).
generator = pipeline('text-generation', model='gpt2')

# Whisper speech-to-text model, "tiny" checkpoint.
model = whisper.load_model("tiny")
def inference(audio, state=""):
#time.sleep(2)
#text = p(audio)["text"]
#state += text + " "
# load audio data
audio = whisper.load_audio(audio)
# ensure sample is in correct format for inference
audio = whisper.pad_or_trim(audio)
# generate a log-mel spetrogram of the audio data
mel = whisper.log_mel_spectrogram(audio).to(model.device)
_, probs = model.detect_language(mel)
# decode audio data
options = whisper.DecodingOptions(fp16 = False)
# transcribe speech to text
result = whisper.decode(model, mel, options)
PROMPT = """The following is a transcript of a conversation. Predict a few nouns, verbs, or adjectives that may be used next. Predict the next few words as a list of options.
A few examples are provided below and then the current transcript is provided.
Examples:
Transcript: Tomorrow night we're going out to
Next: The Movies, A Restaurant, A Baseball Game, The Theater, A Party
Transcript: I would like to order a cheeseburger with a side of
Next: Fries, Milkshake, Apples, Salad, Katsup
"""
    # Append the live transcript to the few-shot prompt and ask text-ada-001
    # for five candidate completions of up to 6 tokens each.
    text = PROMPT + result.text

    openai.api_key = os.environ["Openai_APIkey"]
    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=1,
        max_tokens=6,
        n=5)
    # Collect the five completions. Use append so each prediction stays a whole
    # string (the original "+=" spliced the text in character by character).
    infers = []
    for choice in response['choices']:
        infers.append(choice['text'])
    print("Predictions: ", infers)

    return result.text, state, response
# Build a live Gradio interface: microphone audio in; transcript, state, and
# model response out.
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state"
    ],
    outputs=[
        "textbox",
        "state",
        "textbox"
    ],
    live=True).launch()