File size: 3,672 Bytes
2cc8a36
 
0a574ec
2cc8a36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a574ec
2cc8a36
0a574ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2cc8a36
 
 
 
0a574ec
2cc8a36
0a574ec
2cc8a36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a574ec
2cc8a36
 
0a574ec
2cc8a36
0a574ec
2cc8a36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a574ec
2cc8a36
 
 
0a574ec
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

'''
This script calls the model from openai api to predict the next few words.
'''
import os
os.system("pip install --upgrade pip")
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import sys
print("Sys: ", sys.executable)
os.system("pip install openai")
import openai
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time

# PROMPT = """This is a tool for helping someone with memory issues remember the next word. 

# The predictions follow a few rules:
# 1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
# 2) The predictions do not repeat themselves.
# 3) The predictions focus on suggesting nouns, adjectives, and verbs.
# 4) The predictions are related to the context in the transcript.
    
# EXAMPLES:
# Transcript: Tomorrow night we're going out to 
# Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend   
# Transcript: I would like to order a cheeseburger with a side of
# Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup 
# Transcript: My friend Savanah is
# Prediction: An elecrical engineer, A marine biologist, A classical musician 
# Transcript: I need to buy a birthday
# Prediction: Present, Gift, Cake, Card
# Transcript: """

# whisper model specification 
model = whisper.load_model("tiny")

openai.api_key = os.environ["Openai_APIkey"]
        
def debug_inference(audio, prompt, model, temperature, state=""):
    # load audio data
    audio = whisper.load_audio(audio)
    # ensure sample is in correct format for inference
    audio = whisper.pad_or_trim(audio)

    # generate a log-mel spetrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    
    _, probs = model.detect_language(mel)

    # decode audio data
    options = whisper.DecodingOptions(fp16 = False)
    # transcribe speech to text
    result = whisper.decode(model, mel, options)
    print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))

    text = prompt + result.text + "\nPrediction: "
    
    response = openai.Completion.create(
                        model=model,
                        prompt=text,
                        temperature=temperature,
                        max_tokens=8,
                        n=5)

    infers = []
    temp = []
    infered=[]
    for i in range(5):
        print("print1 ", response['choices'][i]['text'])
        temp.append(response['choices'][i]['text'])
        print("print2: infers ", infers)
        print("print3: Responses ", response)
        print("Object type of response: ", type(response))
        #infered = list(map(lambda x: x.split(',')[0], infers))
        #print("Infered type is: ", type(infered))
        infers = list(map(lambda x: x.replace("\n", ""), temp))
        #infered = list(map(lambda x: x.split(','), infers))

    return result.text, state, infers, text

# get audio from microphone 
gr.Interface(
    fn=debug_inference, 
    inputs=[gr.inputs.Audio(source="microphone", type="filepath"),
            gr.inputs.Textbox(lines=15, placeholder="Enter a prompt here"),
            gr.inputs.Dropdown(["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"], label="Model"),
            gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.8, step=0.1, label="Temperature"),
            "state"
            ],
    outputs=["textbox","state","textbox", "textbox"],
    live=True).launch()