whisper_fileStream

Sleeping

App Files Files Community

whisper_fileStream / app.py

Firefly777a

Major Changes changed approach to now do debugging

0a574ec over 2 years ago

raw

history blame

3.67 kB


	'''
	This script transcribes microphone audio with Whisper and calls an OpenAI completion model to predict the next few words.
	'''
	import os
	os.system("pip install --upgrade pip")
	from pprint import pprint
	os.system("pip install git+https://github.com/openai/whisper.git")
	import sys
	print("Sys: ", sys.executable)
	os.system("pip install openai")
	import openai
	import gradio as gr
	import whisper
	from transformers import pipeline
	import torch
	from transformers import AutoModelForCausalLM
	from transformers import AutoTokenizer
	import time

	# PROMPT = """This is a tool for helping someone with memory issues remember the next word.

	# The predictions follow a few rules:
	# 1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
	# 2) The predictions do not repeat themselves.
	# 3) The predictions focus on suggesting nouns, adjectives, and verbs.
	# 4) The predictions are related to the context in the transcript.

	# EXAMPLES:
	# Transcript: Tomorrow night we're going out to
	# Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
	# Transcript: I would like to order a cheeseburger with a side of
	# Prediction: French fries, Milkshake, Apple slices, Side salad, Extra ketchup
	# Transcript: My friend Savanah is
	# Prediction: An electrical engineer, A marine biologist, A classical musician
	# Transcript: I need to buy a birthday
	# Prediction: Present, Gift, Cake, Card
	# Transcript: """

	# Whisper speech-to-text model, loaded once at import time from the "tiny" checkpoint.
	model = whisper.load_model("tiny")

	# Read the OpenAI API key from the "Openai_APIkey" environment variable.
	# A missing variable raises KeyError here, before the UI below is built.
	openai.api_key = os.environ["Openai_APIkey"]

	def debug_inference(audio, prompt, model, temperature, state=""):
	    """Transcribe a recording with Whisper and request next-word predictions.

	    Args:
	        audio: Path to the recorded audio file, or None when nothing has
	            been recorded yet.
	        prompt: Text placed in front of the transcript to form the
	            completion prompt.
	        model: Name of the OpenAI model to query. This parameter shadows
	            the module-level Whisper model of the same name.
	        temperature: Sampling temperature forwarded to the completion call.
	        state: Session value; returned unchanged.

	    Returns:
	        A tuple ``(transcript, state, infers, text)`` where ``transcript``
	        is the Whisper transcription, ``infers`` is the list of completion
	        texts with newlines removed, and ``text`` is the full prompt that
	        was sent. When ``audio`` is None the tuple is ``("", state, [], "")``.

	    NOTE(review): the request goes through ``openai.Completion.create``;
	    chat-only models such as "gpt-3.5-turbo" are presumably not served by
	    that endpoint -- confirm against the OpenAI API reference.
	    """
	    # Nothing to transcribe yet: return empty results instead of letting
	    # whisper.load_audio fail on a missing path.
	    if audio is None:
	        return "", state, [], ""

	    # The ``model`` argument is the OpenAI model name (a string), so the
	    # Whisper model loaded at module level has to be fetched explicitly.
	    whisper_model = globals()["model"]

	    # load audio data and ensure the sample is in the correct format for inference
	    audio = whisper.load_audio(audio)
	    audio = whisper.pad_or_trim(audio)

	    # generate a log-mel spectrogram of the audio data on the Whisper model's device
	    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

	    # transcribe speech to text; no language is set in the options, so the
	    # separate detect_language call (whose result was unused) is dropped
	    options = whisper.DecodingOptions(fp16=False)
	    result = whisper.decode(whisper_model, mel, options)
	    print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))

	    text = prompt + result.text + "\nPrediction: "

	    response = openai.Completion.create(
	        model=model,
	        prompt=text,
	        temperature=temperature,
	        max_tokens=8,
	        n=5)

	    print("print3: Responses ", response)
	    print("Object type of response: ", type(response))

	    # Collect every returned choice rather than assuming exactly five.
	    temp = []
	    for choice in response['choices']:
	        print("print1 ", choice['text'])
	        temp.append(choice['text'])

	    # Strip newlines so each prediction displays on a single line.
	    infers = [candidate.replace("\n", "") for candidate in temp]
	    print("print2: infers ", infers)

	    return result.text, state, infers, text

	# Input widgets, built in display order: microphone recording (handed to the
	# callback as a file path), prompt text, OpenAI model choice and temperature.
	mic_input = gr.inputs.Audio(source="microphone", type="filepath")
	prompt_input = gr.inputs.Textbox(lines=15, placeholder="Enter a prompt here")
	model_choice = gr.inputs.Dropdown(
	    ["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"],
	    label="Model",
	)
	temperature_input = gr.inputs.Slider(
	    minimum=0.0, maximum=1.0, default=0.8, step=0.1, label="Temperature"
	)

	# Wire the widgets to debug_inference; the trailing "state" entries carry the
	# session value in and out of the callback.
	demo = gr.Interface(
	    fn=debug_inference,
	    inputs=[mic_input, prompt_input, model_choice, temperature_input, "state"],
	    outputs=["textbox", "state", "textbox", "textbox"],
	    live=True,
	)
	demo.launch()