# speech-to-text/speech_to_text.py
import openai
import whisper
from langchain import SQLDatabase, SQLDatabaseChain
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from api_key import open_ai_key
llm = OpenAI(temperature=0, openai_api_key=open_ai_key)
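# api_key.py is assumed to define open_ai_key = 'sk-...' so the key stays out of this file.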
# This is another alternative: this block allows for detection of the spoken language and also provides lower-level access to the model.
def transcribe(aud_inp, whisper_lang):
    if aud_inp is None:
        return ''
    model = whisper.load_model('base')
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)
    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # detect the spoken language
    _, probs = model.detect_language(mel)
    print(f'Detected language: {max(probs, key=probs.get)}')
    # decode the audio, honoring the requested language if one was given
    options = whisper.DecodingOptions(language=whisper_lang) if whisper_lang else whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text
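# Hedged usage sketch ('question.wav' is a hypothetical path; whisper relies on
# ffmpeg being installed to decode audio):
#   question_text = transcribe('question.wav', 'en')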
# Function to convert speech to text
# These two functions might need to go away, but I am not entirely sure yet
# def transcribe_audio(audio_file_path):
#     # not sure what the path to the audio file will be, so the parameter is a placeholder
#     with open(audio_file_path, 'rb') as audio_file:
#         transcription = openai.Audio.transcribe('whisper-1', audio_file)
#     return transcription['text']
# # Save the transcribed text to a docx file (needs python-docx: from docx import Document)
# def save_as_doc(minutes, filename):
#     doc = Document()
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')
# Not sure how the data will be stored, but my idea is that when a question or prompt is asked,
# the audio will be stored as text, which will then be fed into the LLM to query the database
# and return the answer.
# establish the question to be asked
# question = transcribe(aud_inp, whisper_lang)
# I feel like I need another step here so that the model takes the question, goes to the db,
# and knows that it needs to look for the answer to the question.
# I am wondering if I need to set up an extraction algorithm here, but then how do I link the
# extraction algorithm to the database?
# Creating link to db
# I am also wondering if there should be an API for the model to call in order to access the
# database? Thinking that might be better.
# sqlite_db_path = 'database.db'
# db = SQLDatabase.from_uri(f'sqlite:///{sqlite_db_path}')
# db_chain = SQLDatabaseChain(llm=llm, database=db)
# db_results = db_chain.run(question)
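# A minimal runnable sketch of the DB-query step drafted above, wrapped in a
# function so nothing runs at import time. 'database.db' is a placeholder path,
# and this assumes the classic LangChain SQLDatabaseChain API.
def answer_from_db(question):
    db = SQLDatabase.from_uri('sqlite:///database.db')
    db_chain = SQLDatabaseChain(llm=llm, database=db)
    return db_chain.run(question)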
# After retrieving the data from the database, have the LLM summarize it and return the answer
# to the question. db_chain.run returns a string, so split it directly rather than opening it
# as a file.
# text = db_results
# text_splitter = RecursiveCharacterTextSplitter(separators=['\n\n', '\n'], chunk_size=100, chunk_overlap=0)
# docs = text_splitter.create_documents([text])
# chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
# output = chain.run(docs)
# Setup for the model to receive a question and return the answer
# context = output
# answer = llm(context + question)
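# The same summarize-then-answer flow as a runnable sketch, under the same classic
# LangChain assumptions; db_text is whatever string the SQL chain returned.
def summarize_and_answer(db_text, question):
    text_splitter = RecursiveCharacterTextSplitter(separators=['\n\n', '\n'], chunk_size=100, chunk_overlap=0)
    docs = text_splitter.create_documents([db_text])
    chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
    context = chain.run(docs)
    return llm(context + question)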
# def save_as_doc(answer, filename):
#     doc = Document()
#     # not sure what the data will look like (what the keys and values will be), so 'minutes' is a placeholder
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')
# The next part is to take the saved docx file and convert it to an audio file that is played
# back to the user; a sketch follows below.
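# Hedged sketch of that playback step. python-docx and gTTS are assumptions (the
# project has not picked libraries yet), and the file names are placeholders.
def docx_to_audio(docx_path, audio_path):
    from docx import Document
    from gtts import gTTS
    doc = Document(docx_path)
    text = '\n'.join(p.text for p in doc.paragraphs)
    gTTS(text=text, lang='en').save(audio_path)
# e.g. docx_to_audio('answer.docx', 'answer.mp3')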