# speech-to-text/speech_to_text.py
import openai
import whisper
from langchain import SQLDatabase, SQLDatabaseChain
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from api_key import open_ai_key
llm = OpenAI(temperature=0, openai_api_key=open_ai_key)
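# api_key.py is assumed to define open_ai_key = 'sk-...' so the key stays out of this file.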
# This is another alternative: this block allows for detection of the spoken language and also provides lower-level access to the model.
def transcribe(aud_inp, whisper_lang):
    if aud_inp is None:
        return ''
    model = whisper.load_model('base')
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)
    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # detect the spoken language
    _, probs = model.detect_language(mel)
    print(f'Detected language: {max(probs, key=probs.get)}')
    # decode the audio, honoring the requested language if one was given
    options = whisper.DecodingOptions(language=whisper_lang) if whisper_lang else whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text
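# Hedged usage sketch ('question.wav' is a hypothetical path; whisper relies on
# ffmpeg being installed to decode audio):
#   question_text = transcribe('question.wav', 'en')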
# Function to convert speech to text
# These two functions might need to go away, but I am not entirely sure yet
# def transcribe_audio(audio_file_path):
#     # not sure what the path to the audio file will be, so the parameter is a placeholder
#     with open(audio_file_path, 'rb') as audio_file:
#         transcription = openai.Audio.transcribe('whisper-1', audio_file)
#     return transcription['text']
# # Save the transcribed text to a docx file (needs python-docx: from docx import Document)
# def save_as_doc(minutes, filename):
#     doc = Document()
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')
# Not sure how the data will be stored, but my idea is that when a question or prompt is asked,
# the audio will be stored as text, which will then be fed into the LLM to query the database
# and return the answer.
# establish the question to be asked
# question = transcribe(aud_inp, whisper_lang)
# I feel like I need another step here so that the model takes the question, goes to the db,
# and knows that it needs to look for the answer to the question.
# I am wondering if I need to set up an extraction algorithm here, but then how do I link the
# extraction algorithm to the database?
# Creating link to db
# I am also wondering if there should be an API for the model to call in order to access the
# database? Thinking that might be better.
# sqlite_db_path = 'database.db'
# db = SQLDatabase.from_uri(f'sqlite:///{sqlite_db_path}')
# db_chain = SQLDatabaseChain(llm=llm, database=db)
# db_results = db_chain.run(question)
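# A minimal runnable sketch of the DB-query step drafted above, wrapped in a
# function so nothing runs at import time. 'database.db' is a placeholder path,
# and this assumes the classic LangChain SQLDatabaseChain API.
def answer_from_db(question):
    db = SQLDatabase.from_uri('sqlite:///database.db')
    db_chain = SQLDatabaseChain(llm=llm, database=db)
    return db_chain.run(question)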
# After retrieving the data from the database, have the LLM summarize it and return the answer
# to the question. db_chain.run returns a string, so split it directly rather than opening it
# as a file.
# text = db_results
# text_splitter = RecursiveCharacterTextSplitter(separators=['\n\n', '\n'], chunk_size=100, chunk_overlap=0)
# docs = text_splitter.create_documents([text])
# chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
# output = chain.run(docs)
# Setup for the model to receive a question and return the answer
# context = output
# answer = llm(context + question)
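# The same summarize-then-answer flow as a runnable sketch, under the same classic
# LangChain assumptions; db_text is whatever string the SQL chain returned.
def summarize_and_answer(db_text, question):
    text_splitter = RecursiveCharacterTextSplitter(separators=['\n\n', '\n'], chunk_size=100, chunk_overlap=0)
    docs = text_splitter.create_documents([db_text])
    chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
    context = chain.run(docs)
    return llm(context + question)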
# def save_as_doc(answer, filename):
#     doc = Document()
#     # not sure what the data will look like (what the keys and values will be), so 'minutes' is a placeholder
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')
# The next part is to take the saved docx file and convert it to an audio file that is played
# back to the user; a sketch follows below.
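# Hedged sketch of that playback step. python-docx and gTTS are assumptions (the
# project has not picked libraries yet), and the file names are placeholders.
def docx_to_audio(docx_path, audio_path):
    from docx import Document
    from gtts import gTTS
    doc = Document(docx_path)
    text = '\n'.join(p.text for p in doc.paragraphs)
    gTTS(text=text, lang='en').save(audio_path)
# e.g. docx_to_audio('answer.docx', 'answer.mp3')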