import openai
import whisper
from langchain import SQLDatabase, SQLDatabaseChain
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from api_key import open_ai_key

llm = OpenAI(temperature=0, openai_api_key=open_ai_key)

#This is another alternative, but this block allows for the detection of the language and it also provides lower-level access to the model
def transcribe(aud_inp, whisper_lang):
    if aud_inp is None:
        return ''
    model = whisper.load_model('base')
    #load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)
    #make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    #detect the spoken language
    _, probs = model.detect_language(mel)
    print(f'Detected language: {max(probs, key=probs.get)}')
    #decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result

#Function to convert speech to text
#These two functions might need to go away but I am not entirely sure yet
# def transcribe_audio(audio_file_path):
#     #not sure what the path to the audio file will be, so the argument is just a place holder for now
#     with open(audio_file_path, 'rb') as audio_file:
#         transcription = openai.Audio.transcribe('whisper-1', audio_file)
#     return transcription['text']

# #Save the transcribed text to a docx file
# def save_as_doc(question, filename):
#     doc = Document()
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')

#Not sure how the data will be stored, but my idea is that when a question or prompt is asked the audio will be stored as text, which will then be fed into the llm
#to then query the database and return the answer.

#establish the question to be asked
# question = transcribe(aud_inp, whisper_lang).text

# #I feel like I need another step here so that the model takes the question, goes to the db and knows that it needs to look for the answer to the question
# # I am wondering if I need to set up an extraction algorithm here, but then how do I link the extraction algorithm to the database?

# #Creating link to db
# # I am also wondering if there should be an api for the model to call in order to access the database? Thinking that might be better?
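# A hedged sketch (not part of the draft above) of the "another step" mentioned in the notes:
# turn the whisper result into a plain question string that the db chain below can consume.
# The default path 'question.wav' is a hypothetical placeholder.
def get_question(aud_inp='question.wav', whisper_lang=None):
    # transcribe() above returns a whisper DecodingResult (or '' when there is no audio);
    # only the plain text of the spoken question is needed downstream
    result = transcribe(aud_inp, whisper_lang)
    return result.text if result else ''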
# sqlite_db_path = 'database.db'
# db = SQLDatabase.from_uri(f'sqlite:///{sqlite_db_path}')
# db_chain = SQLDatabaseChain(llm=llm, database=db)
# db_results = db_chain.run(question)

#After retrieving the data from the database, have llm summarize the data and return the answer to the question
# #db_chain.run returns the result as a string, so it can be split and summarized directly
# text = db_results
# text_splitter = RecursiveCharacterTextSplitter(separators=['\n\n', '\n'], chunk_size=100, chunk_overlap=0)
# docs = text_splitter.create_documents([text])
# chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
# output = chain.run(docs)

# #Setup for the model to receive a question and return the answer
# context = output
# answer = llm(context + question)

# def save_as_doc(answer, filename):
#     doc = Document()
#     #not sure what the data will look like, as to what the keys and values will be, so just putting a place holder
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')

#Next part is to take the saved docx file and convert it to an audio file to be played back to the user
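# A hedged sketch of that playback step. It assumes the python-docx and gTTS packages
# (neither is imported at the top of this file) and that the answer was saved with
# save_as_doc(); the filenames are placeholders.
def doc_to_audio(filename, out_path='answer.mp3'):
    from docx import Document  # python-docx, the same package save_as_doc would need
    from gtts import gTTS      # one possible text-to-speech backend
    doc = Document(f'{filename}.docx')
    # join every non-empty paragraph of the saved answer into one string for the TTS engine
    text = '\n'.join(p.text for p in doc.paragraphs if p.text.strip())
    gTTS(text=text, lang='en').save(out_path)
    return out_path

# e.g. doc_to_audio('meeting_answer') would read meeting_answer.docx and write answer.mp3 for playback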