Spaces:

michaelschell
/

speech-to-text

Runtime error

File size: 3,952 Bytes

2befe53

import openai
import whisper
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from api_key import open_ai_key

# Module-level LLM client used by the rest of the script.
# Fixed: pass the key imported from api_key (the variable `open_ai_key`),
# not the literal string 'open_ai_key', which is not a valid API key.
llm = OpenAI(temperature=0, openai_api_key=open_ai_key)



#This is another alternative; this block allows detection of the spoken language and also provides lower-level access to the model

def transcribe(aud_inp, whisper_lang):
    """Transcribe the first 30 seconds of an audio input with Whisper.

    Loads the 'base' Whisper model, pads/trims the audio to a 30-second
    window, detects the spoken language, decodes the audio, and prints both
    the detected language and the transcribed text.

    Args:
        aud_inp: Audio source handed to ``whisper.load_audio`` (presumably a
            file path -- confirm against the caller). ``None`` means no audio
            was supplied.
        whisper_lang: Currently unused; the language is auto-detected
            instead. Kept so existing callers keep working.

    Returns:
        ``''`` when ``aud_inp`` is ``None``; otherwise the object returned by
        ``whisper.decode``, whose ``.text`` attribute holds the transcription.
    """
    if aud_inp is None:
        return ''
    # Fixed: the model loader is whisper.load_model (was misspelled
    # 'load_audo', which raised AttributeError on every real call).
    model = whisper.load_model('base')
    # Load the audio and pad/trim it to fit 30 seconds.
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)
    # Make the log-Mel spectrogram and move it to the same device as the model.
    # Fixed: the function is log_mel_spectrogram (was 'log_mel_spectogram').
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # Detect the spoken language; probs maps language -> probability.
    _, probs = model.detect_language(mel)
    print(f'Detected language: {max(probs, key=probs.get)}')
    # Decode the audio with default decoding options.
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result
#Function to convert speech to text

#These two functions might need to go away but I am not entirely sure yet
# def transcribe_audio(audio_file_path):
#     #not sure what the path to the audio file will be so just putting a string as a place holder
#     with open('audio file path') as audio_file:
#         transcribtion = openai.Audio.transcribe('whisper-1', audio_file)
#     return transcribtion['text']
# #Save the transcribed text to a docx file
# def save_as_doc(question, filename):
#     doc=Document()
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')
#Not sure how the data will be stored, but my idea is that when a question or prompt is asked, the audio file will be stored as text, which will then be fed into the llm
#to query the database and return the answer.

#establish the question to be asked
# question = transcribe

# #I feel like I need another step here so that the model takes the question, goes to the db and knows that it needs to look for the answer to the question
# # I am wondering if I need to setup an extraction algorithm here, but then how do I link the extraction algorithm to the database?
# #Creating link to db
# # I am also wondering if there should be an api for the model to call in order to access the database? Thinking that might be better?
# sqlite_db_path = 'sqlite:///database.db'
# db = SQLDatabase.from_uri(f'sqlite:///{sqlite_db_path}')

# db_chain = SQLDatabaseChain(llm-llm, database=db)

# db_results = db_chain.run(transcribe)

#After retrieving the data from the database, have llm summarize the data and return the answer to the question

# with open(db_results) as file:
#     text = file.read()


# text_splitter = RecursiveCharacterTextSplitter(separators = ['\n\n', '\n'], chunk_size = 100, chunk_overlap = 0)
# docs = text_splitter.create_documents([text])

# chain = load_summarize_chain(llm=llm, chain_type = 'map_reduce')

# output = chain.run(docs)

# #Setup for the model to receive a question and return the answer
# context = output


# answer = llm(context+question)


# def save_as_doc(answer, filename):
#     doc=Document()
#     #not sure what the data will look like, as to what the keys and values will be, so just putting a place holder
#     for key, value in minutes.items():
#         heading = ' '.join(word.capitalize() for word in key.split('_'))
#         doc.add_heading(heading, level=1)
#         doc.add_paragraph(value)
#         doc.add_page_break()
#     doc.save(f'{filename}.docx')

#Next part is to take the saved docx file and convert it to an audio file to be played back to the user