MSchell0129 committed on
Commit a76862a · 1 Parent(s): 51a1b0b

separated files based on function

Files changed (3)
  1. database_search.py +29 -0
  2. model_response.py +28 -0
  3. speech_to_text.py +3 -70
database_search.py ADDED
@@ -0,0 +1,29 @@
+ from langchain import OpenAI, SQLDatabase, SQLDatabaseChain
+ from api_key import open_ai_key
+ from speech_to_text import transcribe
+
+ llm = OpenAI(temperature=0, openai_api_key=open_ai_key)
+
+
+ # Not sure how the data will be stored, but the idea is that when a question or prompt is asked, the audio will be stored as text, which will then be fed into the LLM
+ # to query the database and return the answer.
+
+ # I feel like I need another step here so that the model takes the question, goes to the db, and knows that it needs to look for the answer to the question.
+ # I am wondering if I need to set up an extraction algorithm here, but then how do I link the extraction algorithm to the database?
+ # Creating link to db
+ # I am also wondering if there should be an API for the model to call in order to access the database? Thinking that might be better.
+ def database(question):
+     sqlite_db_path = 'database.db'
+     db = SQLDatabase.from_uri(f'sqlite:///{sqlite_db_path}')
+
+     db_chain = SQLDatabaseChain(llm=llm, database=db)
+
+     db_results = db_chain.run(question)
+     return db_results
+ # After retrieving the data from the database, have the LLM summarize the data and return the answer to the question.
+
+ if __name__ == '__main__':
+     # Establish the question to be asked: transcribe() returns a Whisper result,
+     # and .text holds the transcription ('audio_file_path' is a placeholder).
+     question = transcribe('audio_file_path', 'en').text
+     database(question)
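Note on the open question in the comments above: SQLDatabaseChain already performs the text-to-SQL step itself, so a separate extraction algorithm shouldn't be needed. A minimal sketch of that behavior, assuming the same local database.db (the example question is hypothetical):

from langchain import OpenAI, SQLDatabase, SQLDatabaseChain
from api_key import open_ai_key

llm = OpenAI(temperature=0, openai_api_key=open_ai_key)

# The chain inspects the schema, has the LLM write a SQL query for the
# question, executes it, and phrases the rows as a natural-language answer.
db = SQLDatabase.from_uri('sqlite:///database.db')
db_chain = SQLDatabaseChain(llm=llm, database=db)

# Hypothetical question; anything answerable from database.db works.
print(db_chain.run('How many rows does the largest table have?'))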
model_response.py ADDED
@@ -0,0 +1,28 @@
+ from langchain.llms import OpenAI
+ from langchain.chains.summarize import load_summarize_chain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from api_key import open_ai_key
+ from database_search import database
+
+ llm = OpenAI(temperature=0, openai_api_key=open_ai_key)
+
+ def model_response(db_results):
+     # database() returns the query results as a string, so use them directly as the text to summarize
+     text = db_results
+
+     text_splitter = RecursiveCharacterTextSplitter(separators=['\n\n', '\n'], chunk_size=100, chunk_overlap=0)
+     docs = text_splitter.create_documents([text])
+
+     chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
+
+     output = chain.run(docs)
+
+     # Setup for the model to receive a question and return the answer
+     context = output
+
+     answer = llm(context)
+     # Next part is to take the saved docx file and convert it to an audio file to be played back to the user
+     return answer
+
+ if __name__ == '__main__':
+     model_response(database('What is in the database?'))  # placeholder question
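The closing comment marks playback as the next step. A minimal sketch of that text-to-speech stage, assuming the gTTS package (not currently a dependency of this repo, so purely illustrative):

from gtts import gTTS

def answer_to_audio(answer, filename='answer.mp3'):
    # Convert the model's text answer into an mp3 that can be played back to the user.
    tts = gTTS(text=answer, lang='en')
    tts.save(filename)
    return filename

pyttsx3 would work as an offline alternative if network access is a concern.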
speech_to_text.py CHANGED
@@ -1,12 +1,8 @@
  import openai
  import whisper
- from langchain import OpenAI, SQLDatabase, SQLDatabaseChain
- from langchain.llms import OpenAI
- from langchain.chains.summarize import load_summarize_chain
- from langchain.text_splitter import RecursiveCharacterTextSplitter
+
  from api_key import open_ai_key

- llm = OpenAI(temperature=0, openai_api_key='open_ai_key')



@@ -29,68 +25,5 @@ def transcribe(aud_inp, whisper_lang):
  result = whisper.decode(model, mel, options)
  print(result.text)
  return result
- #Function to convert speech to text
-
- #These two functions might need to go away but I am not entirely sure yet
- # def transcribe_audio(audio_file_path):
- # #not sure what the path to the audio file will be so just putting a string as a place holder
- # with open('audio file path') as audio_file:
- # transcribtion = openai.Audio.transcribe('whisper-1', audio_file)
- # return transcribtion['text']
- # #Save the transcribed text to a docx file
- # def save_as_doc(question, filename):
- # doc=Document()
- # for key, value in minutes.items():
- # heading = ' '.join(word.capitalize() for word in key.split('_'))
- # doc.add_heading(heading, level=1)
- # doc.add_paragraph(value)
- # doc.add_page_break()
- # doc.save(f'{filename}.docx')
- #Not sure how the data will be stored, but my idea is that when a question or prompt is asked the audio file will be stored as text which then be fed into the llm
- #to then query the database and return the answer.
-
- #estbalish the question to be asked
- # question = transcribe
-
- # #I feel like I need another step here so that the model takes the question, goes to the db and knows that it needs to look for the answer to the question
- # # I am wondering if I need to setup an extraction algorithm here, but then how do I link the extraction algorithm to the database?
- # #Creating link to db
- # # I am also wondering if there should be an api for the model to call in order to access the database? Thinking that might be more better?
- # sqlite_db_path = 'sqlite:///database.db'
- # db = SQLDatabase.from_uri(f'sqlite:///{sqlite_db_path}')
-
- # db_chain = SQLDatabaseChain(llm-llm, database=db)
-
- # db_results = db_chain.run(transcribe)
-
- #After retrieving the data from the database, have llm summarize the data and return the answer to the question
-
- # with open(db_results) as file:
- # text = file.read()
-
-
- # text_splitter = RecursiveCharacterTextSplitter(separators = ['\n\n', '\n'], chunk_size = 100, chunk_overlap = 0)
- # docs = text_splitter.create_documents([text])
-
- # chain = load_summarize_chain(llm=llm, chain_type = 'map_reduce')
-
- # output = chain.run(docs)
-
- # #Setup for the model to recevie a question and return the answer
- # context = output
-
-
- # answer = llm(context+question)
-
-
- # def save_as_doc(answer, filename):
- # doc=Document()
- # #not sure what the data will look like, as to what the keys and values will be, so just putting a place holder
- # for key, value in minutes.items():
- # heading = ' '.join(word.capitalize() for word in key.split('_'))
- # doc.add_heading(heading, level=1)
- # doc.add_paragraph(value)
- # doc.add_page_break()
- # doc.save(f'{filename}.docx')
-
- #Next part is to take the saved docx file and convert it to an audio file to be played back to the user
+ if __name__ == '__main__':
+     transcribe('audio_file_path', 'en')
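For context on the part of transcribe() not shown in this hunk: a minimal, self-contained Whisper decoding path consistent with the visible tail looks roughly like this (a sketch only; the model size and fp16 setting are assumptions):

import whisper

def transcribe(aud_inp, whisper_lang):
    # Load a small model, prepare 30 seconds of audio, and decode it to text.
    model = whisper.load_model('base')
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    options = whisper.DecodingOptions(language=whisper_lang, fp16=False)
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result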