awacke1 committed on
Commit
27c0f85
·
1 Parent(s): 5a183a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +398 -187
app.py CHANGED
@@ -1,202 +1,413 @@
1
- import gradio as gr
2
- import os
3
- import json
 
 
 
 
 
 
4
  import requests
 
 
 
5
 
6
# OpenAI chat-completions endpoint; requests are sent with "stream": True.
API_URL = "https://api.openai.com/v1/chat/completions"  # os.getenv("API_URL") + "/generate_stream"

# The OpenAI key is read from the HF_TOKEN environment variable. Add a token to
# this Space and copy it to the repository secret in the Space's settings
# panel; os.environ reads it from there.
# Keys for OpenAI ChatGPT API usage are created here:
# https://platform.openai.com/account/api-keys
OPENAI_API_KEY = os.environ["HF_TOKEN"]
10
-
11
def predict(inputs, top_p, temperature, chat_counter, chatbot=None, history=None):  # repetition_penalty, top_k
    """Stream a chat completion for ``inputs`` and yield UI updates as it grows.

    Args:
        inputs: The user's new message.
        top_p: Nucleus-sampling value sent to the API.
        temperature: Sampling temperature sent to the API.
        chat_counter: Number of turns already completed. When it is 0 the
            earlier ``chatbot`` turns are not sent as context.
        chatbot: Sequence of ``(user_message, assistant_message)`` pairs
            already shown in the chat widget.
        history: Flat list alternating user and assistant messages. It is
            extended in place.

    Yields:
        ``(chat, history, chat_counter)`` after every received content delta,
        where ``chat`` is ``history`` regrouped into (user, assistant) tuples
        and ``chat_counter`` has been incremented by one.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    # Fresh lists per call: a mutable default would be shared between calls
    # and `history.append` below would leak one session into the next.
    chatbot = [] if chatbot is None else chatbot
    history = [] if history is None else history

    # 1. Define the headers; keys come from https://platform.openai.com/account/api-keys
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }

    # 2. Build the message list: previous turns (after the first turn) + new input.
    print(f"chat_counter - {chat_counter}")
    messages = []
    if chat_counter != 0:
        for data in chatbot:
            messages.append({"role": "user", "content": data[0]})
            messages.append({"role": "assistant", "content": data[1]})
    messages.append({"role": "user", "content": inputs})

    # 3. One payload for every turn, so the sliders also apply to the first message.
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    chat_counter += 1

    # 4. POST it to the OpenAI API.
    history.append(inputs)
    print(f"payload is - {payload}")
    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
    # Surface API errors directly instead of failing later while parsing the body.
    response.raise_for_status()

    # 5. Iterate through the event stream and accumulate the reply.
    token_counter = 0
    partial_words = ""
    prefix = "data: "
    for raw_line in response.iter_lines():
        line = raw_line.decode()
        if not line.startswith(prefix):
            continue  # blank keep-alive lines and anything that is not an event
        data = line[len(prefix):]
        if data.strip() == "[DONE]":
            break
        content = json.loads(data)["choices"][0]["delta"].get("content")
        if content is None:
            continue  # e.g. the role-only delta at the start of the stream
        partial_words += content
        if token_counter == 0:
            history.append(" " + partial_words)
        else:
            history[-1] = partial_words
        # Convert the flat history into a list of (user, assistant) tuples.
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history, chat_counter
84
-
85
-
86
def reset_textbox():
    """Return a Gradio update that sets the target component's value to ''."""
    cleared = gr.update(value='')
    return cleared
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
 
 
 
 
 
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
# Episodic and Semantic IO
def list_files(file_path):
    """List the .csv and .txt files in the current working directory.

    Args:
        file_path: Unused; present so the function can be wired to the same
            Gradio input as the other file helpers.

    Returns:
        One line per matching file (icon + name), sorted by file name so the
        listing is stable, or a fixed message when no file matches.
    """
    import os

    icon_csv = "📄 "
    icon_txt = "📑 "
    file_list = []
    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    for filename in sorted(os.listdir(os.getcwd())):
        if filename.endswith(".csv"):
            file_list.append(icon_csv + filename)
        elif filename.endswith(".txt"):
            file_list.append(icon_txt + filename)
    if file_list:
        return "\n".join(file_list)
    return "No .csv or .txt files found in the current directory."
108
-
109
# Function to read a file
def read_file(file_path):
    """Return the text stored at ``file_path``.

    Returns the string "File not found." when the file does not exist.
    """
    try:
        handle = open(file_path, "r")
    except FileNotFoundError:
        return "File not found."
    with handle:
        return handle.read()
118
-
119
# Function to delete a file
def delete_file(file_path):
    """Remove ``file_path`` and return a status message.

    Returns "File not found." when there is no such file.
    """
    import os

    try:
        os.remove(file_path)
    except FileNotFoundError:
        return "File not found."
    return f"{file_path} has been deleted."
127
-
128
# Function to write to a file
def write_file(file_path, content):
    """Overwrite ``file_path`` with ``content`` and return a status message.

    Any ordinary failure (bad path, permissions, non-string content) is
    reported through the returned message instead of being raised.
    """
    try:
        with open(file_path, "w") as file:
            file.write(content)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return "Error occurred while writing to file."
    return f"Successfully written to {file_path}."
136
-
137
# Function to append to a file
def append_file(file_path, content):
    """Append ``content`` to ``file_path`` and return a status message.

    Any ordinary failure (bad path, permissions, non-string content) is
    reported through the returned message instead of being raised.
    """
    try:
        with open(file_path, "a") as file:
            file.write(content)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return "Error occurred while appending to file."
    return f"Successfully appended to {file_path}."
145
-
146
-
147
# Page heading and reference list rendered by the Gradio UI.
title = """<h1 align="center">Generative AI Intelligence Amplifier - GAIA</h1>"""
description = """
## GAIA Dataset References: 📚
- **WebText:** A dataset of web pages crawled from domains on the Alexa top 5,000 list. This dataset was used to pretrain GPT-2.
- [WebText: A Large-Scale Unsupervised Text Corpus by Radford et al.](https://paperswithcode.com/dataset/webtext)
- **Common Crawl:** A dataset of web pages from a variety of domains, which is updated regularly. This dataset was used to pretrain GPT-3.
- [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/common-crawl) by Brown et al.
- **BooksCorpus:** A dataset of over 11,000 books from a variety of genres.
- [Scalable Methods for 8 Billion Token Language Modeling](https://paperswithcode.com/dataset/bookcorpus) by Zhu et al.
- **English Wikipedia:** A dump of the English-language Wikipedia as of 2018, with articles from 2001-2017.
- [Improving Language Understanding by Generative Pre-Training](https://huggingface.co/spaces/awacke1/WikipediaUltimateAISearch?logs=build) Space for Wikipedia Search
- **Toronto Books Corpus:** A dataset of over 7,000 books from a variety of genres, collected by the University of Toronto.
- [Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond](https://paperswithcode.com/dataset/bookcorpus) by Schwenk and Douze.
- **OpenWebText:** A dataset of web pages that were filtered to remove content that was likely to be low-quality or spammy. This dataset was used to pretrain GPT-3.
- [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/openwebtext) by Brown et al.
"""
163
-
164
# 6. Use Gradio to pull it all together
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm the layout (what sits inside the column) against the rendered app.
with gr.Blocks(css="""#col_container {width: 100%; margin-left: auto; margin-right: auto;} #chatbot {height: 400px; overflow: auto;}""") as demo:
    gr.HTML(title)
    with gr.Column(elem_id="col_container"):
        inputs = gr.Textbox(placeholder="Paste Prompt with Context Data Here", label="Type an input and press Enter")
        chatbot = gr.Chatbot(elem_id='chatbot')
        state = gr.State([])
        b1 = gr.Button()
        with gr.Accordion("Parameters", open=False):
            top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
            temperature = gr.Slider(minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
            chat_counter = gr.Number(value=0, visible=True, precision=0)

        # Episodic/Semantic IO: simple file helpers bound to one filename box.
        fileName = gr.Textbox(label="Filename")
        fileContent = gr.TextArea(label="File Content")
        completedMessage = gr.Textbox(label="Completed")
        label = gr.Label()
        with gr.Row():
            listFiles = gr.Button("📄 List File(s)")
            readFile = gr.Button("📖 Read File")
            saveFile = gr.Button("💾 Save File")
            deleteFile = gr.Button("🗑️ Delete File")
            appendFile = gr.Button("➕ Append File")
        listFiles.click(list_files, inputs=fileName, outputs=fileContent)
        readFile.click(read_file, inputs=fileName, outputs=fileContent)
        saveFile.click(write_file, inputs=[fileName, fileContent], outputs=completedMessage)
        deleteFile.click(delete_file, inputs=fileName, outputs=completedMessage)
        appendFile.click(append_file, inputs=[fileName, fileContent], outputs=completedMessage)

    # Both Enter and the button run the prediction, then clear the prompt box.
    inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter])
    b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter])
    b1.click(reset_textbox, [], [inputs])
    inputs.submit(reset_textbox, [], [inputs])
    gr.Markdown(description)

    demo.queue().launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import openai
3
+ import os
4
+ import base64
5
+ import glob
6
+ import json
7
+ import mistune
8
+ import pytz
9
+ import math
10
  import requests
11
+ import time
12
+ import re
13
+ import textract
14
 
15
+ from datetime import datetime
16
+ from openai import ChatCompletion
17
+ from xml.etree import ElementTree as ET
18
+ from bs4 import BeautifulSoup
19
+ from collections import deque
20
+ from audio_recorder_streamlit import audio_recorder
21
+
22
+ from dotenv import load_dotenv
23
+ from PyPDF2 import PdfReader
24
+ from langchain.text_splitter import CharacterTextSplitter
25
+ from langchain.embeddings import OpenAIEmbeddings
26
+ from langchain.vectorstores import FAISS
27
+ from langchain.chat_models import ChatOpenAI
28
+ from langchain.memory import ConversationBufferMemory
29
+ from langchain.chains import ConversationalRetrievalChain
30
+ from templates import css, bot_template, user_template
31
+
32
+
33
+
34
def generate_filename(prompt, file_type):
    """Build a timestamped file name derived from ``prompt``.

    The result is ``<MMDD_HHMM>_<prompt>.<file_type>``: the timestamp is the
    current time in the US/Central zone, and the prompt part keeps only the
    alphanumeric characters of ``prompt``, capped at 90 characters.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    kept_chars = [ch for ch in prompt if ch.isalnum()]
    stem = "".join(kept_chars)[:90]
    return f"{stamp}_{stem}.{file_type}"
39
 
40
+
41
def transcribe_audio(openai_key, file_path, model):
    """Transcribe an audio file via the OpenAI API and chat about the result.

    Args:
        openai_key: API key sent as the Bearer token.
        file_path: Path of the audio file to upload.
        model: Transcription model name sent in the form data.

    Returns:
        The transcript text on success, otherwise ``None``. On success the
        transcript is also sent to ``chat_with_model`` and both transcript and
        reply are saved with ``create_file`` under a generated ``.txt`` name.
    """
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {openai_key}",
    }
    with open(file_path, 'rb') as f:
        response = requests.post(OPENAI_API_URL, headers=headers, files={'file': f}, data={'model': model})

    # Decode the body once; an error reply is not guaranteed to be JSON.
    try:
        body = response.json()
    except ValueError:
        body = response.text

    st.write(body)
    if response.status_code != 200 or not isinstance(body, dict):
        st.error("Error in API call.")
        return None

    transcript = body.get('text')
    chatResponse = chat_with_model(transcript, '')
    filename = generate_filename(transcript, 'txt')
    create_file(filename, transcript, chatResponse)
    return transcript
62
 
63
def save_and_play_audio(audio_recorder):
    """Record audio, save it to a generated .wav file and play it back.

    ``audio_recorder`` is called with no arguments. When it returns nothing
    (falsy), no file is written and ``None`` is returned; otherwise the bytes
    are written to disk, shown in an audio player, and the file name is
    returned.
    """
    audio_bytes = audio_recorder()
    if not audio_bytes:
        return None

    filename = generate_filename("Recording", "wav")
    with open(filename, 'wb') as wav_file:
        wav_file.write(audio_bytes)
    st.audio(audio_bytes, format="audio/wav")
    return filename
72
+
73
def create_file(filename, prompt, response):
    """Write ``prompt`` and ``response`` to ``filename``.

    The separator depends on the extension: a single space for ``.htm``, a
    blank line for ``.md``, and a newline for everything else. Every
    extension produces a file, so callers that build a download link right
    afterwards always find it.
    """
    if filename.endswith(".htm"):
        text = f"{prompt} {response}"
    elif filename.endswith(".md"):
        text = f"{prompt}\n\n{response}"
    else:
        # .txt and any other type (e.g. .py, .csv, .xlsx) share the plain layout.
        text = f"{prompt}\n{response}"
    with open(filename, 'w') as file:
        file.write(text)
83
+
84
def truncate_document(document, length):
    """Return the leading ``length`` items of ``document`` (slice semantics)."""
    head = slice(length)
    return document[head]
86
def divide_document(document, max_length):
    """Split ``document`` into consecutive pieces of at most ``max_length`` items."""
    sections = []
    for start in range(0, len(document), max_length):
        sections.append(document[start:start + max_length])
    return sections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
def get_table_download_link(file_path):
    """Return an HTML ``<a>`` tag that downloads ``file_path`` via a data URI.

    The file is read as raw bytes, so binary files (for example audio
    recordings) are embedded unchanged instead of failing text decoding.

    Args:
        file_path: Path of the file to embed.

    Returns:
        The anchor markup, or ``file_path`` itself if the file cannot be read
        after it has been opened.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    import html

    with open(file_path, 'rb') as file:
        try:
            data = file.read()
        except OSError:
            st.write('')
            return file_path
    b64 = base64.b64encode(data).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]  # get the file extension
    mime_types = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'text/plain',
        '.csv': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
    }
    mime_type = mime_types.get(ext, 'application/octet-stream')  # general binary data type
    # The markup is rendered with unsafe_allow_html, so escape the file name.
    safe_name = html.escape(file_name, quote=True)
    href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{safe_name}">{safe_name}</a>'
    return href
115
 
116
def CompressXML(xml_text):
    """Parse ``xml_text``, drop elements whose tag contains 'Comment', re-serialize.

    Args:
        xml_text: XML document as a string.

    Returns:
        The document as a unicode string without the matching elements. A
        removed element takes its children and tail text with it. The root
        element is never removed.
    """
    root = ET.fromstring(xml_text)
    # ElementTree elements carry no parent pointer, so build the lookup once.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
122
 
123
def read_file_content(file,max_length):
    """Return the text of an uploaded file, chosen by its MIME type.

    Args:
        file: Uploaded file object exposing ``type`` plus the file-like
            methods used below (``read`` / ``getvalue``).
        max_length: Not used by this function.

    Returns:
        A string: the ``str()`` of parsed JSON, the text of an HTML page, the
        output of ``CompressXML`` for XML, rendered Markdown, or decoded plain
        text. Any other MIME type yields ``""``.
    """
    mime = file.type
    if mime == "application/json":
        return str(json.load(file))
    if mime in ("text/html", "text/htm"):
        return BeautifulSoup(file, "html.parser").text
    if mime in ("application/xml", "text/xml"):
        root = ET.parse(file).getroot()
        return CompressXML(ET.tostring(root, encoding='unicode'))
    if mime in ("text/markdown", "text/md"):
        render = mistune.create_markdown()
        return render(file.read().decode())
    if mime == "text/plain":
        return file.getvalue().decode()
    return ""
143
+
144
def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
    """Stream a chat completion and show the growing reply in the page.

    Args:
        prompt: User message.
        document_section: Extra context; when non-empty it is appended to the
            conversation as an 'assistant' message.
        model_choice: Name of the chat model to call.

    Returns:
        The complete reply text assembled from the streamed deltas.
    """
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if len(document_section)>0:
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    res_box = st.empty()
    pieces = []

    for chunk in openai.ChatCompletion.create(
        model=model_choice,  # honor the caller's selection instead of a fixed model
        messages=conversation,
        temperature=0.5,
        stream=True
    ):
        content = chunk["choices"][0].get("delta", {}).get("content")
        # Deltas without text (role-only or empty) carry nothing to display;
        # storing None here would make every later join fail.
        if not content:
            continue
        pieces.append(content)
        result = "".join(pieces).strip()
        res_box.markdown(f'*{result}*')

    full_reply_content = ''.join(pieces)
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    return full_reply_content
183
+
184
def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Send ``prompt`` (plus optional file text) in one non-streaming chat call.

    A non-empty ``file_content`` is appended to the conversation as an
    'assistant' message. Returns the content of the first choice's message.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if len(file_content)>0:
        conversation += [{'role': 'assistant', 'content': file_content}]
    response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
    first_choice = response['choices'][0]
    return first_choice['message']['content']
191
 
192
def extract_mime_type(file):
    """Return the MIME type of an uploaded file or of its textual repr.

    Args:
        file: Either a string containing ``type='<mime>'`` or an object with
            a string ``type`` attribute (such as a Streamlit uploaded file).

    Returns:
        The MIME type string.

    Raises:
        ValueError: If ``file`` is a string without a ``type='...'`` field.
        TypeError: If ``file`` is neither a string nor an object with a
            string ``type`` attribute.
    """
    if isinstance(file, str):
        match = re.search(r"type='(.*?)'", file)
        if match:
            return match.group(1)
        raise ValueError(f"Unable to extract MIME type from {file}")

    # Duck-type the upload object: this module imports Streamlit only as `st`,
    # so there is no `streamlit` name to run an isinstance check against.
    mime_type = getattr(file, "type", None)
    if isinstance(mime_type, str):
        return mime_type
    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
+ from io import BytesIO
208
+ import re
209
+
210
def extract_file_extension(file):
    """Return the extension of an uploaded file's name, without the dot.

    Only the text after the last dot counts, so ``report.v2.pdf`` yields
    ``pdf``.

    Args:
        file: Object with a ``name`` attribute (e.g. an uploaded file).

    Raises:
        ValueError: If the name contains no dot.
    """
    # get the file name directly from the UploadedFile object
    file_name = file.name
    _, dot, extension = file_name.rpartition(".")
    if not dot:
        raise ValueError(f"Unable to extract file extension from {file_name}")
    return extension
219
+
220
def pdf2txt(docs):
    """Concatenate the text of the uploaded documents.

    Plain-text types (py, txt, html, htm, xml, json) are decoded as UTF-8 and
    PDFs are read page by page with PyPDF2. Other extensions contribute
    nothing. A failure while reading one file is shown in the page and the
    remaining files are still processed.
    """
    plain_text_types = ['py', 'txt', 'html', 'htm', 'xml', 'json']
    text = ""
    for file in docs:
        file_extension = extract_file_extension(file)
        # print the file extension
        st.write(f"File type extension: {file_extension}")
        kind = file_extension.lower()

        # read the file according to its extension
        try:
            if kind in plain_text_types:
                text += file.getvalue().decode('utf-8')
            elif kind == 'pdf':
                from PyPDF2 import PdfReader
                pdf = PdfReader(BytesIO(file.getvalue()))
                for page in pdf.pages:
                    text += page.extract_text()  # new PyPDF2 syntax
        except Exception as e:
            st.write(f"Error processing file {file.name}: {e}")

    return text
240
+
241
def pdf2txt_old(pdf_docs):
    """Older PDF-only extractor: report each file's MIME type, then join page text.

    Writes ``pdf_docs`` and every file's MIME type to the page, then returns
    the concatenated text of all pages of all documents.
    """
    st.write(pdf_docs)
    for file in pdf_docs:
        st.write(f"MIME type of file: {extract_mime_type(file)}")

    return "".join(
        page.extract_text()
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
253
+
254
def txt2chunks(text):
    """Split ``text`` on newlines into chunks of size 1000 with overlap 200."""
    splitter_options = dict(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
    return CharacterTextSplitter(**splitter_options).split_text(text)
257
+
258
def vector_store(text_chunks):
    """Embed ``text_chunks`` with OpenAI embeddings and index them in FAISS.

    The API key is read from the OPENAI_API_KEY environment variable.
    """
    embedder = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
    index = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    return index
262
+
263
def get_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    The chain uses a default ``ChatOpenAI`` model and a buffer memory stored
    under the 'chat_history' key.
    """
    chat_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        retriever=vectorstore.as_retriever(),
        memory=chat_memory,
    )
    return chain
267
+
268
def process_user_input(user_question):
    """Ask the document chain a question, render the chat, and save the answer.

    Args:
        user_question: Question typed by the user.

    The chain is read from ``st.session_state.conversation``. When no chain
    has been stored there yet, a warning is shown and nothing else happens.
    Otherwise every message of the returned history is rendered (even
    positions with the user template, odd positions with the bot template)
    and the last message is saved, with the question, to a generated .txt file.
    """
    if "conversation" not in st.session_state:
        # The chain only exists once documents have been indexed.
        st.warning("Upload documents first so they can be indexed.")
        return

    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    last_content = None
    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
        last_content = message.content

    # Save file output from PDF query results: written once, with the final
    # message, instead of rewriting the same file for every message.
    if last_content is not None:
        filename = generate_filename(user_question, 'txt')
        create_file(filename, user_question, last_content)

    #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
277
+
278
+ #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
279
+
280
+
281
def main():
    """Render the main Streamlit page.

    In order: audio recording and transcription, the prompt box, an optional
    context-file upload split into sections that can each be sent to the
    model, a chat button for the whole upload, and a sidebar listing saved
    files with view / open / search / delete buttons.
    """
    # Sidebar and global
    openai.api_key = os.getenv('OPENAI_API_KEY')
    st.set_page_config(page_title="GPT Streamlit Document Reasoner",layout="wide")

    # File type for output, model choice
    menu = ["htm", "txt", "xlsx", "csv", "md", "py"]  #619
    choice = st.sidebar.selectbox("Output File Type:", menu)
    model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))

    # Audio, transcribe, GPT:
    filename = save_and_play_audio(audio_recorder)
    if filename is not None:
        transcribe_audio(openai.api_key, filename, "whisper-1")
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    # prompt interfaces
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    # file section interface for prompts against large documents as context
    collength, colupload = st.columns([2,3])  # adjust the ratio as needed
    with collength:
        max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx","csv","html", "htm", "md", "txt"])

    # Document section chat
    document_sections = deque()
    document_responses = {}
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))
    if len(document_sections) > 0:
        if st.button("👁️ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(document_sections):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(document_sections):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            else:
                if st.button(f"Chat about Section {i+1}"):
                    st.write('Reasoning with your inputs...')
                    response = chat_with_model(user_prompt, section, model_choice)
                    st.write('Response:')
                    st.write(response)
                    document_responses[i] = response
                    filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
                    create_file(filename, user_prompt, response)
                    st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    if st.button('💬 Chat'):
        st.write('Reasoning with your inputs...')
        response = chat_with_model(user_prompt, ''.join(document_sections), model_choice)
        st.write('Response:')
        st.write(response)

        filename = generate_filename(user_prompt, choice)
        create_file(filename, user_prompt, response)
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    all_files = glob.glob("*.*")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order

    # sidebar of files: the clicked button decides what happens after the loop
    file_contents=''
    next_action=''
    for file in all_files:
        col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1])  # adjust the ratio as needed
        with col1:
            if st.button("🌐", key="md_"+file):  # md emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                    next_action='md'
        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("📂", key="open_"+file):  # open emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                    next_action='open'
        with col4:
            if st.button("🔍", key="read_"+file):  # search emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                    next_action='search'
        with col5:
            if st.button("🗑", key="delete_"+file):
                os.remove(file)
                st.experimental_rerun()

    if len(file_contents) > 0:
        if next_action=='open':
            st.text_area("File Contents:", file_contents, height=500)
        if next_action=='md':
            st.markdown(file_contents)
        if next_action=='search':
            st.text_area("File Contents:", file_contents, height=500)
            st.write('Reasoning with your inputs...')
            response = chat_with_model(user_prompt, file_contents, model_choice)
            filename = generate_filename(file_contents, choice)
            create_file(filename, file_contents, response)

            # Rerun so the newly saved file shows up in the sidebar list.
            st.experimental_rerun()
            #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
389
+
390
# Load .env before main() runs: main() reads OPENAI_API_KEY through os.getenv,
# so values defined only in the .env file must already be in the environment.
load_dotenv()

if __name__ == "__main__":
    main()

st.write(css, unsafe_allow_html=True)

# Question box for the indexed documents.
st.header("Chat with documents :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    process_user_input(user_question)

# Sidebar upload: extract text, build the vector index and the chat chain.
with st.sidebar:
    st.subheader("Your documents")
    docs = st.file_uploader("import documents", accept_multiple_files=True)
    with st.spinner("Processing"):
        raw = pdf2txt(docs)
        if len(raw) > 0:
            length = str(len(raw))
            text_chunks = txt2chunks(raw)
            vectorstore = vector_store(text_chunks)
            st.session_state.conversation = get_chain(vectorstore)
            st.markdown('# AI Search Index of Length:' + length + ' Created.')  # add timing
            filename = generate_filename(raw, 'txt')
            create_file(filename, raw, '')