Mattral committed on
Commit aa8e6f0 · verified · 1 Parent(s): 46c5199

Update app.py

Files changed (1): app.py +65 -55
app.py CHANGED
@@ -35,7 +35,12 @@ def get_page_urls(url):
         links.append(url)
     return set(links)
 
-
+@st.cache(allow_output_mutation=True)
+def process_pdf(file):
+    # Read the uploaded PDF from an in-memory stream and return its extracted text.
+    doc = fitz.open(stream=file, filetype="pdf")
+    texts = [page.get_text() for page in doc]
+    return '\n'.join(texts)
 
 def get_url_content(url):
     response = requests.get(url)
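
Note: the new process_pdf helper relies on PyMuPDF (imported as fitz), which the PDF branch of get_url_content already uses. A minimal standalone sketch of the same extraction, assuming PyMuPDF is installed and "sample.pdf" is a hypothetical test file:

    import fitz  # PyMuPDF (pip install pymupdf)

    def extract_pdf_text(data: bytes) -> str:
        # Open the document from an in-memory buffer; filetype tells
        # PyMuPDF how to parse the stream.
        doc = fitz.open(stream=data, filetype="pdf")
        return '\n'.join(page.get_text() for page in doc)

    with open("sample.pdf", "rb") as f:  # hypothetical local test file
        print(extract_pdf_text(f.read())[:500])
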
@@ -48,17 +53,16 @@ def get_url_content(url):
         return (url, ''.join([text for page in doc for text in page.get_text()]))
     else:
         soup = BeautifulSoup(response.content, 'html.parser')
-
-        # Content containers. Here wordpress specific container css class name
-        # used. This will be different for each website.
         content = soup.find_all('div', class_='wpb_content_element')
         text = [c.get_text().strip() for c in content if c.get_text().strip() != '']
         text = [line for item in text for line in item.split('\n') if line.strip() != '']
 
-        # Post processing to exclude footer content.
-        # This will be different for each website.
-        arts_on = text.index('ARTS ON:')
-        return (url, '\n'.join(text[:arts_on]))
+        # Post-processing to exclude footer content, only if 'ARTS ON:' is present.
+        try:
+            arts_on_index = text.index('ARTS ON:')
+            return (url, '\n'.join(text[:arts_on_index]))
+        except ValueError:
+            return (url, '\n'.join(text))  # If 'ARTS ON:' not found, return full text
 
 
 @st.cache_resource
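
Note: the try/except added above makes the footer trimming tolerant of pages that lack the marker, since list.index raises ValueError when its argument is absent. The pattern in isolation, with made-up sample strings:

    text = ['Body paragraph', 'ARTS ON:', 'footer links']
    try:
        cut = text.index('ARTS ON:')  # index of the first match; ValueError if absent
    except ValueError:
        cut = len(text)               # no marker found: keep everything
    print('\n'.join(text[:cut]))      # prints only 'Body paragraph'
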
@@ -121,12 +125,9 @@ def create_chain(_retriever):
     return qa_chain
 
 
-# Set the webpage title
-st.set_page_config(
-    page_title="Your own AI-Chat!"
-)
 
-# Create a header element
+# Set the webpage title
+st.set_page_config(page_title="Your own AI-Chat!")
 st.header("Your own AI-Chat!")
 
 # This sets the LLM's personality.
@@ -136,20 +137,27 @@ st.header("Your own AI-Chat!")
 #     label="System Prompt",
 #     value="You are a helpful AI assistant who answers questions in short sentences.",
 #     key="system_prompt")
-
-if "base_url" not in st.session_state:
-    st.session_state.base_url = ""
-
-base_url = st.text_input("Enter the site url here", key="base_url")
-
-if st.session_state.base_url != "":
-    urls = get_page_urls(base_url)
-
-    retriever = get_retriever(urls)
+# Choose the input method.
+retriever = None  # Defined up front so the `if retriever:` guard below never hits a NameError.
+input_type = st.radio("Choose an input method:", ['URL', 'Upload PDF'])
+
+if input_type == 'URL':
+    base_url = st.text_input("Enter the site URL here:", key="base_url")
+    if base_url:
+        urls = get_page_urls(base_url)
+        retriever = get_retriever(urls)
+elif input_type == 'Upload PDF':
+    uploaded_file = st.file_uploader("Upload your PDF here:", type="pdf")
+    if uploaded_file:
+        pdf_text = process_pdf(uploaded_file)
+        # Assume we process the PDF text into a format that can be used by the LLM.
+        urls = [pdf_text]  # This should be adjusted to match your system's needs.
+        retriever = get_retriever(urls)
 
 # We store the conversation in the session state.
 # This will be used to render the chat conversation.
-# We initialize it with the first message we want to be greeted with.
+# We initialize it with the first message we want to be greeted with
+
 if "messages" not in st.session_state:
     st.session_state.messages = [
         {"role": "assistant", "content": "How may I help you today?"}
@@ -164,34 +172,36 @@ if st.session_state.base_url != "":
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-# We initialize the quantized LLM from a local path.
-# Currently most parameters are fixed but we can make them
-# configurable.
-llm_chain = create_chain(retriever)
-
-# We take questions/instructions from the chat input to pass to the LLM
-if user_prompt := st.chat_input("Your message here", key="user_input"):
-
-    # Add our input to the session state
-    st.session_state.messages.append(
-        {"role": "user", "content": user_prompt}
-    )
-
-    # Add our input to the chat window
-    with st.chat_message("user"):
-        st.markdown(user_prompt)
-
-    # Pass our input to the llm chain and capture the final responses.
-    # It is worth noting that the Stream Handler is already receiving the
-    # streaming response as the llm is generating. We get our response
-    # here once the llm has finished generating the complete response.
-    response = llm_chain.run(user_prompt)
-
-    # Add the response to the session state
-    st.session_state.messages.append(
-        {"role": "assistant", "content": response}
-    )
-
-    # Add the response to the chat window
-    with st.chat_message("assistant"):
-        st.markdown(response)
+
+if retriever:
+    # We initialize the quantized LLM from a local path.
+    # Currently most parameters are fixed but we can make them
+    # configurable.
+    llm_chain = create_chain(retriever)
+
+    # We take questions/instructions from the chat input to pass to the LLM
+    if user_prompt := st.chat_input("Your message here", key="user_input"):
+
+        # Add our input to the session state
+        st.session_state.messages.append(
+            {"role": "user", "content": user_prompt}
+        )
+
+        # Add our input to the chat window
+        with st.chat_message("user"):
+            st.markdown(user_prompt)
+
+        # Pass our input to the llm chain and capture the final responses.
+        # It is worth noting that the Stream Handler is already receiving the
+        # streaming response as the llm is generating. We get our response
+        # here once the llm has finished generating the complete response.
+        response = llm_chain.run(user_prompt)
+
+        # Add the response to the session state
+        st.session_state.messages.append(
+            {"role": "assistant", "content": response}
+        )
+
+        # Add the response to the chat window
+        with st.chat_message("assistant"):
+            st.markdown(response)
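
Note: this diff shows only the tail of create_chain (return qa_chain), so its body is not visible here. For orientation, a retrieval QA chain of this shape is commonly built in LangChain roughly as follows; the llm object and the "stuff" chain type are assumptions, not the app's confirmed code:

    from langchain.chains import RetrievalQA

    def create_chain(retriever, llm):
        # "stuff" packs all retrieved chunks into a single prompt for the LLM.
        return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

A chain built this way supports the llm_chain.run(user_prompt) call used above.
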
 
 
 