Mattral committed on
Commit 4adbe51 · verified · 1 Parent(s): c85cac1

Update app.py

Files changed (1): app.py +39 -194
app.py CHANGED
@@ -1,212 +1,57 @@
 import streamlit as st
-from bs4 import BeautifulSoup
-import io
-import fitz  # PyMuPDF
-import requests
 from langchain.llms import LlamaCpp
-from langchain.callbacks.base import BaseCallbackHandler
-from langchain.vectorstores import DocArrayInMemorySearch
-from langchain.docstore.document import Document
-from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from docarray import Document, DocumentArray
-from sentence_transformers import SentenceTransformer
+from langchain.chains import ConversationChain
 
-# StreamHandler to intercept streaming output from the LLM.
-# This makes it appear that the Language Model is "typing"
-# in real time.
-class StreamHandler(BaseCallbackHandler):
-    def __init__(self, container, initial_text=""):
-        self.container = container
-        self.text = initial_text
+# Streamlit page configuration
+st.set_page_config(page_title="Simple AI Chatbot")
+st.header("Simple AI Chatbot")
 
-    def on_llm_new_token(self, token: str, **kwargs) -> None:
-        self.text += token
-        self.container.markdown(self.text)
-
-from langchain_core import BaseRetriever
-
-class SimpleEmbeddingRetriever(BaseRetriever):
-    def __init__(self, documents):
-        self.documents = documents
-
-    def _get_relevant_documents(self, query: str, num_documents: int = 5):
-        query_doc = Document(text=query)
-        query_embedding = self.documents.embeddings.model.encode([query_doc.text])[0]
-        query_doc.embedding = query_embedding
-        scores = self.documents.match(query_doc, limit=num_documents, metric='cosine', use_scipy=True)
-        return [(doc.text, score) for doc, score in scores]
-
-
-@st.cache_data
-def get_page_urls(url):
-    try:
-        page = requests.get(url)
-        soup = BeautifulSoup(page.content, 'html.parser')
-        links = [link['href'] for link in soup.find_all('a') if 'href' in link.attrs and link['href'].startswith(url) and link['href'] not in [url]]
-        links.append(url)
-        return set(links)
-    except requests.RequestException as e:
-        st.error(f"Failed to load page: {e}")
-        return set()
-
-
-def get_url_content(url):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()
-        if url.endswith('.pdf'):
-            pdf = io.BytesIO(response.content)
-            doc = fitz.open(stream=pdf, filetype="pdf")
-            text = ''.join([page.get_text("text") for page in doc])
-        else:
-            soup = BeautifulSoup(response.content, 'html.parser')
-            content = soup.find_all('div', class_='wpb_content_element')
-            text = ' '.join([c.get_text().strip() for c in content if c.get_text().strip() != ''])
-
-        # Create a single document with metadata
-        document = Document(text=text, tags={'url': url})
-        return DocumentArray([document])
-    except Exception as e:
-        st.error(f"Failed to process URL content: {e}")
-        return DocumentArray()
-
-
-@st.cache_resource
-def get_retriever(urls):
-    documents = DocumentArray()
-    for url in urls:
-        content = get_url_content(url)
-        if content:
-            documents.extend(content)
-
-    model = SentenceTransformer('all-MiniLM-L6-v2')
-    embeddings = model.encode([doc.text for doc in documents], show_progress_bar=True)
-    for doc, emb in zip(documents, embeddings):
-        doc.embedding = emb
-
-    return SimpleEmbeddingRetriever(documents)
-
-
-@st.cache_resource
-def create_chain(_retriever):
-    # A stream handler to direct streaming output on the chat screen.
-    # This will need to be handled somewhat differently.
-    # But it demonstrates what potential it carries.
-    # stream_handler = StreamHandler(st.empty())
-
-    # Callback manager is a way to intercept streaming output from the
-    # LLM and take some action on it. Here we are giving it our custom
-    # stream handler to make it appear as if the LLM is typing the
-    # responses in real time.
-    # callback_manager = CallbackManager([stream_handler])
-
-    n_gpu_layers = 5  # Change this value based on your model and your GPU VRAM pool.
-    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+# Initialize the Language Model Chain
+@st.cache_resource
+def initialize_chain():
+    n_gpu_layers = 40
+    n_batch = 2048
 
     llm = LlamaCpp(
-        model_path="models /mistral-7b-instruct-v0.1.Q5_0.gguf",
-        n_gpu_layers=n_gpu_layers,
-        n_batch=n_batch,
-        n_ctx=2048,
-        # max_tokens=2048,
-        temperature=0,
-        # callback_manager=callback_manager,
-        verbose=False,
-        streaming=True,
-    )
-
-    # Template for the prompt.
-    # template = "{question}"
-
-    # We create a prompt from the template so we can use it with langchain
-    # prompt = PromptTemplate(template=template, input_variables=["question"])
+        model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
+        n_gpu_layers=n_gpu_layers,
+        n_batch=n_batch,
+        n_ctx=2048,
+        temperature=0,
+        verbose=False,
+        streaming=True,
+    )
 
     # Setup memory for contextual conversation
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
-    # We create a qa chain with our llm, retriever, and memory
-    qa_chain = ConversationalRetrievalChain.from_llm(
-        llm, retriever=_retriever, memory=memory, verbose=False
-    )
-
-    return qa_chain
-
-
-# Set the webpage title
-st.set_page_config(
-    page_title="Your own AI-Chat!"
-)
-
-# Create a header element
-st.header("Your own AI-Chat!")
-
-# This sets the LLM's personality.
-# The initial personality provided is basic.
-# Try something interesting and notice how the LLM responses are affected.
-# system_prompt = st.text_area(
-#     label="System Prompt",
-#     value="You are a helpful AI assistant who answers questions in short sentences.",
-#     key="system_prompt")
-
-if "base_url" not in st.session_state:
-    st.session_state.base_url = ""
-
-base_url = st.text_input("Enter the site url here", key="base_url")
-
-if st.session_state.base_url != "":
-    urls = get_page_urls(base_url)
-
-    retriever = get_retriever(urls)
-
-    # We store the conversation in the session state.
-    # This will be used to render the chat conversation.
-    # We initialize it with the first message we want to be greeted with.
-    if "messages" not in st.session_state:
-        st.session_state.messages = [
-            {"role": "assistant", "content": "How may I help you today?"}
-        ]
-
-    if "current_response" not in st.session_state:
-        st.session_state.current_response = ""
-
-    # We loop through each message in the session state and render it as
-    # a chat message.
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-    # We initialize the quantized LLM from a local path.
-    # Currently most parameters are fixed but we can make them
-    # configurable.
-    llm_chain = create_chain(retriever)
+    # Initialize the conversational chain
+    chat_chain = ConversationChain(llm=llm, memory=memory, verbose=False)
+    return chat_chain
 
-    # We take questions/instructions from the chat input to pass to the LLM
-    if user_prompt := st.chat_input("Your message here", key="user_input"):
+llm_chain = initialize_chain()
 
-        # Add our input to the session state
-        st.session_state.messages.append(
-            {"role": "user", "content": user_prompt}
-        )
+if "messages" not in st.session_state:
+    st.session_state.messages = [{"role": "assistant", "content": "Hello! How can I assist you today?"}]
 
-        # Add our input to the chat window
-        with st.chat_message("user"):
-            st.markdown(user_prompt)
+# Display conversation messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
 
-        # Pass our input to the llm chain and capture the final responses.
-        # It is worth noting that the Stream Handler is already receiving the
-        # streaming response as the llm is generating. We get our response
-        # here once the llm has finished generating the complete response.
-        response = llm_chain.run(user_prompt)
+# Handle user input
+user_input = st.chat_input("Type your message...", key="user_input")
+if user_input:
+    # Append the user message to the conversation
+    st.session_state.messages.append({"role": "user", "content": user_input})
 
-        # Add the response to the session state
-        st.session_state.messages.append(
-            {"role": "assistant", "content": response}
-        )
+    # Get a response from the LLM
+    response = llm_chain.run(user_input)
 
-        # Add the response to the chat window
-        with st.chat_message("assistant"):
-            st.markdown(response)
+    # Append the LLM response to the conversation
+    st.session_state.messages.append({"role": "assistant", "content": response})
+
+    # Update the chat window with the assistant's response
+    with st.chat_message("assistant"):
+        st.markdown(response)
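
A caveat on the new chain: ConversationChain's default prompt template expects a `history` input variable, and the chain validates its prompt variables against the memory keys at construction, so the memory_key="chat_history" kept over from the old retrieval chain will raise a validation error. A minimal sketch of a compatible setup, assuming the classic (pre-0.1) LangChain API and the local GGUF model path used in the diff:

import streamlit as st
from langchain.llms import LlamaCpp
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

@st.cache_resource
def initialize_chain():
    llm = LlamaCpp(
        model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",  # local GGUF file from the repo
        n_gpu_layers=40,
        n_batch=2048,
        n_ctx=2048,
        temperature=0,
        streaming=True,
    )
    # ConversationChain's default prompt is a plain-string template with a
    # {history} slot, so use the default memory_key="history" and let the
    # memory render the buffer as a string (return_messages=False).
    memory = ConversationBufferMemory(memory_key="history", return_messages=False)
    return ConversationChain(llm=llm, memory=memory, verbose=False)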
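
The streaming=True flag only has a visible effect if something consumes the tokens as they arrive. The removed StreamHandler was that consumer; without it, the new code renders each reply only after generation has finished. A sketch of re-attaching the handler from the removed code, passing it through the classic LangChain per-call `callbacks=` hook:

from langchain.callbacks.base import BaseCallbackHandler

# Handler from the removed code: appends each new token to a Streamlit
# container so the reply appears to be typed in real time.
class StreamHandler(BaseCallbackHandler):
    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)

# Inside the user-input branch, stream into the assistant's chat bubble:
# with st.chat_message("assistant"):
#     handler = StreamHandler(st.empty())
#     response = llm_chain.run(user_input, callbacks=[handler])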