Mattral committed
Commit 2ad184d · verified · 1 Parent(s): 65d7365

Upload 2 files

Files changed (2)
  1. main.py +195 -0
  2. requirements.txt +8 -0
main.py ADDED
@@ -0,0 +1,195 @@
+ import streamlit as st
+ from bs4 import BeautifulSoup
+ import io
+ import fitz
+ import requests
+ from langchain.llms import LlamaCpp
+ from langchain.callbacks.base import BaseCallbackHandler
+ from langchain.vectorstores import DocArrayInMemorySearch
+ from langchain.docstore.document import Document
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+
+ # StreamHandler intercepts streaming output from the LLM.
+ # This makes it appear that the language model is "typing"
+ # in real time.
+ class StreamHandler(BaseCallbackHandler):
+     def __init__(self, container, initial_text=""):
+         self.container = container
+         self.text = initial_text
+
+     def on_llm_new_token(self, token: str, **kwargs) -> None:
+         self.text += token
+         self.container.markdown(self.text)
+
+
+ @st.cache_data
+ def get_page_urls(url):
+     page = requests.get(url)
+     soup = BeautifulSoup(page.content, 'html.parser')
+     links = [link['href'] for link in soup.find_all('a', href=True) if link['href'].startswith(url) and link['href'] not in [url]]
+     links.append(url)
+     return set(links)
+
+
+ def get_url_content(url):
+     response = requests.get(url)
+     if url.endswith('.pdf'):
+         pdf = io.BytesIO(response.content)
+         file = open('pdf.pdf', 'wb')
+         file.write(pdf.read())
+         file.close()
+         doc = fitz.open('pdf.pdf')
+         return (url, ''.join([page.get_text() for page in doc]))
+     else:
+         soup = BeautifulSoup(response.content, 'html.parser')
+
+         # Content containers. A WordPress-specific container CSS
+         # class name is used here; this differs for each website.
+         content = soup.find_all('div', class_='wpb_content_element')
+         text = [c.get_text().strip() for c in content if c.get_text().strip() != '']
+         text = [line for item in text for line in item.split('\n') if line.strip() != '']
+
+         # Post-processing to exclude footer content.
+         # This will also differ for each website.
+         arts_on = text.index('ARTS ON:')
+         return (url, '\n'.join(text[:arts_on]))
+
+
+ @st.cache_resource
+ def get_retriever(urls):
+     all_content = [get_url_content(url) for url in urls]
+     documents = [Document(page_content=doc, metadata={'url': url}) for (url, doc) in all_content]
+
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
+     docs = text_splitter.split_documents(documents)
+
+     embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+
+     db = DocArrayInMemorySearch.from_documents(docs, embeddings)
+     retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
+     return retriever
+
+
+ @st.cache_resource
+ def create_chain(_retriever):
+     # A stream handler to direct streaming output to the chat screen.
+     # This would need to be handled somewhat differently in production,
+     # but it demonstrates the potential.
+     # stream_handler = StreamHandler(st.empty())
+
+     # A callback manager intercepts streaming output from the
+     # LLM and takes some action on it. Here we would give it our custom
+     # stream handler to make it appear as if the LLM is typing the
+     # responses in real time.
+     # callback_manager = CallbackManager([stream_handler])
+
+     n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
+     n_batch = 2048  # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
+
+     llm = LlamaCpp(
+         model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
+         n_gpu_layers=n_gpu_layers,
+         n_batch=n_batch,
+         n_ctx=2048,
+         # max_tokens=2048,
+         temperature=0,
+         # callback_manager=callback_manager,
+         verbose=False,
+         streaming=True,
+     )
+
+     # Template for the prompt.
+     # template = "{question}"
+
+     # We create a prompt from the template so we can use it with langchain.
+     # prompt = PromptTemplate(template=template, input_variables=["question"])
+
+     # Set up memory for contextual conversation.
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+     # Create a QA chain with our LLM, retriever, and memory.
+     qa_chain = ConversationalRetrievalChain.from_llm(
+         llm, retriever=_retriever, memory=memory, verbose=False
+     )
+
+     return qa_chain
+
+
+ # Set the webpage title.
+ st.set_page_config(
+     page_title="Your own AI-Chat!"
+ )
+
+ # Create a header element.
+ st.header("Your own AI-Chat!")
+
+ # This sets the LLM's personality.
+ # The initial personality provided is basic.
+ # Try something interesting and notice how the LLM responses are affected.
+ # system_prompt = st.text_area(
+ #     label="System Prompt",
+ #     value="You are a helpful AI assistant who answers questions in short sentences.",
+ #     key="system_prompt")
+
+ if "base_url" not in st.session_state:
+     st.session_state.base_url = ""
+
+ base_url = st.text_input("Enter the site URL here", key="base_url")
+
+ if st.session_state.base_url != "":
+     urls = get_page_urls(base_url)
+
+     retriever = get_retriever(urls)
+
+     # We store the conversation in the session state.
+     # This will be used to render the chat conversation.
+     # We initialize it with the greeting we want users to see first.
+     if "messages" not in st.session_state:
+         st.session_state.messages = [
+             {"role": "assistant", "content": "How may I help you today?"}
+         ]
+
+     if "current_response" not in st.session_state:
+         st.session_state.current_response = ""
+
+     # We loop through each message in the session state and render it as
+     # a chat message.
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # We initialize the quantized LLM from a local path.
+     # Currently most parameters are fixed, but we could make them
+     # configurable.
+     llm_chain = create_chain(retriever)
+
+     # We take questions/instructions from the chat input to pass to the LLM.
+     if user_prompt := st.chat_input("Your message here", key="user_input"):
+
+         # Add our input to the session state.
+         st.session_state.messages.append(
+             {"role": "user", "content": user_prompt}
+         )
+
+         # Add our input to the chat window.
+         with st.chat_message("user"):
+             st.markdown(user_prompt)
+
+         # Pass our input to the LLM chain and capture the final response.
+         # Note that the stream handler would already be receiving the
+         # streaming response as the LLM generates; we only get the full
+         # response here once the LLM has finished generating it.
+         response = llm_chain.run(user_prompt)
+
+         # Add the response to the session state.
+         st.session_state.messages.append(
+             {"role": "assistant", "content": response}
+         )
+
+         # Add the response to the chat window.
+         with st.chat_message("assistant"):
+             st.markdown(response)
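
Note: the commented-out StreamHandler / CallbackManager lines in create_chain sketch how tokens could be rendered live in the chat window as the model generates them. A minimal sketch of that wiring, assuming CallbackManager is importable from langchain.callbacks.manager in the pinned langchain 0.0.321 (untested here):

from langchain.callbacks.manager import CallbackManager

# A placeholder container the handler streams tokens into.
stream_handler = StreamHandler(st.empty())

llm = LlamaCpp(
    model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",  # same path as in create_chain
    n_ctx=2048,
    temperature=0,
    streaming=True,
    # Routes each on_llm_new_token callback to our StreamHandler,
    # which appends the token and re-renders the markdown container.
    callback_manager=CallbackManager([stream_handler]),
    verbose=False,
)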
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ beautifulsoup4==4.12.2
+ docarray==0.39.1
+ langchain==0.0.321
+ llama_cpp_python==0.2.11
+ pydantic==1.10.8
+ PyMuPDF==1.23.5
+ sentence-transformers==2.2.2
+ streamlit==1.27.2
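
To try the app, the dependencies above can be installed with "pip install -r requirements.txt". The quantized GGUF model referenced in main.py (models/mistral-7b-instruct-v0.1.Q5_0.gguf) is not part of this commit and would need to be downloaded separately and placed under models/ before launching the app with "streamlit run main.py".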