import streamlit as st
from bs4 import BeautifulSoup
import io
import fitz
import requests
from langchain.llms import LlamaCpp
from langchain.callbacks.base import BaseCallbackHandler
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
# StreamHandler to intercept streaming output from the LLM.
# This makes it appear that the Language Model is "typing"
# in real time.
class StreamHandler(BaseCallbackHandler):
    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)
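
# Sketch (illustrative, not wired in below): the handler above can be attached to
# the LLM so each generated token is rendered as it arrives, for example:
#   stream_handler = StreamHandler(st.empty())
#   llm = LlamaCpp(model_path="...", streaming=True, callbacks=[stream_handler])
# The commented-out CallbackManager lines in create_chain() show the same idea.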
@st.cache_data
def get_page_urls(url):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    links = [
        link['href']
        for link in soup.find_all('a')
        if 'href' in link.attrs and link['href'].startswith(url) and link['href'] != url
    ]
    links.append(url)
    return set(links)
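
# Note: get_page_urls only collects links that appear on the landing page itself
# (one level deep) and that start with the base URL; it does not crawl recursively.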
@st.cache_data
def process_pdf(file):
    # file is expected to be a BytesIO-like object straight from the file uploader
    doc = fitz.open(stream=file.read(), filetype="pdf")
    texts = [page.get_text() for page in doc]
    return '\n'.join(texts)
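
# process_pdf mirrors the PDF branch of get_url_content below, but it reads an
# uploaded file from Streamlit instead of fetching a URL.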
def get_url_content(url):
    response = requests.get(url)
    if url.endswith('.pdf'):
        pdf = io.BytesIO(response.content)
        doc = fitz.open(stream=pdf, filetype="pdf")
        return (url, ''.join(page.get_text() for page in doc))
    else:
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.find_all('div', class_='wpb_content_element')
        text = [c.get_text().strip() for c in content if c.get_text().strip() != '']
        text = [line for item in text for line in item.split('\n') if line.strip() != '']
        # Exclude footer content
        try:
            arts_on_index = text.index('ARTS ON:')
            return (url, '\n'.join(text[:arts_on_index]))
        except ValueError:
            # Return the full text if the footer marker is not found
            return (url, '\n'.join(text))
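
# Note: the 'wpb_content_element' selector and the 'ARTS ON:' footer marker are
# specific to the site this script was originally pointed at (a WPBakery-style
# WordPress layout, by the look of the class name); adjust both when targeting a
# different site.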
@st.cache_resource
def get_retriever(sources):
    # Each source is either a URL (str) to fetch, or a (name, raw_text) tuple,
    # e.g. the text extracted from an uploaded PDF.
    all_content = [get_url_content(s) if isinstance(s, str) else s for s in sources]
    print(all_content)  # See what is actually fetched
    documents = [Document(page_content=doc, metadata={'url': url}) for (url, doc) in all_content]
    print(documents)  # Verify that documents are created correctly
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)
    print(docs)  # Check the final structure of split documents
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
    return retriever
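
# The retriever uses maximal marginal relevance (MMR): it fetches the 10 most
# similar chunks and returns the 5 that best balance relevance with diversity,
# which helps avoid passing the LLM several near-duplicate passages.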
@st.cache_resource
def create_chain(_retriever):
    # A stream handler to direct streaming output to the chat screen.
    # Wiring this in needs some extra handling on the Streamlit side,
    # but it shows what is possible.
    # stream_handler = StreamHandler(st.empty())

    # A callback manager is a way to intercept streaming output from the
    # LLM and take some action on it. Here we would give it our custom
    # stream handler to make it appear as if the LLM is typing the
    # responses in real time.
    # callback_manager = CallbackManager([stream_handler])

    n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
    n_batch = 2048  # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
    llm = LlamaCpp(
        model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        n_ctx=2048,
        # max_tokens=2048,
        temperature=0,
        # callback_manager=callback_manager,
        verbose=False,
        streaming=True,
    )
    # Template for the prompt.
    # template = "{question}"
    # We create a prompt from the template so we can use it with langchain
    # prompt = PromptTemplate(template=template, input_variables=["question"])

    # Set up memory for contextual conversation
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
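    # ConversationalRetrievalChain condenses the chat history plus the new question
    # into a standalone question, retrieves relevant chunks for it, and then asks
    # the LLM to answer from those chunks; the memory above supplies the history.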
    # We create a QA chain with our llm, retriever, and memory
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=_retriever, memory=memory, verbose=False
    )
    return qa_chain
# Set the webpage title
st.set_page_config(page_title="Your own AI-Chat!")
st.header("Your own AI-Chat!")
# This sets the LLM's personality.
# The initial personality provided is basic.
# Try something interesting and notice how the LLM responses are affected.
# system_prompt = st.text_area(
# label="System Prompt",
# value="You are a helpful AI assistant who answers questions in short sentences.",
# key="system_prompt")
# Choose input method
input_type = st.radio("Choose an input method:", ['URL', 'Upload PDF'])
llm_chain = None  # Only created once a URL or an uploaded PDF has been provided

if input_type == 'URL':
    base_url = st.text_input("Enter the site URL here:", key="base_url")
    if base_url:
        urls = get_page_urls(base_url)
        retriever = get_retriever(urls)
        llm_chain = create_chain(retriever)
elif input_type == 'Upload PDF':
    uploaded_file = st.file_uploader("Upload your PDF here:", type="pdf")
    if uploaded_file:
        pdf_text = process_pdf(uploaded_file)
        # Hand the extracted text to the retriever as a (name, raw_text) source
        retriever = get_retriever([(uploaded_file.name, pdf_text)])
        llm_chain = create_chain(retriever)
# We store the conversation in the session state.
# This will be used to render the chat conversation.
# We initialize it with the first message we want to be greeted with.
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I help you today?"}]

if "current_response" not in st.session_state:
    st.session_state.current_response = ""
# Render the chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Input and response handling
if llm_chain and (user_prompt := st.chat_input("Your message here", key="user_input")):
    # Add user input to the session state and chat window
    st.session_state.messages.append({"role": "user", "content": user_prompt})
    with st.chat_message("user"):
        st.markdown(user_prompt)

    # Generate and display the response using the LLM chain
    response = llm_chain.run(user_prompt)
    st.session_state.messages.append({"role": "assistant", "content": response})
    with st.chat_message("assistant"):
        st.markdown(response)