Spaces:

Mattral
/

RAG-bot

Sleeping

App Files Files Community

Mattral committed on May 13, 2024

Commit

4adbe51

verified ·

1 Parent(s): c85cac1

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -194

app.py CHANGED Viewed

@@ -1,212 +1,57 @@
 import streamlit as st
-from bs4 import BeautifulSoup
-import io
-import fitz # PyMuPDF
-import requests
 from langchain.llms import LlamaCpp
-from langchain.callbacks.base import BaseCallbackHandler
-from langchain.vectorstores import DocArrayInMemorySearch
-from langchain.docstore.document import Document
-from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from docarray import Document, DocumentArray
-from sentence_transformers import SentenceTransformer
-# StreamHandler to intercept streaming output from the LLM.
-# This makes it appear that the Language Model is "typing"
-# in realtime.
-class StreamHandler(BaseCallbackHandler):
-    def __init__(self, container, initial_text=""):
-        self.container = container
-        self.text = initial_text
-    def on_llm_new_token(self, token: str, **kwargs) -> None:
-        self.text += token
-        self.container.markdown(self.text)
-from langchain_core import BaseRetriever
-class SimpleEmbeddingRetriever(BaseRetriever):
-    def __init__(self, documents):
-        self.documents = documents
-    def _get_relevant_documents(self, query: str, num_documents: int = 5):
-        query_doc = Document(text=query)
-        query_embedding = self.documents.embeddings.model.encode([query_doc.text])[0]
-        query_doc.embedding = query_embedding
-        scores = self.documents.match(query_doc, limit=num_documents, metric='cosine', use_scipy=True)
-        return [(doc.text, score) for doc, score in scores]
-@st.cache_data
-def get_page_urls(url):
-    try:
-        page = requests.get(url)
-        soup = BeautifulSoup(page.content, 'html.parser')
-        links = [link['href'] for link in soup.find_all('a') if 'href' in link.attrs and link['href'].startswith(url) and link['href'] not in [url]]
-        links.append(url)
-        return set(links)
-    except requests.RequestException as e:
-        st.error(f"Failed to load page: {e}")
-        return set()
-def get_url_content(url):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()
-        if url.endswith('.pdf'):
-            pdf = io.BytesIO(response.content)
-            doc = fitz.open(stream=pdf, filetype="pdf")
-            text = ''.join([page.get_text("text") for page in doc])
-        else:
-            soup = BeautifulSoup(response.content, 'html.parser')
-            content = soup.find_all('div', class_='wpb_content_element')
-            text = ' '.join([c.get_text().strip() for c in content if c.get_text().strip() != ''])
-        # Create a single document with metadata
-        document = Document(text=text, tags={'url': url})
-        return DocumentArray([document])
-    except Exception as e:
-        st.error(f"Failed to process URL content: {e}")
-        return DocumentArray()
-@st.cache_resource
-def get_retriever(urls):
-    documents = DocumentArray()
-    for url in urls:
-        content = get_url_content(url)
-        if content:
-            documents.extend(content)
-    model = SentenceTransformer('all-MiniLM-L6-v2')
-    embeddings = model.encode([doc.text for doc in documents], show_progress_bar=True)
-    for doc, emb in zip(documents, embeddings):
-        doc.embedding = emb
-    return SimpleEmbeddingRetriever(documents)
-@st.cache_resource
-def create_chain(_retriever):
-    # A stream handler to direct streaming output on the chat screen.
-    # This will need to be handled somewhat differently.
-    # But it demonstrates what potential it carries.
-    # stream_handler = StreamHandler(st.empty())
-    # Callback manager is a way to intercept streaming output from the
-    # LLM and take some action on it. Here we are giving it our custom
-    # stream handler to make it appear as if the LLM is typing the
-    # responses in real time.
-    # callback_manager = CallbackManager([stream_handler])
-    n_gpu_layers = 5  # Change this value based on your model and your GPU VRAM pool.
-    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     llm = LlamaCpp(
-            model_path="models /mistral-7b-instruct-v0.1.Q5_0.gguf",
-            n_gpu_layers=n_gpu_layers,
-            n_batch=n_batch,
-            n_ctx=2048,
-            # max_tokens=2048,
-            temperature=0,
-            # callback_manager=callback_manager,
-            verbose=False,
-            streaming=True,
-            )
-    # Template for the prompt.
-    # template = "{question}"
-    # We create a prompt from the template so we can use it with langchain
-    # prompt = PromptTemplate(template=template, input_variables=["question"])
     # Setup memory for contextual conversation
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    # We create a qa chain with our llm, retriever, and memory
-    qa_chain = ConversationalRetrievalChain.from_llm(
-        llm, retriever=_retriever, memory=memory, verbose=False
-    )
-    return qa_chain
-# Set the webpage title
-st.set_page_config(
-    page_title="Your own AI-Chat!"
-)
-# Create a header element
-st.header("Your own AI-Chat!")
-# This sets the LLM's personality.
-# The initial personality privided is basic.
-# Try something interesting and notice how the LLM responses are affected.
-# system_prompt = st.text_area(
-#    label="System Prompt",
-#    value="You are a helpful AI assistant who answers questions in short sentences.",
-#    key="system_prompt")
-if "base_url" not in st.session_state:
-    st.session_state.base_url = ""
-base_url = st.text_input("Enter the site url here", key="base_url")
-if st.session_state.base_url != "":
-    urls = get_page_urls(base_url)
-    retriever = get_retriever(urls)
-    # We store the conversation in the session state.
-    # This will be used to render the chat conversation.
-    # We initialize it with the first message we want to be greeted with.
-    if "messages" not in st.session_state:
-        st.session_state.messages = [
-            {"role": "assistant", "content": "How may I help you today?"}
-        ]
-    if "current_response" not in st.session_state:
-        st.session_state.current_response = ""
-    # We loop through each message in the session state and render it as
-    # a chat message.
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    # We initialize the quantized LLM from a local path.
-    # Currently most parameters are fixed but we can make them
-    # configurable.
-    llm_chain = create_chain(retriever)
-    # We take questions/instructions from the chat input to pass to the LLM
-    if user_prompt := st.chat_input("Your message here", key="user_input"):
-        # Add our input to the session state
-        st.session_state.messages.append(
-            {"role": "user", "content": user_prompt}
-        )
-        # Add our input to the chat window
-        with st.chat_message("user"):
-            st.markdown(user_prompt)
-        # Pass our input to the llm chain and capture the final responses.
-        # It is worth noting that the Stream Handler is already receiving the
-        # streaming response as the llm is generating. We get our response
-        # here once the llm has finished generating the complete response.
-        response = llm_chain.run(user_prompt)
-        # Add the response to the session state
-        st.session_state.messages.append(
-            {"role": "assistant", "content": response}
-        )
-        # Add the response to the chat window
-        with st.chat_message("assistant"):
-            st.markdown(response)

 import streamlit as st
 from langchain.llms import LlamaCpp
 from langchain.memory import ConversationBufferMemory
+# NOTE: the chain class exported by langchain.chains is ConversationChain;
+# there is no ConversationalChain, so that import fails at startup.
+from langchain.chains import ConversationChain
+# Streamlit page configuration
+st.set_page_config(page_title="Simple AI Chatbot")
+st.header("Simple AI Chatbot")
+# Load the model once per server process. st.cache_resource replaces the
+# deprecated st.experimental_singleton and shares the returned object across
+# all sessions, which is what we want for the (large, stateless) model only.
+@st.cache_resource
+def _load_llm():
+    """Create the LlamaCpp model from the local GGUF file.
+
+    Returns:
+        The LlamaCpp instance, cached by Streamlit so the weights are
+        loaded a single time rather than on every script rerun.
+    """
+    n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
+    n_batch = 2048  # Should be between 1 and n_ctx.
     llm = LlamaCpp(
+        model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
+        n_gpu_layers=n_gpu_layers,
+        n_batch=n_batch,
+        n_ctx=2048,
+        temperature=0,
+        verbose=False,
+        streaming=True,
+    )
+    return llm
+# Initialize the Language Model Chain
+def initialize_chain():
+    """Build a conversation chain with its own, empty memory.
+
+    The chain is deliberately NOT cached with st.cache_resource: a cached
+    chain would share one ConversationBufferMemory between every visitor,
+    mixing their conversations. Only the model is shared (via _load_llm).
+
+    Returns:
+        A ConversationChain wired to the cached model and a fresh memory.
+    """
     # Setup memory for contextual conversation
+    # The default ConversationChain prompt expects the variables "history"
+    # and "input"; any other memory_key is rejected when the chain is
+    # validated. The history is kept as plain text (no return_messages)
+    # because it is interpolated into a string prompt.
+    memory = ConversationBufferMemory(memory_key="history")
+    # Initialize the conversational chain
+    chat_chain = ConversationChain(llm=_load_llm(), memory=memory, verbose=False)
+    return chat_chain
+# Keep one chain per browser session so its memory survives script reruns
+# without being shared with other users.
+if "llm_chain" not in st.session_state:
+    st.session_state.llm_chain = initialize_chain()
+llm_chain = st.session_state.llm_chain
+if "messages" not in st.session_state:
+    st.session_state.messages = [{"role": "assistant", "content": "Hello! How can I assist you today?"}]
+# Display conversation messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# Handling user input
+user_input = st.chat_input("Type your message...", key="user_input")
+if user_input:
+    # Append user message to the conversation
+    st.session_state.messages.append({"role": "user", "content": user_input})
+    # Show the user's message right away; the history loop above already ran
+    # for this rerun, so without this it would only appear on the next one.
+    with st.chat_message("user"):
+        st.markdown(user_input)
+    # Get response from the LLM
+    response = llm_chain.run(user_input)
+    # Append LLM response to the conversation
+    st.session_state.messages.append({"role": "assistant", "content": response})
+    # Update chat window with the assistant's response
+    with st.chat_message("assistant"):
+        st.markdown(response)