import streamlit as st
from bs4 import BeautifulSoup
import io
import fitz
import requests
from langchain.llms import LlamaCpp
from langchain.callbacks.base import BaseCallbackHandler
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter


# StreamHandler to intercept streaming output from the LLM.
# This makes it appear that the Language Model is "typing"
# in realtime.
class StreamHandler(BaseCallbackHandler):
    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)
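
# A minimal, hedged usage sketch (kept commented out, since the chain below
# does not enable streaming by default): the handler is passed to the LLM
# through its `callbacks` so each generated token updates a Streamlit
# placeholder in place. Exact wiring may vary with the langchain version.
#
#   handler = StreamHandler(st.empty())
#   llm = LlamaCpp(model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
#                  streaming=True, callbacks=[handler])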


@st.cache_data
def get_page_urls(url):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    links = [link['href'] for link in soup.find_all('a')
             if 'href' in link.attrs
             and link['href'].startswith(url)
             and link['href'] != url]
    links.append(url)
    return set(links)

@st.cache_data
def process_pdf(file):
    # file is expected to be a BytesIO object directly from the file uploader
    doc = fitz.open("pdf", file.read())  # "pdf" indicates file format is PDF, reading the BytesIO stream
    texts = [page.get_text() for page in doc]
    return '\n'.join(texts)


def get_url_content(url):
    response = requests.get(url)
    if url.endswith('.pdf'):
        # Open the PDF directly from the in-memory response; there is no
        # need to write it out to a temporary file first.
        pdf = io.BytesIO(response.content)
        doc = fitz.open(stream=pdf, filetype="pdf")
        return (url, ''.join(page.get_text() for page in doc))
    else:
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.find_all('div', class_='wpb_content_element')
        text = [c.get_text().strip() for c in content if c.get_text().strip() != '']
        text = [line for item in text for line in item.split('\n') if line.strip() != '']

        # Post processing to exclude footer content, only if 'ARTS ON:' is present.
        try:
            arts_on_index = text.index('ARTS ON:')
            return (url, '\n'.join(text[:arts_on_index]))
        except ValueError:
            return (url, '\n'.join(text))  # If 'ARTS ON:' not found, return full text


@st.cache_resource
def get_retriever(urls):
    all_content = [get_url_content(url) for url in urls]
    documents = [Document(page_content=doc, metadata={'url': url}) for (url, doc) in all_content]

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})
    return retriever
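

# A text-based counterpart to get_retriever, intended for content that has
# already been extracted (e.g. the text of an uploaded PDF). This is a
# minimal sketch that reuses the same splitter, embedding model and search
# settings as get_retriever; the 'uploaded-document' metadata label is an
# assumption, not something the rest of the pipeline depends on.
@st.cache_resource
def get_text_retriever(texts):
    documents = [
        Document(page_content=text, metadata={'url': f'uploaded-document-{i}'})
        for i, text in enumerate(texts)
    ]

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    db = DocArrayInMemorySearch.from_documents(docs, embeddings)
    return db.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10})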


@st.cache_resource
def create_chain(_retriever):
    # A stream handler to direct streaming output to the chat screen.
    # This would need to be handled somewhat differently in practice,
    # but it demonstrates the potential.
    # stream_handler = StreamHandler(st.empty())

    # Callback manager is a way to intercept streaming output from the
    # LLM and take some action on it. Here we are giving it our custom
    # stream handler to make it appear as if the LLM is typing the
    # responses in real time.
    # callback_manager = CallbackManager([stream_handler])

    n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
    n_batch = 2048  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

    llm = LlamaCpp(
            model_path="models /mistral-7b-instruct-v0.1.Q5_0.gguf",
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            n_ctx=2048,
            # max_tokens=2048,
            temperature=0,
            # callback_manager=callback_manager,
            verbose=False,
            streaming=True,
            )

    # Template for the prompt.
    # template = "{question}"

    # We create a prompt from the template so we can use it with langchain
    # prompt = PromptTemplate(template=template, input_variables=["question"])

    # Setup memory for contextual conversation
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # We create a qa chain with our llm, retriever, and memory
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=_retriever, memory=memory, verbose=False
    )

    return qa_chain



# Set the webpage title
st.set_page_config(page_title="Your own AI-Chat!")
st.header("Your own AI-Chat!")

# This sets the LLM's personality.
# The initial personality provided is basic.
# Try something interesting and notice how the LLM responses are affected.
# system_prompt = st.text_area(
#    label="System Prompt",
#    value="You are a helpful AI assistant who answers questions in short sentences.",
#    key="system_prompt")
# Choose input method

input_type = st.radio("Choose an input method:", ['URL', 'Upload PDF'])

# The retriever is only built once the user has supplied a URL or a PDF.
retriever = None

if input_type == 'URL':
    base_url = st.text_input("Enter the site URL here:", key="base_url")
    if base_url:
        urls = get_page_urls(base_url)
        retriever = get_retriever(urls)
elif input_type == 'Upload PDF':
    uploaded_file = st.file_uploader("Upload your PDF here:", type="pdf")
    if uploaded_file:
        pdf_text = process_pdf(uploaded_file)
        # Build the retriever directly from the extracted text using the
        # text-based helper sketched above.
        retriever = get_text_retriever([pdf_text])

# The chat interface is only rendered once a retriever has been built from
# either a crawled URL or an uploaded PDF.
if retriever is not None:

    # We store the conversation in the session state.
    # This will be used to render the chat conversation.
    # We initialize it with the first message we want to be greeted with.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "How may I help you today?"}
        ]

    if "current_response" not in st.session_state:
        st.session_state.current_response = ""

    # We loop through each message in the session state and render it as
    # a chat message.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    
    # We initialize the quantized LLM from a local path.
    # Currently most parameters are fixed but we can make them
    # configurable.
    llm_chain = create_chain(retriever)
    
    # We take questions/instructions from the chat input to pass to the LLM
    if user_prompt := st.chat_input("Your message here", key="user_input"):
    
        # Add our input to the session state
        st.session_state.messages.append(
            {"role": "user", "content": user_prompt}
        )
    
        # Add our input to the chat window
        with st.chat_message("user"):
            st.markdown(user_prompt)
    
        # Pass our input to the llm chain and capture the final responses.
        # It is worth noting that the Stream Handler is already receiving the
        # streaming response as the llm is generating. We get our response
        # here once the llm has finished generating the complete response.
        response = llm_chain.run(user_prompt)
    
        # Add the response to the session state
        st.session_state.messages.append(
            {"role": "assistant", "content": response}
        )
    
        # Add the response to the chat window
        with st.chat_message("assistant"):
            st.markdown(response)