from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain_community.llms import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import streamlit as st
import os
import time
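# Create local folders on first run: 'files' holds uploaded PDFs,
# 'jj' is the Chroma persistence directory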
if not os.path.exists('files'):
    os.mkdir('files')
if not os.path.exists('jj'):
    os.mkdir('jj')
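# Prompt template combining retrieved context, conversation history,
# and the current question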
if 'template' not in st.session_state:
    st.session_state.template = """You are a knowledgeable chatbot, here to help with the user's questions. Your tone should be professional and informative.
Context: {context}
History: {history}
User: {question}
Chatbot:"""
if 'prompt' not in st.session_state:
    st.session_state.prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template=st.session_state.template,
    )
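# Conversation memory; memory_key and input_key match the {history} and
# {question} variables in the prompt template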
if 'memory' not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(
        memory_key="history",
        return_messages=True,
        input_key="question",
    )
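# Reload any previously persisted Chroma index from 'jj', embedding
# queries with the local Ollama mistral model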
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = Chroma(
        persist_directory='jj',
        embedding_function=OllamaEmbeddings(
            base_url='http://localhost:11434',
            model="mistral",
        ),
    )
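# Local mistral model served by Ollama; generated tokens are echoed to
# stdout by the streaming callback handler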
if 'llm' not in st.session_state:
    st.session_state.llm = Ollama(
        base_url="http://localhost:11434",
        model="mistral",
        verbose=True,
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    )
# Initialize session state
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
st.title("PDF Chatbot")
# Upload a PDF file
uploaded_file = st.file_uploader("Upload your PDF", type='pdf')
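# Replay the conversation so far on each Streamlit rerun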
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["message"])
if uploaded_file is not None:
    file_path = os.path.join("files", uploaded_file.name)
    if not os.path.isfile(file_path):
        with st.status("Analyzing your document..."):
            # Save the upload to disk so PyPDFLoader can read it
            with open(file_path, "wb") as f:
                f.write(uploaded_file.read())
            loader = PyPDFLoader(file_path)
            data = loader.load()
            # Initialize text splitter
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1500,
                chunk_overlap=200,
                length_function=len,
            )
            all_splits = text_splitter.split_documents(data)
            # Create and persist the vector store; persist_directory must
            # match the 'jj' directory the store is reloaded from above
            st.session_state.vectorstore = Chroma.from_documents(
                documents=all_splits,
                embedding=OllamaEmbeddings(base_url='http://localhost:11434',
                                           model="mistral"),
                persist_directory='jj',
            )
            st.session_state.vectorstore.persist()
    st.session_state.retriever = st.session_state.vectorstore.as_retriever()
    # Initialize the QA chain
    if 'qa_chain' not in st.session_state:
        st.session_state.qa_chain = RetrievalQA.from_chain_type(
            llm=st.session_state.llm,
            chain_type='stuff',
            retriever=st.session_state.retriever,
            verbose=True,
            chain_type_kwargs={
                "verbose": True,
                "prompt": st.session_state.prompt,
                "memory": st.session_state.memory,
            },
        )
    # Chat input
    if user_input := st.chat_input("You:", key="user_input"):
        user_message = {"role": "user", "message": user_input}
        st.session_state.chat_history.append(user_message)
        with st.chat_message("user"):
            st.markdown(user_input)
        with st.chat_message("assistant"):
            with st.spinner("Assistant is typing..."):
                response = st.session_state.qa_chain(user_input)
            message_placeholder = st.empty()
            full_response = ""
            for chunk in response['result'].split():
                full_response += chunk + " "
                time.sleep(0.05)
                # Add a blinking cursor to simulate typing
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        chatbot_message = {"role": "assistant", "message": response['result']}
        st.session_state.chat_history.append(chatbot_message)
else:
    st.write("Please upload a PDF file.")