from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
import os
import gradio as gr
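# Assumed runtime deps (not pinned here): langchain, langchain-openai,
# langchain-community, langchain-qdrant, fastembed, rank-bm25, pypdf, gradio.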
# Fail fast with a clear error if the OpenAI key is missing from the env.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set")
os.environ["OPENAI_API_KEY"] = openai_api_key
# Load every PDF in the local `files` folder; each page becomes a document.
pdf_folder_path = "files"
documents = []
for filename in os.listdir(pdf_folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())
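# Note: CharacterTextSplitter splits on "\n\n" by default, so chunks can
# exceed chunk_size=128 whenever a single paragraph is longer than that.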
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
docs = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
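# OpenAIEmbeddings defaults to text-embedding-ada-002 (1536-dim vectors).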
# QdrantVectorStore.from_documents below spins up its own in-memory client
# and creates the collection with both the dense and sparse vector schemas
# that hybrid retrieval needs, so no manual collection setup is required.
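# Hybrid mode pairs the dense OpenAI embeddings with Qdrant's BM25-style
# sparse vectors and fuses the two result lists on the Qdrant side.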
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    location=":memory:",
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)
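# Buffer memory keeps the running chat history under the "chat_history"
# key the chain expects; the dense retriever returns the top-3 chunks.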
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
# BM25Retriever (requires the rank_bm25 package) keyword-matches in memory;
# EnsembleRetriever merges both result lists with weighted Reciprocal Rank
# Fusion, here weighting dense and keyword results equally.
keyword_retriever = BM25Retriever.from_documents(docs)
keyword_retriever.k = 3
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5],
)
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)
# The chain condenses each follow-up question against the stored history,
# retrieves supporting chunks, and answers with the LLM.
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,
    memory=memory,
    verbose=True,
)
def chat_with_ai(user_input, chat_history):
    # Memory supplies prior turns, so only the new question is passed in.
    response = conversational_chain.invoke({"question": user_input})
    chat_history.append((user_input, response["answer"]))
    return chat_history, ""

def clear_history():
    # Reset the chain's memory as well as the UI; otherwise the model keeps
    # answering with the old context after the visible chat is cleared.
    memory.clear()
    return [], "", []
def gradio_chatbot():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for Langchain")
        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")
        # Per-session list of (question, answer) tuples shown in the chatbot.
        chat_history = gr.State([])
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        # chat_history is included in the outputs so the stored state resets too.
        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input, chat_history])
    return demo
if __name__ == "__main__":
    gradio_chatbot().launch(debug=True)