Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -12,11 +12,9 @@ from langchain.document_loaders import PyPDFLoader
 import gradio as gr
 from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
 
-# Set OpenAI API Key
 openai_api_key = os.getenv('OPENAI_API_KEY')
 os.environ["OPENAI_API_KEY"] = openai_api_key
 
-# Load PDF documents
 pdf_folder_path = "files"
 documents = []
 for filename in os.listdir(pdf_folder_path):
@@ -25,24 +23,19 @@ for filename in os.listdir(pdf_folder_path):
         loader = PyPDFLoader(file_path)
         documents.extend(loader.load())
 
-# Split the documents into manageable chunks
 text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
 docs = text_splitter.split_documents(documents)
 
-# Initialize embeddings and Qdrant client
 embeddings = OpenAIEmbeddings()
 qdrant_client = QdrantClient(":memory:")
 
-# Recreate Qdrant collection
 qdrant_client.create_collection(
     collection_name="langchain_collection",
     vectors_config=qdrant_models.VectorParams(size=3000, distance=qdrant_models.Distance.COSINE)
 )
 
-# Set up the sparse embeddings for hybrid retrieval
 sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
 
-# Initialize the vector store with hybrid retrieval mode
 vector_store = QdrantVectorStore.from_documents(
     docs,
     embedding=embeddings,
@@ -52,10 +45,8 @@ vector_store = QdrantVectorStore.from_documents(
     retrieval_mode=RetrievalMode.HYBRID,
 )
 
-# Set up conversational memory
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
-# Set up the retriever
 retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
 
 keyword_retriever = BM25Retriever.from_documents(docs)
@@ -65,10 +56,9 @@ ensemble_retriever = EnsembleRetriever(retrievers=[retriever,keyword_retriever],
 
 llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
 
-# Set up the conversational retrieval chain with memory
 conversational_chain = ConversationalRetrievalChain.from_llm(
     llm=llm,
-    retriever=
+    retriever=keyword_retriever,
     memory=memory,
     verbose=True
 )
@@ -81,7 +71,6 @@ def chat_with_ai(user_input, chat_history):
 
     return chat_history, ""
 
-# Gradio interface
 def gradio_chatbot():
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for Langchain")
@@ -95,11 +84,9 @@ def gradio_chatbot():
 
        chat_history = gr.State([])
 
-        # Bind button and textbox to chat function
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 
    return demo
 
-
-gradio_chatbot().launch(debug=True)
+gradio_chatbot().launch(debug=True)
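The substantive change is in the fourth hunk: the previous revision left the retriever= keyword argument dangling, a Python syntax error that would stop app.py from importing at all and is the likely cause of the Space's "Runtime error" status. The commit completes the call with retriever=keyword_retriever. Note that the file also builds an EnsembleRetriever over both the dense retriever and keyword_retriever (visible in that hunk's header), which passing BM25 alone leaves unused. Below is a minimal sketch of wiring the chain to the ensemble instead, assuming the llm, memory, retriever, and keyword_retriever objects from the diff; the 0.5/0.5 weights are placeholders, since the original call's weights fall outside the visible hunks.

from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import EnsembleRetriever

# Blend dense (Qdrant) and sparse (BM25) results before they reach the chain.
# The equal weights below are an assumption; the commit's actual weights are
# not shown in the visible hunks.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5],
)

conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,  # rather than keyword_retriever alone
    memory=memory,
    verbose=True,
)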
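One more hedged note, on the second hunk: the collection is created with size=3000, but OpenAIEmbeddings defaults to text-embedding-ada-002, whose vectors have 1536 dimensions, so "langchain_collection" would reject those embeddings if the vector store were actually pointed at it (whether it is depends on arguments that fall between the visible hunks). A sketch that derives the size from the embedding model instead, reusing the embeddings and qdrant_client objects from the diff:

# Size the collection from the model's actual output rather than hard-coding it.
dim = len(embeddings.embed_query("probe"))  # 1536 for text-embedding-ada-002
qdrant_client.create_collection(
    collection_name="langchain_collection",
    vectors_config=qdrant_models.VectorParams(size=dim, distance=qdrant_models.Distance.COSINE),
)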