anasmkh committed on
Commit
f8bbed0
·
verified ·
1 Parent(s): f59b537

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from langchain.chat_models import ChatOpenAI
2
  from langchain.embeddings.openai import OpenAIEmbeddings
 
3
  from langchain.vectorstores import Qdrant
4
  from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.chains import ConversationalRetrievalChain
@@ -25,7 +26,7 @@ for filename in os.listdir(pdf_folder_path):
25
  documents.extend(loader.load())
26
 
27
  # Split the documents into manageable chunks
28
- text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=25)
29
  docs = text_splitter.split_documents(documents)
30
 
31
  # Initialize embeddings and Qdrant client
@@ -35,7 +36,7 @@ qdrant_client = QdrantClient(":memory:")
35
  # Recreate Qdrant collection
36
  qdrant_client.create_collection(
37
  collection_name="langchain_collection",
38
- vectors_config=qdrant_models.VectorParams(size=1536, distance=qdrant_models.Distance.COSINE)
39
  )
40
 
41
  # Set up the sparse embeddings for hybrid retrieval
@@ -55,15 +56,19 @@ vector_store = QdrantVectorStore.from_documents(
55
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
56
 
57
  # Set up the retriever
58
- retriever = vector_store.as_retriever( search_kwargs={"k": 3})
 
 
 
 
 
59
 
60
- # Set up the language model
61
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
62
 
63
  # Set up the conversational retrieval chain with memory
64
  conversational_chain = ConversationalRetrievalChain.from_llm(
65
  llm=llm,
66
- retriever=retriever,
67
  memory=memory,
68
  verbose=True
69
  )
 
1
  from langchain.chat_models import ChatOpenAI
2
  from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.retrievers import BM25Retriever,EnsembleRetriever
4
  from langchain.vectorstores import Qdrant
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.chains import ConversationalRetrievalChain
 
26
  documents.extend(loader.load())
27
 
28
  # Split the documents into manageable chunks
29
+ text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
30
  docs = text_splitter.split_documents(documents)
31
 
32
  # Initialize embeddings and Qdrant client
 
36
  # Recreate Qdrant collection
37
  qdrant_client.create_collection(
38
  collection_name="langchain_collection",
39
+ vectors_config=qdrant_models.VectorParams(size=3000, distance=qdrant_models.Distance.COSINE)
40
  )
41
 
42
  # Set up the sparse embeddings for hybrid retrieval
 
56
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
57
 
58
  # Set up the retriever
59
+ retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
60
+
61
+ keyword_retriever = BM25Retriever.from_documents(docs)
62
+ keyword_retriever.k = 3
63
+ ensemble_retriever = EnsembleRetriever(retrievers=[retriever,keyword_retriever],
64
+ weights=[0.6, 0.5])
65
 
 
66
  llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
67
 
68
  # Set up the conversational retrieval chain with memory
69
  conversational_chain = ConversationalRetrievalChain.from_llm(
70
  llm=llm,
71
+ retriever=ensemble_retriever,
72
  memory=memory,
73
  verbose=True
74
  )