Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from langchain.chat_models import ChatOpenAI
|
2 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
|
3 |
from langchain.vectorstores import Qdrant
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
5 |
from langchain.chains import ConversationalRetrievalChain
|
@@ -25,7 +26,7 @@ for filename in os.listdir(pdf_folder_path):
|
|
25 |
documents.extend(loader.load())
|
26 |
|
27 |
# Split the documents into manageable chunks
|
28 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
29 |
docs = text_splitter.split_documents(documents)
|
30 |
|
31 |
# Initialize embeddings and Qdrant client
|
@@ -35,7 +36,7 @@ qdrant_client = QdrantClient(":memory:")
|
|
35 |
# Recreate Qdrant collection
|
36 |
qdrant_client.create_collection(
|
37 |
collection_name="langchain_collection",
|
38 |
-
vectors_config=qdrant_models.VectorParams(size=
|
39 |
)
|
40 |
|
41 |
# Set up the sparse embeddings for hybrid retrieval
|
@@ -55,15 +56,19 @@ vector_store = QdrantVectorStore.from_documents(
|
|
55 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
56 |
|
57 |
# Set up the retriever
|
58 |
-
retriever = vector_store.as_retriever( search_kwargs={"k": 3})
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
# Set up the language model
|
61 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
|
62 |
|
63 |
# Set up the conversational retrieval chain with memory
|
64 |
conversational_chain = ConversationalRetrievalChain.from_llm(
|
65 |
llm=llm,
|
66 |
-
retriever=
|
67 |
memory=memory,
|
68 |
verbose=True
|
69 |
)
|
|
|
1 |
from langchain.chat_models import ChatOpenAI
|
2 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
3 |
+
from langchain.retrievers import BM25Retriever, EnsembleRetriever
|
4 |
from langchain.vectorstores import Qdrant
|
5 |
from langchain.text_splitter import CharacterTextSplitter
|
6 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
26 |
documents.extend(loader.load())
|
27 |
|
28 |
# Split the documents into manageable chunks
|
29 |
+
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
|
30 |
docs = text_splitter.split_documents(documents)
|
31 |
|
32 |
# Initialize embeddings and Qdrant client
|
|
|
36 |
# Recreate Qdrant collection
|
37 |
qdrant_client.create_collection(
|
38 |
collection_name="langchain_collection",
|
39 |
+
vectors_config=qdrant_models.VectorParams(size=1536, distance=qdrant_models.Distance.COSINE)
|
40 |
)
|
41 |
|
42 |
# Set up the sparse embeddings for hybrid retrieval
|
|
|
56 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
57 |
|
58 |
# Set up the retriever
|
59 |
+
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
60 |
+
|
61 |
+
keyword_retriever = BM25Retriever.from_documents(docs)
|
62 |
+
keyword_retriever.k = 3
|
63 |
+
ensemble_retriever = EnsembleRetriever(retrievers=[retriever, keyword_retriever],
|
64 |
+
weights=[0.6, 0.4])
|
65 |
|
|
|
66 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
|
67 |
|
68 |
# Set up the conversational retrieval chain with memory
|
69 |
conversational_chain = ConversationalRetrievalChain.from_llm(
|
70 |
llm=llm,
|
71 |
+
retriever=ensemble_retriever,
|
72 |
memory=memory,
|
73 |
verbose=True
|
74 |
)
|