Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from langchain.chat_models import ChatOpenAI
|
| 2 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
|
|
| 3 |
from langchain.vectorstores import Qdrant
|
| 4 |
from langchain.text_splitter import CharacterTextSplitter
|
| 5 |
from langchain.chains import ConversationalRetrievalChain
|
|
@@ -25,7 +26,7 @@ for filename in os.listdir(pdf_folder_path):
|
|
| 25 |
documents.extend(loader.load())
|
| 26 |
|
| 27 |
# Split the documents into manageable chunks
|
| 28 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
| 29 |
docs = text_splitter.split_documents(documents)
|
| 30 |
|
| 31 |
# Initialize embeddings and Qdrant client
|
|
@@ -35,7 +36,7 @@ qdrant_client = QdrantClient(":memory:")
|
|
| 35 |
# Recreate Qdrant collection
|
| 36 |
qdrant_client.create_collection(
|
| 37 |
collection_name="langchain_collection",
|
| 38 |
-
vectors_config=qdrant_models.VectorParams(size=
|
| 39 |
)
|
| 40 |
|
| 41 |
# Set up the sparse embeddings for hybrid retrieval
|
|
@@ -55,15 +56,19 @@ vector_store = QdrantVectorStore.from_documents(
|
|
| 55 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
| 56 |
|
| 57 |
# Set up the retriever
|
| 58 |
-
retriever = vector_store.as_retriever( search_kwargs={"k": 3})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
-
# Set up the language model
|
| 61 |
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
|
| 62 |
|
| 63 |
# Set up the conversational retrieval chain with memory
|
| 64 |
conversational_chain = ConversationalRetrievalChain.from_llm(
|
| 65 |
llm=llm,
|
| 66 |
-
retriever=
|
| 67 |
memory=memory,
|
| 68 |
verbose=True
|
| 69 |
)
|
|
|
|
| 1 |
from langchain.chat_models import ChatOpenAI
|
| 2 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 3 |
+
from langchain.retrievers import BM25Retriever,EnsembleRetriever
|
| 4 |
from langchain.vectorstores import Qdrant
|
| 5 |
from langchain.text_splitter import CharacterTextSplitter
|
| 6 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
|
| 26 |
documents.extend(loader.load())
|
| 27 |
|
# Split the documents into manageable chunks
# NOTE(review): chunk_size=128 (characters, for CharacterTextSplitter) is
# very small for PDF text and overlap=5 is minimal — confirm this
# granularity is intended.
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
docs = text_splitter.split_documents(documents)
|
| 31 |
|
| 32 |
# Initialize embeddings and Qdrant client
|
|
|
|
# Recreate Qdrant collection
# FIX: the vector size must match the embedding dimensionality. The default
# OpenAIEmbeddings model (text-embedding-ada-002) produces 1536-dim vectors;
# the previous size=3000 matches no OpenAI embedding model and makes Qdrant
# reject every upsert at runtime (dimension mismatch).
# TODO(review): if a different embedding model is configured elsewhere,
# set this to that model's output dimension instead.
qdrant_client.create_collection(
    collection_name="langchain_collection",
    vectors_config=qdrant_models.VectorParams(size=1536, distance=qdrant_models.Distance.COSINE),
)
|
| 41 |
|
| 42 |
# Set up the sparse embeddings for hybrid retrieval
|
|
|
|
# Conversation memory shared with the retrieval chain; stores the running
# chat history as message objects under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Set up the retriever
# Dense (vector-similarity) retriever over the Qdrant store, top 3 hits.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Sparse keyword retriever (BM25) over the same chunks, also top 3 hits.
keyword_retriever = BM25Retriever.from_documents(docs)
keyword_retriever.k = 3

# Hybrid retrieval: fuse dense and sparse rankings.
# FIX: ensemble weights should sum to 1.0 — [0.6, 0.5] (sum 1.1) was almost
# certainly a typo for [0.6, 0.4], slightly favouring dense retrieval.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.6, 0.4],
)
|
| 65 |
|
|
|
|
# Chat model used for answer generation; low temperature keeps answers
# mostly deterministic.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)

# Set up the conversational retrieval chain with memory
# Wires together the LLM, the hybrid (dense + BM25) retriever, and the
# chat-history buffer; verbose=True logs intermediate chain steps.
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,
    memory=memory,
    verbose=True
)
|