Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -12,11 +12,9 @@ from langchain.document_loaders import PyPDFLoader
 import gradio as gr
 from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
 
-# Set OpenAI API Key
 openai_api_key = os.getenv('OPENAI_API_KEY')
 os.environ["OPENAI_API_KEY"] = openai_api_key
 
-# Load PDF documents
 pdf_folder_path = "files"
 documents = []
 for filename in os.listdir(pdf_folder_path):
@@ -25,24 +23,19 @@ for filename in os.listdir(pdf_folder_path):
         loader = PyPDFLoader(file_path)
         documents.extend(loader.load())
 
-# Split the documents into manageable chunks
 text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
 docs = text_splitter.split_documents(documents)
 
-# Initialize embeddings and Qdrant client
 embeddings = OpenAIEmbeddings()
 qdrant_client = QdrantClient(":memory:")
 
-# Recreate Qdrant collection
 qdrant_client.create_collection(
     collection_name="langchain_collection",
     vectors_config=qdrant_models.VectorParams(size=3000, distance=qdrant_models.Distance.COSINE)
 )
 
-# Set up the sparse embeddings for hybrid retrieval
 sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
 
-# Initialize the vector store with hybrid retrieval mode
 vector_store = QdrantVectorStore.from_documents(
     docs,
     embedding=embeddings,
@@ -52,10 +45,8 @@ vector_store = QdrantVectorStore.from_documents(
     retrieval_mode=RetrievalMode.HYBRID,
 )
 
-# Set up conversational memory
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
-# Set up the retriever
 retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
 
 keyword_retriever = BM25Retriever.from_documents(docs)
@@ -65,10 +56,9 @@ ensemble_retriever = EnsembleRetriever(retrievers=[retriever,keyword_retriever],
 
 llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
 
-# Set up the conversational retrieval chain with memory
 conversational_chain = ConversationalRetrievalChain.from_llm(
     llm=llm,
-    retriever=
+    retriever=keyword_retriever,
     memory=memory,
     verbose=True
 )
@@ -81,7 +71,6 @@ def chat_with_ai(user_input, chat_history):
 
     return chat_history, ""
 
-# Gradio interface
 def gradio_chatbot():
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for Langchain")
@@ -95,11 +84,9 @@ def gradio_chatbot():
 
        chat_history = gr.State([])
 
-        # Bind button and textbox to chat function
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 
    return demo
 
-
-gradio_chatbot().launch(debug=True)
+gradio_chatbot().launch(debug=True)
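The substantive change is in the fourth hunk: the previous revision left the retriever= keyword argument dangling, a Python syntax error that would stop app.py from importing at all and is the likely cause of the Space's "Runtime error" status. The commit completes the call with retriever=keyword_retriever. Note that the file also builds an EnsembleRetriever over both the dense retriever and keyword_retriever (visible in that hunk's header), which passing BM25 alone leaves unused. Below is a minimal sketch of wiring the chain to the ensemble instead, assuming the llm, memory, retriever, and keyword_retriever objects from the diff; the 0.5/0.5 weights are placeholders, since the original call's weights fall outside the visible hunks.

from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import EnsembleRetriever

# Blend dense (Qdrant) and sparse (BM25) results before they reach the chain.
# The equal weights below are an assumption; the commit's actual weights are
# not shown in the visible hunks.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5],
)

conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,  # rather than keyword_retriever alone
    memory=memory,
    verbose=True,
)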
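One more hedged note, on the second hunk: the collection is created with size=3000, but OpenAIEmbeddings defaults to text-embedding-ada-002, whose vectors have 1536 dimensions, so "langchain_collection" would reject those embeddings if the vector store were actually pointed at it (whether it is depends on arguments that fall between the visible hunks). A sketch that derives the size from the embedding model instead, reusing the embeddings and qdrant_client objects from the diff:

# Size the collection from the model's actual output rather than hard-coding it.
dim = len(embeddings.embed_query("probe"))  # 1536 for text-embedding-ada-002
qdrant_client.create_collection(
    collection_name="langchain_collection",
    vectors_config=qdrant_models.VectorParams(size=dim, distance=qdrant_models.Distance.COSINE),
)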