# NOTE(review): the three lines here were Hugging Face Spaces page residue
# ("Spaces:" header and a duplicated "Runtime error" status badge) picked up
# when this script was copied; preserved as a comment so the file parses.
import os

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Qdrant  # NOTE(review): unused below; kept intentionally
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
from qdrant_client import QdrantClient
from qdrant_client.http import models as qdrant_models
# --- Configuration and document ingestion -----------------------------------
# Fail fast with a clear message when the key is absent; the original would
# instead raise a TypeError when assigning None into os.environ.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set")
os.environ["OPENAI_API_KEY"] = openai_api_key

pdf_folder_path = "files"

# Load every PDF in the folder into one flat list of LangChain documents.
documents = []
for filename in os.listdir(pdf_folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())
# Chunk the documents for retrieval (small chunks, slight overlap).
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# NOTE(review): this collection lives on a throwaway in-memory client and is
# never reused — QdrantVectorStore.from_documents(location=":memory:") below
# creates its own client and collection. Kept for compatibility.
# The vector size was 3000, which does not match OpenAIEmbeddings
# (text-embedding-ada-002 produces 1536-dimensional vectors); corrected here.
qdrant_client = QdrantClient(":memory:")
qdrant_client.create_collection(
    collection_name="langchain_collection",
    vectors_config=qdrant_models.VectorParams(
        size=1536, distance=qdrant_models.Distance.COSINE
    ),
)
# Sparse (BM25) embeddings enable Qdrant's hybrid dense + sparse retrieval.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    location=":memory:",
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)
# Conversation memory plus an equally-weighted ensemble of a dense (vector
# similarity) retriever and a keyword (BM25) retriever, 3 hits each.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
keyword_retriever = BM25Retriever.from_documents(docs)
keyword_retriever.k = 3
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5],
)
# Retrieval-augmented conversational chain: the ensemble retriever supplies
# context, the buffer memory carries the chat history between turns.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,
    memory=memory,
    verbose=True,
)
def chat_with_ai(user_input, chat_history):
    """Run one conversational turn against the retrieval chain.

    Args:
        user_input: The question typed by the user.
        chat_history: Gradio state — a list of (question, answer) tuples,
            mutated in place.

    Returns:
        Tuple of (updated chat_history, "") — the empty string clears the
        input textbox in the UI.
    """
    response = conversational_chain({"question": user_input})
    chat_history.append((user_input, response['answer']))
    return chat_history, ""
def clear_history():
    """Reset the UI: empty history for the chatbot, empty input textbox."""
    return [], ""
def gradio_chatbot():
    """Build and return the Gradio Blocks UI for the chatbot.

    Wires both the Send button and textbox Enter key to `chat_with_ai`,
    and the clear button to `clear_history`.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for Langchain")
        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")
        # Per-session list of (question, answer) tuples.
        chat_history = gr.State([])
        submit_button.click(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )
        user_input.submit(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )
        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input])
    return demo
gradio_chatbot().launch(debug=True) |