# RAG chatbot over local PDFs: Qdrant hybrid retrieval + LangChain + Gradio.
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain.vectorstores import Qdrant
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from qdrant_client import QdrantClient
from qdrant_client.http import models as qdrant_models
import os
from langchain.document_loaders import PyPDFLoader
import gradio as gr
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
# --- Environment & document ingestion --------------------------------------
# Fail fast with a clear message when the key is missing; assigning None into
# os.environ would otherwise raise an opaque TypeError.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set")
os.environ["OPENAI_API_KEY"] = openai_api_key

# Load every PDF in the local "files" folder into one document list.
pdf_folder_path = "files"
documents = []
for filename in os.listdir(pdf_folder_path):
    # Case-insensitive match so files like "REPORT.PDF" are not silently skipped.
    if filename.lower().endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())
# Chunk the PDFs: small chunks (128 chars, 5 overlap) keep retrieval granular.
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
docs = text_splitter.split_documents(documents)

# Dense embeddings — OpenAIEmbeddings defaults to text-embedding-ada-002,
# which produces 1536-dimensional vectors.
embeddings = OpenAIEmbeddings()

# In-memory Qdrant instance for this demo (nothing is persisted).
qdrant_client = QdrantClient(":memory:")
qdrant_client.create_collection(
    collection_name="langchain_collection",
    # Vector size must match the embedding model: ada-002 emits 1536 dims.
    # The original size=3000 would cause upserts into this collection to be
    # rejected for dimension mismatch.
    vectors_config=qdrant_models.VectorParams(size=1536, distance=qdrant_models.Distance.COSINE)
)
# Sparse BM25 embeddings, required for the hybrid (dense + sparse) mode below.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# Build an in-memory Qdrant vector store over the split documents.
# NOTE(review): `location=":memory:"` makes from_documents spin up its own
# Qdrant instance — presumably independent of the `qdrant_client` collection
# created above; confirm whether that earlier collection is used at all.
vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    location=":memory:",
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)

# Conversation buffer keyed "chat_history" — the key the chain's prompt expects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Dense retriever: top-3 similarity search against the vector store.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Keyword retriever: in-process BM25 over the same chunks, also top-3.
keyword_retriever = BM25Retriever.from_documents(docs)
keyword_retriever.k = 3

# Equal-weight ensemble of the dense and keyword retrievers (hybrid search).
ensemble_retriever = EnsembleRetriever(retrievers=[retriever,keyword_retriever],
                                       weights=[0.5, 0.5])

# Conversational RAG chain: gpt-3.5-turbo answering over the ensemble retriever,
# with the buffer memory carrying chat history between turns.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,
    memory=memory,
    verbose=True
)
def chat_with_ai(user_input, chat_history):
    """Handle one chat turn: query the conversational chain, append to history.

    Args:
        user_input: Question typed by the user.
        chat_history: Gradio State value — list of (question, answer) tuples.

    Returns:
        Tuple of (updated chat_history, "") — the empty string clears the
        input textbox in the UI.
    """
    # Ignore blank submissions instead of sending them to the LLM.
    if not user_input or not user_input.strip():
        return chat_history, ""
    response = conversational_chain({"question": user_input})
    chat_history.append((user_input, response['answer']))
    return chat_history, ""
def clear_history():
    """Reset the visible chat: an empty message list and a cleared textbox."""
    fresh_log = []
    return fresh_log, ""
def gradio_chatbot():
    """Build and return the Gradio Blocks UI for the chatbot.

    Returns:
        The gr.Blocks demo object (caller is responsible for .launch()).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for Langchain")
        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")
        # Server-side conversation log shared by both submit paths.
        chat_history = gr.State([])
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history],
                            outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history],
                          outputs=[chatbot, user_input])
        # Clear the visible chat AND reset the State list. The original only
        # cleared the Chatbot widget, so the stale State made the old
        # conversation reappear on the very next message.
        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input]).then(
            fn=lambda: [], outputs=[chat_history]
        )
    return demo
# Launch the app. The stray trailing "|" (scraped page gutter) was a syntax
# error; the __main__ guard keeps the UI from launching on import.
if __name__ == "__main__":
    gradio_chatbot().launch(debug=True)