import os

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
# Read the OpenAI API key from the environment (on Spaces, set it as a secret);
# fail fast with a clear message instead of a TypeError if it is missing
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
# Load PDF documents from the `files` folder (PyPDFLoader yields one Document per page)
pdf_folder_path = "files"
documents = []
for filename in os.listdir(pdf_folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())
# Split the documents into manageable chunks
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=25)
docs = text_splitter.split_documents(documents)
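# Note: CharacterTextSplitter measures chunk_size in characters (not tokens)
# and splits only on its separator (default "\n\n"), so individual chunks can
# exceed 512 characters when a single paragraph is longer than that.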
# Initialize the dense embedding model (OpenAI's default, 1536-dimensional vectors)
embeddings = OpenAIEmbeddings()
# No separate QdrantClient or create_collection call is needed here:
# QdrantVectorStore.from_documents below starts its own in-memory instance and
# creates the collection with both dense and sparse vector configs, which
# hybrid retrieval requires. A hand-made dense-only collection would go unused.
# Set up the sparse embeddings for hybrid retrieval
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
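# "Qdrant/bm25" produces BM25-weighted sparse vectors and is backed by the
# fastembed package, which must be installed for FastEmbedSparse to work.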
# Initialize the vector store with hybrid retrieval mode (dense + sparse scoring)
vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    location=":memory:",
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)
# Set up conversational memory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
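# memory_key="chat_history" matches the input key ConversationalRetrievalChain
# expects; return_messages=True stores structured messages rather than one string.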
# Set up the retriever
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
# Set up the language model
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
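# A low temperature keeps answers close to the retrieved context rather than
# encouraging free-form generation.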
# Set up the conversational retrieval chain with memory
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    verbose=True,
)
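# The chain condenses each new question together with the buffered chat history
# into a standalone query, retrieves documents for that query, and then answers
# from the retrieved context.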
def chat_with_ai(user_input, chat_history):
    # The chain reads and updates its own ConversationBufferMemory; the Gradio
    # state list only feeds the Chatbot display.
    response = conversational_chain({"question": user_input})
    chat_history.append((user_input, response["answer"]))
    return chat_history, ""  # return "" to clear the input textbox
# Gradio interface
def gradio_chatbot():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for Langchain")
        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        submit_button = gr.Button("Send")
        chat_history = gr.State([])
        # Bind both the button and Enter in the textbox to the chat function
        submit_button.click(
            chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input]
        )
        user_input.submit(
            chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input]
        )
    return demo
# Launch the Gradio interface; debug=True prints full tracebacks to the
# console / Space logs, which helps when diagnosing startup failures
gradio_chatbot().launch(debug=True)