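"""Hybrid-search RAG chatbot: LangChain + Qdrant (dense + BM25 sparse) over a
folder of PDFs, served through a Gradio chat interface."""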
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
import gradio as gr
import os

# ChatOpenAI and OpenAIEmbeddings read OPENAI_API_KEY from the environment;
# fail fast with a clear message if it is missing.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError("Set the OPENAI_API_KEY environment variable before running.")

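# Load every PDF in the local "files" folder into LangChain Document objects.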
pdf_folder_path = "files"
documents = []
for filename in os.listdir(pdf_folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())

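# Split into small chunks; CharacterTextSplitter splits on "\n\n" by default,
# so chunk_size=128 characters is a target, not a hard cap.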
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# No separate Qdrant client or manual collection setup is needed:
# QdrantVectorStore.from_documents below spins up its own in-memory instance
# and creates the collection with both dense and sparse vector configs, as
# RetrievalMode.HYBRID requires.

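# Sparse BM25 embeddings for the keyword half of hybrid search (requires the
# fastembed package).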
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    location=":memory:",
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)

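# Conversation memory so follow-up questions can build on earlier turns.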
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

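# Retriever over the hybrid Qdrant store, returning the top 3 chunks per query.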
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Combine the hybrid vector retriever with an in-memory BM25 retriever over
# the same chunks (requires rank_bm25), weighting the two ranked lists equally.
keyword_retriever = BM25Retriever.from_documents(docs)
keyword_retriever.k = 3
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.5, 0.5],
)

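# Chat model that generates the final answer from the retrieved chunks.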
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)

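# Conversational RAG chain: condenses each follow-up into a standalone
# question, retrieves with the ensemble retriever, and answers with the LLM.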
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,
    memory=memory,
    verbose=True
)


def chat_with_ai(user_input, chat_history):
    # The chain pulls prior turns from its ConversationBufferMemory.
    response = conversational_chain.invoke({"question": user_input})
    chat_history.append((user_input, response["answer"]))
    return chat_history, ""


def clear_history():
    # Reset the transcript, the input box, the stored history, and the
    # chain's conversation memory.
    memory.clear()
    return [], "", []


def gradio_chatbot():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for Langchain")

        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )

        submit_button = gr.Button("Send")
        clear_button = gr.Button("Delete Context")
        chat_history = gr.State([])

        # Both the Send button and pressing Enter submit the question.
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        # Clearing must also reset the gr.State list, or old turns reappear
        # on the next question.
        clear_button.click(fn=clear_history, outputs=[chatbot, user_input, chat_history])

    return demo

gradio_chatbot().launch(debug=True)