File size: 3,316 Bytes
9a9542a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2288dd
 
9a9542a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from IPython.display import display, Markdown
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
import gradio as gr
import os

# Ingest every PDF in the source folder, then split the pages into chunks
# for keyword (BM25) retrieval further down.
pdf_folder_path = "files"

documents = []
for entry in os.listdir(pdf_folder_path):
    if not entry.endswith(".pdf"):
        continue
    documents.extend(PyPDFLoader(os.path.join(pdf_folder_path, entry)).load())

# Character-based splitting with the library's default chunk settings.
splitter = CharacterTextSplitter()
text_splits = splitter.split_documents(documents)


# OpenAI credentials come from the environment. OpenAIEmbeddings reads
# OPENAI_API_KEY itself; keep a module-level handle for the chat model below.
# (The original had a redundant `openai_api_key = openai_api_key` self-assignment.)
openai_api_key = os.getenv('OPENAI_API_KEY')

embeddings = OpenAIEmbeddings()

# Dense (semantic) index over the raw page documents.
# NOTE(review): the vector index is built from `documents` (whole pages) while
# BM25 uses `text_splits` (chunks) — confirm this asymmetry is intentional.
vector_store = FAISS.from_documents(documents, embeddings)

# Hybrid retrieval: semantic vector search + BM25 keyword search,
# equally weighted, each contributing its top 5 hits.
retriever_vectordb = vector_store.as_retriever(search_kwargs={"k": 5})
keyword_retriever = BM25Retriever.from_documents(text_splits)
keyword_retriever.k = 5
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever_vectordb, keyword_retriever],
    weights=[0.5, 0.5],
)



# Chat model used for answer generation.
# BUG FIX: the original passed `api_key=adminkey`, but `adminkey` is never
# defined anywhere in this file — a NameError at import time. Use the
# environment-derived key instead.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4, api_key=openai_api_key)

# Conversation memory keyed so the retrieval chain can thread follow-up
# questions through prior turns.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    return_messages=True
)

# Retrieval-augmented conversational chain over the hybrid retriever.
conversation_chain = ConversationalRetrievalChain.from_llm(
    retriever=ensemble_retriever,
    llm=llm,
    memory=memory,
    verbose=False
)


# Zephyr/ChatML-style prompt. The model is instructed to answer only from
# the retrieved CONTEXT and to say "I don't know" otherwise.
# FIX: the original template had a stray '>' after <|system|> ("<|system|>>"),
# which would be sent verbatim to the model.
template = """
<|system|>
You are an AI Assistant that follows instructions extremely well.
Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

# LCEL pipeline: the user input is passed through as {query}, while
# {context} is filled with the conversational chain's output.
# NOTE(review): feeding a ConversationalRetrievalChain's result dict in as
# "context" (rather than raw retrieved documents) is unusual and triggers a
# second LLM call per turn — confirm this double-generation is intentional.
chain = (
    {"context": conversation_chain, "query": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)



def chat_with_ai(user_input, chat_history):
    """Run a single turn through the RAG chain.

    Appends the (question, answer) pair to *chat_history* in place — the
    same list object backs the gr.State, so the mutation persists across
    turns — and returns the history for the chatbot widget plus an empty
    string to clear the input textbox.
    """
    reply = str(chain.invoke(user_input))
    chat_history.append((user_input, reply))
    return chat_history, ""


def gradio_chatbot():
    """Build the Gradio Blocks UI for the chatbot.

    Returns the (un-launched) gr.Blocks demo; the caller is responsible
    for calling .launch().
    """
    with gr.Blocks() as demo:
        # FIX: the original header said "LlamaIndex", but this app is built
        # entirely on LangChain.
        gr.Markdown("# Chat Interface for LangChain")

        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )

        submit_button = gr.Button("Send")

        # Per-session conversation history; mutated in place by chat_with_ai.
        chat_history = gr.State([])

        # Both the Send button and pressing Enter in the textbox submit a turn.
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])

    return demo

if __name__ == "__main__":
    # Launch only when executed as a script, so importing this module
    # (e.g. for reuse of the chain) doesn't start a web server.
    gradio_chatbot().launch(debug=True)