anasmkh's picture
Update app.py
d2288dd verified
raw
history blame
3.32 kB
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from IPython.display import display, Markdown
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
import gradio as gr
import os
# Folder containing the source PDFs to index.
pdf_folder_path = "files"

# Load every PDF in the folder into one flat list of LangChain documents
# (PyPDFLoader yields one document per page).
documents = []
for filename in os.listdir(pdf_folder_path):
    # Case-insensitive extension check so files like "REPORT.PDF" are not
    # silently skipped (the original matched only lowercase ".pdf").
    if filename.lower().endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())

# Chunk the loaded pages; these chunks feed the retrievers below.
text_splitter = CharacterTextSplitter()
text_splits = text_splitter.split_documents(documents)
# Read the OpenAI key from the environment. OpenAIEmbeddings picks up
# OPENAI_API_KEY automatically; the variable is kept for explicit use
# elsewhere (e.g. when constructing the chat model).
# Removed the no-op self-assignment `openai_api_key = openai_api_key`.
openai_api_key = os.getenv('OPENAI_API_KEY')

embeddings = OpenAIEmbeddings()

# Build the dense (vector) index over the SAME chunks the BM25 retriever
# uses, so both halves of the ensemble score identical units of text.
# (The original indexed the unsplit `documents` here, which made the two
# retrievers return incomparable spans.)
vector_store = FAISS.from_documents(text_splits, embeddings)
retriever_vectordb = vector_store.as_retriever(search_kwargs={"k": 5})

# Sparse keyword (BM25) retriever over the same chunks.
keyword_retriever = BM25Retriever.from_documents(text_splits)
keyword_retriever.k = 5

# Hybrid retrieval: equal-weight rank fusion of dense and sparse results.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever_vectordb, keyword_retriever],
    weights=[0.5, 0.5],
)
# BUG FIX: the original passed `api_key=adminkey`, but `adminkey` is never
# defined anywhere in this file, so the script crashed with NameError at
# import time. Use the key read from the environment instead.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4, api_key=openai_api_key)

# Conversation memory: stores prior turns under "chat_history" and treats
# "question" as the user-input key, returning messages as objects.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    return_messages=True,
)

# Retrieval-augmented conversational chain over the hybrid retriever.
conversation_chain = ConversationalRetrievalChain.from_llm(
    retriever=ensemble_retriever,
    llm=llm,
    memory=memory,
    verbose=False,
)
# Prompt for the final answer-generation step. The literal is reproduced
# byte-for-byte: {context} is filled by the retrieval chain's output and
# {query} by the raw user question.
template = """
<|system|>>
You are an AI Assistant that follows instructions extremely well.
Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT
CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

# Assemble the LCEL pipeline in named stages:
# the input mapping feeds the retrieval chain's result in as "context"
# and passes the user's question straight through as "query"; the prompt
# renders both, the LLM answers, and the parser extracts plain text.
retrieval_map = {"context": conversation_chain, "query": RunnablePassthrough()}
chain = retrieval_map | prompt | llm | output_parser
def chat_with_ai(user_input, chat_history):
    """Run one chat turn through the RAG chain.

    Appends the (question, answer) pair to chat_history (mutated in place
    and also returned for the Gradio output binding) and returns "" as the
    second value to clear the input textbox.
    """
    answer = str(chain.invoke(user_input))
    chat_history.append((user_input, answer))
    return chat_history, ""
def gradio_chatbot():
    """Build and return the (unlaunched) Gradio Blocks UI for the chatbot.

    Both clicking the Send button and pressing Enter in the textbox route
    the question through chat_with_ai; the outputs update the chat display
    and clear the textbox.
    """
    with gr.Blocks() as demo:
        # Fixed label: this app is built on LangChain — the original header
        # said "LlamaIndex", contradicting the "Langchain Chatbot" label below.
        gr.Markdown("# Chat Interface for LangChain")
        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        submit_button = gr.Button("Send")
        # Per-session conversation history as a list of (question, answer) pairs.
        chat_history = gr.State([])

        submit_button.click(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )
        user_input.submit(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )

    return demo
# Build the UI and start the Gradio server; debug=True surfaces errors in the UI.
gradio_chatbot().launch(debug=True)