import streamlit as st
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
import os
import tempfile
import nltk
nltk.download("punkt")
st.title(':blue[Langchain:] A Rag System on “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")
# Expose the API key stored in the "k4" secret/environment variable as GOOGLE_API_KEY for Gemini
os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
# Creating a template
chat_template = ChatPromptTemplate.from_messages([
    # System message establishes the bot's role and general behaviour guidelines
    SystemMessage(content="""You are a Helpful AI Bot.
    You take the context and question from the user. Your answer should be based on the specific context."""),
    # Human message prompt template carrying the retrieved context and the user's question
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context:
    {context}
    Question:
    {question}
    Answer: """)
])
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini 1.5 Pro is used as the chat model
chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()
chain = chat_template | chat_model | output_parser
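# Note: `chain` above is the plain prompt -> Gemini -> string pipeline; the retrieval-augmented
# `rag_chain` built further below is the one actually invoked for user queries.
# Illustrative only (hypothetical input, not part of the app flow):
#   chain.invoke({"context": "some passage from the paper", "question": "what does it say?"})
# would return the model's answer as a plain string.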
from langchain_community.document_loaders import PDFMinerLoader
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")

if uploaded_file is not None:
    # PDFMinerLoader expects a file path, so save the uploaded PDF to a temporary file first
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.read())
        tmp_path = tmp_file.name
    pdf_loader = PDFMinerLoader(tmp_path)
    dat_nik = pdf_loader.load()

    # Split the document into overlapping chunks
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents(dat_nik)

    # Embed the chunks and store them in a persistent Chroma database
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
    db.persist()

    # Reconnect to the persisted database and expose it as a retriever returning the top 5 chunks
    db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
    retriever = db_connection.as_retriever(search_kwargs={"k": 5})
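    # Illustrative only (not executed by the app): the retriever is a Runnable, so a call like
    #   retriever.invoke("What is Infini-attention?")
    # would return the 5 most similar chunks as Document objects.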
    # Alternative (kept from an earlier version): load the paper directly from a local file
    # instead of the uploader, then split and embed it the same way as above.
    # dat = PDFMinerLoader("2404.07143.pdf")
    # dat_nik = dat.load()
    # text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    # chunks = text_splitter.split_documents(dat_nik)
    from langchain_core.runnables import RunnablePassthrough  # passes the user's question through unchanged

    def format_docs(docs):
        # Combine the retrieved chunks into a single context string
        return "\n\n".join(doc.page_content for doc in docs)

    # RAG chain: the question is sent to the retriever (top-5 chunks -> formatted context)
    # and, via RunnablePassthrough, into the {question} slot of the prompt
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )
    user_input = st.text_area("Ask Questions to AI")

    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:-]")
        st.write(response)
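
# To try the app locally (assuming this file is saved as app.py and the "k4" secret holds a valid
# Google API key): run `streamlit run app.py`, upload the "Leave No Context Behind" PDF,
# type a question, and press Submit.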