import streamlit as st
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
import os
import nltk
import tempfile
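# NLTKTextSplitter relies on NLTK's sentence tokenizer, so fetch the "punkt" data up front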
nltk.download("punkt")
st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")
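# Read the Google API key from the environment (stored here under the name "k4")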
os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
# Creating a template
chat_template = ChatPromptTemplate.from_messages([
    # System Message establishes bot's role and general behavior guidelines
    SystemMessage(content="""You are a Helpful AI Bot.
You take the context and question from user. Your answer should be based on the specific context."""),
    # Human Message Prompt Template
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
Context:
{context}
Question:
{question}
Answer: """)
])
from langchain_google_genai import ChatGoogleGenerativeAI
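# Gemini 1.5 Pro is used as the chat model for answering questions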
chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()
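# Prompt -> model -> parser; the RAG chain defined below reuses these same pieces with retrieved context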
chain = chat_template | chat_model | output_parser
from langchain_community.document_loaders import PDFMinerLoader
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
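# Let the user upload the PDF that will serve as the retrieval corpus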
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    # PDFMinerLoader expects a file path, so write the uploaded bytes to a temporary file first
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(uploaded_file.read())
    pdf_loader = PDFMinerLoader(tmp_pdf.name)
    dat_nik = pdf_loader.load()
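    # Split the loaded document into overlapping, sentence-aligned chunks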
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents(dat_nik)
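    # Embed each chunk with Google's embedding model and persist the vectors in a local Chroma store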
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
    db.persist()
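    # Reopen the persisted store and expose it as a retriever that returns the top 5 matching chunks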
    db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)
    retriever = db_connection.as_retriever(search_kwargs={"k": 5})
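    # Concatenate the retrieved chunks into a single context string for the prompt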
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )
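    # Collect the user's question and run the RAG chain when Submit is clicked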
    user_input = st.text_area("Ask Questions to AI")
    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:]")
        st.write(response)