yashasgupta committed
Commit 8ce02a3 (verified) · Parent: e3b31c0

Create app.py

Files changed (1): app.py (+92, -0)
app.py ADDED
@@ -0,0 +1,92 @@
import os

import streamlit as st
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")

# Read the Google API key from the "k1" environment variable (e.g. a Space secret)
KEY = os.getenv("k1")
os.environ["GOOGLE_API_KEY"] = KEY

# Create the prompt template

chat_template = ChatPromptTemplate.from_messages([
    # The system message establishes the bot's role and behavior guidelines
    SystemMessage(content="""You are a helpful AI bot.
You take the context and question from the user. Your answer should be based on the specific context."""),
    # The human message carries the retrieved context and the user's question
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
Context:
{context}

Question:
{question}

Answer: """)
])

# The prompt takes the user's question plus the retrieved context;
# the retriever defined below controls how many chunks are returned.

from langchain_google_genai import ChatGoogleGenerativeAI

chat_model = ChatGoogleGenerativeAI(google_api_key=KEY,
                                    model="gemini-1.5-pro-latest")

from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

# Plain prompt -> model -> parser chain (the RAG chain below adds retrieval on top)
chain = chat_template | chat_model | output_parser
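
# Example of invoking this plain chain directly (hypothetical inputs, not part of the app flow):
# chain.invoke({"context": "some retrieved passage", "question": "What does the passage say?"})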

from langchain_community.document_loaders import PDFMinerLoader

# Load the “Leave No Context Behind” paper (arXiv:2404.07143) from a local path
dat = PDFMinerLoader(r"D:\Langchain\rag_system\2404.07143.pdf")
dat_nik = dat.load()

# Split the document into chunks
from langchain_text_splitters import NLTKTextSplitter

text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)

chunks = text_splitter.split_documents(dat_nik)
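
# NLTKTextSplitter relies on NLTK's sentence tokenizer, which needs the "punkt" data
# package; if it is not already present, a minimal sketch to fetch it once would be:
# import nltk
# nltk.download("punkt", quiet=True)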

# Create embeddings for the chunks using Google's embedding model
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=KEY,
                                               model="models/embedding-001")

# vectors = embedding_model.embed_documents(chunks)
# Store the chunks in a vector store
from langchain_community.vectorstores import Chroma

# Create a new Chroma database from the chunks
db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")

# Save the database to disk
db.persist()

# Open a connection to the persisted ChromaDB (must match the directory used above)
db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)

# Convert the Chroma connection into a retriever that returns the top 5 results
retriever = db_connection.as_retriever(search_kwargs={"k": 5})
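
# Note: Streamlit re-runs this whole script on every interaction, so the PDF is
# re-loaded, re-split, and re-embedded each time. A minimal sketch to avoid that,
# assuming the indexing steps above were wrapped in a hypothetical build_retriever()
# helper, would be:
#
# @st.cache_resource
# def build_retriever():
#     ...  # load, split, embed; return db_connection.as_retriever(search_kwargs={"k": 5})
#
# retriever = build_retriever()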

from langchain_core.runnables import RunnablePassthrough  # passes the user's question through unchanged

def format_docs(docs):
    # Merge the retrieved chunks into a single context string
    return "\n\n".join(doc.page_content for doc in docs)

# The retriever fetches the top 5 chunks and format_docs merges them into one context
# string, which flows with the question through the prompt, model, and output parser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | chat_template
    | chat_model
    | output_parser
)
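
# Example invocation outside the UI (hypothetical question about the paper):
# rag_chain.invoke("What is Infini-attention and how does it handle long contexts?")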

user_input = st.text_area("Ask Questions to AI")
if st.button("Submit"):
    st.subheader(":green[Query:]")
    st.subheader(user_input)
    response = rag_chain.invoke(user_input)
    st.subheader(":green[Response:]")
    st.write(response)