yashasgupta committed
Commit 8ce02a3 (verified) · Parent: e3b31c0

Create app.py

Files changed (1): app.py (+92, -0)
app.py ADDED
@@ -0,0 +1,92 @@
import os

import streamlit as st
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")

# Read the Google API key from the "k1" environment variable (e.g. a Space secret)
KEY = os.getenv("k1")
os.environ["GOOGLE_API_KEY"] = KEY

# Create the prompt template

chat_template = ChatPromptTemplate.from_messages([
    # The system message establishes the bot's role and behavior guidelines
    SystemMessage(content="""You are a helpful AI bot.
You take the context and question from the user. Your answer should be based on the specific context."""),
    # The human message carries the retrieved context and the user's question
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
Context:
{context}

Question:
{question}

Answer: """)
])

# The prompt takes the user's question plus the retrieved context;
# the retriever defined below controls how many chunks are returned.

from langchain_google_genai import ChatGoogleGenerativeAI

chat_model = ChatGoogleGenerativeAI(google_api_key=KEY,
                                    model="gemini-1.5-pro-latest")

from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

# Plain prompt -> model -> parser chain (the RAG chain below adds retrieval on top)
chain = chat_template | chat_model | output_parser
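
# Example of invoking this plain chain directly (hypothetical inputs, not part of the app flow):
# chain.invoke({"context": "some retrieved passage", "question": "What does the passage say?"})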

from langchain_community.document_loaders import PDFMinerLoader

# Load the “Leave No Context Behind” paper (arXiv:2404.07143) from a local path
dat = PDFMinerLoader(r"D:\Langchain\rag_system\2404.07143.pdf")
dat_nik = dat.load()

# Split the document into chunks
from langchain_text_splitters import NLTKTextSplitter

text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)

chunks = text_splitter.split_documents(dat_nik)
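
# NLTKTextSplitter relies on NLTK's sentence tokenizer, which needs the "punkt" data
# package; if it is not already present, a minimal sketch to fetch it once would be:
# import nltk
# nltk.download("punkt", quiet=True)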

# Create embeddings for the chunks using Google's embedding model
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=KEY,
                                               model="models/embedding-001")

# vectors = embedding_model.embed_documents(chunks)
# Store the chunks in a vector store
from langchain_community.vectorstores import Chroma

# Create a new Chroma database from the chunks
db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")

# Save the database to disk
db.persist()

# Open a connection to the persisted ChromaDB (must match the directory used above)
db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)

# Convert the Chroma connection into a retriever that returns the top 5 results
retriever = db_connection.as_retriever(search_kwargs={"k": 5})
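
# Note: Streamlit re-runs this whole script on every interaction, so the PDF is
# re-loaded, re-split, and re-embedded each time. A minimal sketch to avoid that,
# assuming the indexing steps above were wrapped in a hypothetical build_retriever()
# helper, would be:
#
# @st.cache_resource
# def build_retriever():
#     ...  # load, split, embed; return db_connection.as_retriever(search_kwargs={"k": 5})
#
# retriever = build_retriever()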

from langchain_core.runnables import RunnablePassthrough  # passes the user's question through unchanged

def format_docs(docs):
    # Merge the retrieved chunks into a single context string
    return "\n\n".join(doc.page_content for doc in docs)

# The retriever fetches the top 5 chunks and format_docs merges them into one context
# string, which flows with the question through the prompt, model, and output parser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | chat_template
    | chat_model
    | output_parser
)
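
# Example invocation outside the UI (hypothetical question about the paper):
# rag_chain.invoke("What is Infini-attention and how does it handle long contexts?")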

user_input = st.text_area("Ask Questions to AI")
if st.button("Submit"):
    st.subheader(":green[Query:]")
    st.subheader(user_input)
    response = rag_chain.invoke(user_input)
    st.subheader(":green[Response:]")
    st.write(response)