import streamlit as st
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
import os
import nltk
import tempfile

# NLTKTextSplitter (used below) relies on NLTK's punkt sentence tokenizer.
nltk.download("punkt")

st.title(':blue[LangChain:] A RAG System on the “Leave No Context Behind” Paper')
st.header("AI Chatbot :robot_face:")

os.environ["GOOGLE_API_KEY"] = os.getenv("k4")
# Creating a template

chat_template = ChatPromptTemplate.from_messages([
    # System Message establishes bot's role and general behavior guidelines
    SystemMessage(content="""You are a helpful AI bot.
    You take a context and a question from the user, and your answer should be based on that specific context."""),
    # Human Message Prompt Template
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context:
    {context} 
    
    Question: 
    {question}
    
    Answer: """)
])
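# Example of how the template is filled (illustrative values):
#   chat_template.format_messages(context="...", question="...")
# returns the [system, human] message list that the chat model consumes.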


from langchain_google_genai import ChatGoogleGenerativeAI  

chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")

from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

from langchain_community.document_loaders import PDFMinerLoader
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings 
from langchain_community.vectorstores import Chroma  
from langchain_core.runnables import RunnablePassthrough



uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:

    # PDFMinerLoader expects a file path, so write the upload to a temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    pdf_loader = PDFMinerLoader(pdf_path)
    dat_nik = pdf_loader.load()

    # Split the document into overlapping, sentence-aware chunks.
    text_splitter = NLTKTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = text_splitter.split_documents(dat_nik)

    # Embed the chunks with Google's embedding model.
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # Store the embedded chunks in a persistent Chroma collection on disk.
    db = Chroma.from_documents(chunks, embedding_model, persist_directory="./chroma_db_1")
    db.persist()

    # Re-open the same collection for querying; the directory must match the one used above.
    db_connection = Chroma(persist_directory="./chroma_db_1", embedding_function=embedding_model)

    # Retrieve the 5 most similar chunks for each question.
    retriever = db_connection.as_retriever(search_kwargs={"k": 5})

    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

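    # In the dict below, "context" is produced by piping the retriever's
    # documents through format_docs, while RunnablePassthrough() forwards the
    # user's question unchanged; together they fill the prompt's variables.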
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | chat_template
        | chat_model
        | output_parser
    )
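    # Sketch of a direct call (the Streamlit form below does the same thing):
    #   answer = rag_chain.invoke("What is Infini-attention?")
    # The question flows through retrieval, the prompt, Gemini, and the output parser.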

    user_input = st.text_area("Ask the AI a question")
    if st.button("Submit"):
        st.subheader(":green[Query:]")
        st.subheader(user_input)
        response = rag_chain.invoke(user_input)
        st.subheader(":green[Response:-]")
        st.write(response)
    