nidhibodar11 committed
Commit d626451 · verified · 1 Parent(s): 2f42a38

added app.py file

Files changed (1):
app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
+ import streamlit as st
+ import os
+ import time
+
+ from langchain_groq import ChatGroq
+ from langchain_community.document_loaders import WebBaseLoader
+ # from langchain_community.embeddings import OllamaEmbeddings
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.chains import create_retrieval_chain
+ from langchain_community.vectorstores.faiss import FAISS
+ from PyPDF2 import PdfReader
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ ## Load the Groq and Google API keys from the environment
+ groq_api_key = os.environ['GROQ_API_KEY']
+ google_api_key = os.environ['GOOGLE_API_KEY']
+
+ st.title("Ask your questions from PDF(s) or a website")
+
+ # Prompt the user to choose between PDFs or a website
+ option = st.radio("Choose input type:", ("PDF(s)", "Website"), index=None)
+
+
+ def get_pdf_processed(pdf_docs):
+     """Extract the raw text from all uploaded PDF files."""
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             # extract_text() can return None for pages with no extractable text
+             text += page.extract_text() or ""
+     return text
+
+
+ def llm_model():
+     """Build the retrieval chain over the indexed content and answer the user's question."""
+     llm = ChatGroq(groq_api_key=groq_api_key, model="mixtral-8x7b-32768")
+     prompt = ChatPromptTemplate.from_template(
+         """
+         Answer the question based on the provided context only.
+         Please provide the most accurate response based on the question.
+         <context>
+         {context}
+         </context>
+         Questions: {input}
+         """
+     )
+     document_chain = create_stuff_documents_chain(llm, prompt)
+
+     # The vector store is created before llm_model() is called, but guard anyway
+     if "vector" not in st.session_state:
+         st.warning("Please load a website or PDF(s) first.")
+         return
+     retriever = st.session_state.vector.as_retriever()
+     retrieval_chain = create_retrieval_chain(retriever, document_chain)
+
+     user_question = st.text_input("Input your question here")
+
+     if user_question:
+         start = time.process_time()
+         response = retrieval_chain.invoke({"input": user_question})
+         print("Response time:", time.process_time() - start)
+         st.write(response['answer'])
+
+
+ # Shared components: embedding model and text splitter
+ st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001', google_api_key=google_api_key)
+ st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+
+ if option:
+     if option == "Website":
+         website_link = st.text_input("Enter the website link:")
+         if website_link:
+             # Load the page, split it into chunks, and index the chunks in FAISS
+             st.session_state.loader = WebBaseLoader(website_link)
+             st.session_state.docs = st.session_state.loader.load()
+             st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
+             st.session_state.vector = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
+             llm_model()
+
+     elif option == "PDF(s)":
+         pdf_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
+         if pdf_files:
+             # Extract the PDF text, split it into chunks, and index the chunks in FAISS
+             st.session_state.docs = get_pdf_processed(pdf_files)
+             st.session_state.final_documents = st.session_state.text_splitter.split_text(st.session_state.docs)
+             st.session_state.vector = FAISS.from_texts(st.session_state.final_documents, st.session_state.embeddings)
+             llm_model()
+
+ # with st.expander("Document Similarity Search"):
+ #     for i, doc in enumerate(response['context']):
+ #         st.write(doc.page_content)
+ #         st.write("-----------------------------")
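
To try the added app locally, a minimal setup sketch (the package list is inferred from the imports above and the two variable names match what app.py reads from the environment; treat the exact package set as an assumption to adjust for your setup):

# .env in the project root, picked up by load_dotenv()
GROQ_API_KEY=<your Groq API key>
GOOGLE_API_KEY=<your Google API key>

# install the inferred dependencies, then start the Streamlit app
pip install streamlit langchain langchain-community langchain-groq langchain-google-genai faiss-cpu PyPDF2 python-dotenv
streamlit run app.py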