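# Streamlit RAG app: index PDF or website content into a Pinecone vector store
# with HuggingFace embeddings, then answer questions over it with a Groq-hosted
# LLM via a LangChain retrieval chain.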
# Langchain imports
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_pinecone import PineconeVectorStore
# Other imports
import streamlit as st
import time
import pdfplumber
st.title("Ask questions from your PDF(s) or website")

# Prompt the user to choose between PDFs or a website
option = st.radio("Choose input type:", ("PDF(s)", "Website"), index=None)
def get_pdf_processed(pdf_docs):
    """Extract raw text from each uploaded PDF with pdfplumber."""
    text = ""
    for pdf in pdf_docs:
        with pdfplumber.open(pdf) as pdf_file:
            for page in pdf_file.pages:
                # extract_text() returns None for pages with no text layer
                text += page.extract_text() or ""
    return text
def llm_model():
    # Groq-hosted LLM; the API key comes from Streamlit secrets
    llm = ChatGroq(model="mixtral-8x7b-32768", groq_api_key=st.secrets['GROQ_API_KEY'])
    prompt = ChatPromptTemplate.from_template(
        """
        Answer the question based on the provided context only.
        Please provide the most accurate response to the question.
        <context>
        {context}
        </context>
        Question: {input}
        """
    )
    document_chain = create_stuff_documents_chain(llm, prompt)
    if not st.session_state.get("vector"):
        st.warning("No documents have been indexed yet.")
        return
    retriever = st.session_state.vector.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    question = st.text_input("Input your question here")
    if question:
        start = time.time()  # wall-clock time; process_time() would miss time spent waiting on the API
        response = retrieval_chain.invoke({"input": question})
        st.write(response['answer'])
        st.write("Response time: ", time.time() - start)
# Alternative embedding backend, kept for reference:
# st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001', google_api_key=st.secrets['GOOGLE_API_KEY'])
model_name = "all-MiniLM-L6-v2"
st.session_state.embeddings = HuggingFaceEmbeddings(model_name=model_name)
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
index_name = "myindex"
st.session_state.vector = PineconeVectorStore(index_name=index_name, embedding=st.session_state.embeddings)
if option:
    if option == "Website":
        website_link = st.text_input("Enter the website link:")
        if website_link:
            with st.spinner("Loading website content..."):
                st.session_state.loader = WebBaseLoader(website_link)
                st.session_state.docs = st.session_state.loader.load()
                st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
                st.session_state.vector = PineconeVectorStore.from_documents(
                    st.session_state.final_documents, index_name=index_name, embedding=st.session_state.embeddings
                )
                st.success("Done!")
            llm_model()
    elif option == "PDF(s)":
        pdf_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
        if pdf_files:
            with st.spinner("Loading pdf..."):
                st.session_state.docs = get_pdf_processed(pdf_files)
                st.session_state.final_documents = st.session_state.text_splitter.split_text(st.session_state.docs)
                st.session_state.vector = PineconeVectorStore.from_texts(
                    st.session_state.final_documents, index_name=index_name, embedding=st.session_state.embeddings
                )
                st.success("Done!")
            llm_model()
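# Usage sketch (assumes this file is saved as app.py, GROQ_API_KEY is set in
# .streamlit/secrets.toml, PINECONE_API_KEY is set in the environment, and the
# "myindex" Pinecone index already exists with dimension 384 to match
# all-MiniLM-L6-v2 embeddings):
#   streamlit run app.py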