Waris01 committed
Commit 66999f2 · verified · 1 Parent(s): b154b72
Files changed (1)
  1. app.py +68 -66
app.py CHANGED
@@ -1,66 +1,68 @@
- import streamlit as st
- import os
- import tempfile
- from dotenv import load_dotenv
- from langchain_groq import ChatGroq
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.chains import RetrievalQA
- from Datapreprocessing import PreprocessingData
- from pdfparsing import ExtractDatafrompdf
-
- load_dotenv()
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-
- st.set_page_config(page_title="📄 Chat with PDF", layout="wide")
-
- # Sidebar for PDF Upload
- st.sidebar.title("📂 Upload your PDF")
- uploaded_file = st.sidebar.file_uploader("Choose a PDF", type="pdf")
-
- # LLM and Embeddings - cached
- @st.cache_resource
- def get_embeddings():
-     return HuggingFaceEmbeddings()
-
- @st.cache_resource
- def get_llm():
-     return ChatGroq(api_key=GROQ_API_KEY, model="gemma2-9b-it", temperature=0.2)
-
- # Build Retrieval Chain
- def get_chain(retriever):
-     llm = get_llm()
-     return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
-
- # PDF processing pipeline
- def process_pdf_and_create_chain(uploaded_file):
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
-         tmp.write(uploaded_file.read())
-         tmp_path = tmp.name
-
-     documents = ExtractDatafrompdf(tmp_path)
-     chunks = PreprocessingData(documents)
-     embedder = get_embeddings()
-     retriever = FAISS.from_documents(chunks, embedder).as_retriever(search_type="similarity", search_kwargs={"k": 1})
-     return get_chain(retriever)
-
- # Main UI
- st.title("📄 Ask Questions About Your PDF")
-
- if uploaded_file:
-     if "chain" not in st.session_state:
-         st.success("PDF uploaded successfully! Processing...")
-         with st.spinner("Extracting and chunking PDF..."):
-             st.session_state.chain = process_pdf_and_create_chain(uploaded_file)
-         st.success("Ready to chat with your PDF!")
-     else:
-         st.sidebar.info("Using cached PDF session.")
-
-     user_query = st.text_input("Ask a question about your PDF:")
-     if user_query:
-         with st.spinner("Generating answer..."):
-             result = st.session_state.chain.invoke({"query": user_query})
-         st.markdown("### 📌 Answer:")
-         st.write(result["result"])
- else:
-     st.info("📀 Upload a PDF from the sidebar to begin.")
+ import streamlit as st
+ import os
+ import tempfile
+ from dotenv import load_dotenv
+ from langchain_groq import ChatGroq
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
+ from Datapreprocessing import PreprocessingData
+ from pdfparsing import ExtractDatafrompdf
+
+ load_dotenv()
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+
+ st.set_page_config(page_title="📄 Chat with PDF", layout="wide")
+
+ # Sidebar for PDF Upload
+ st.sidebar.title("📂 Upload your PDF")
+ uploaded_file = st.sidebar.file_uploader("Choose a PDF", type="pdf")
+
+ # LLM and Embeddings - cached
+ @st.cache_resource
+ def get_embeddings():
+     return HuggingFaceEmbeddings()
+
+ @st.cache_resource
+ def get_llm():
+     return ChatGroq(api_key=GROQ_API_KEY, model="gemma2-9b-it", temperature=0.2)
+
+ # Build Retrieval Chain
+ def get_chain(retriever):
+     llm = get_llm()
+     return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
+
+ # PDF processing pipeline
+ def process_pdf_and_create_chain(uploaded_file):
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+         tmp.write(uploaded_file.read())
+         tmp_path = tmp.name
+
+     documents = ExtractDatafrompdf(tmp_path)
+     chunks = PreprocessingData(documents)
+     embedder = get_embeddings()
+     retriever = FAISS.from_documents(chunks, embedder).as_retriever(search_type="similarity", search_kwargs={"k": 1})
+     return get_chain(retriever)
+
+ # Main UI
+ st.title("📄 Ask Questions About Your PDF")
+
+ if uploaded_file:
+     if "chain" not in st.session_state:
+         st.success("PDF uploaded successfully! Processing...")
+         with st.spinner("Extracting and chunking PDF..."):
+             st.session_state.chain = process_pdf_and_create_chain(uploaded_file)
+         st.success("Ready to chat with your PDF!")
+     else:
+         st.sidebar.info("Using cached PDF session.")
+
+     user_query = st.text_input("Ask a question about your PDF:")
+     submit = st.button("Search")
+     if submit:
+         if user_query:
+             with st.spinner("Generating answer..."):
+                 result = st.session_state.chain.invoke({"query": user_query})
+             st.markdown("### 📌 Answer:")
+             st.write(result["result"])
+ else:
+     st.info("📀 Upload a PDF from the sidebar to begin.")