Jatin Mehra committed · 52c6dbe
Parent(s): initial
Files changed:
- app.py (+154, -0)
- preprocessing.py (+128, -0)
- requirements.txt (+5, -0)
app.py
ADDED
@@ -0,0 +1,154 @@
import os
import tempfile
import streamlit as st
from streamlit_chat import message
from preprocessing import Model

# Home Page Setup
st.set_page_config(
    page_title="PDF Insight Pro",
    page_icon="📄",
    layout="centered",
)

# Custom CSS for a more polished look
st.markdown("""
<style>
.main {
    background-color: #f5f5f5;
}
.stButton button {
    background-color: #4CAF50;
    color: white;
    border-radius: 8px;
}
.stTextInput input {
    border-radius: 8px;
    padding: 10px;
}
.stFileUploader input {
    border-radius: 8px;
}
.stMarkdown h1 {
    color: #4CAF50;
}
</style>
""", unsafe_allow_html=True)

# Custom title and header
st.title("📄 PDF Insight Pro")
st.subheader("Empower Your Documents with AI-Driven Insights")

def display_messages():
    """
    Displays the chat messages in the Streamlit UI.
    """
    st.subheader("🗨️ Conversation")
    st.markdown("---")
    for i, (msg, is_user) in enumerate(st.session_state["messages"]):
        message(msg, is_user=is_user, key=str(i))
    st.session_state["process_input_spinner"] = st.empty()

def process_user_input():
    """
    Processes the user input by generating a response from the assistant.
    """
    if st.session_state["user_input"] and len(st.session_state["user_input"].strip()) > 0:
        user_input = st.session_state["user_input"].strip()
        with st.session_state["process_input_spinner"], st.spinner("Analyzing..."):
            agent_response = st.session_state["assistant"].get_response(
                user_input,
                st.session_state["temperature"],
                st.session_state["max_tokens"],
                st.session_state["model"]
            )

        st.session_state["messages"].append((user_input, True))
        st.session_state["messages"].append((agent_response, False))
        st.session_state["user_input"] = ""

def process_file():
    """
    Processes the uploaded PDF files and appends their content to the context.
    """
    for file in st.session_state["file_uploader"]:
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(file.getbuffer())
            file_path = tf.name

        with st.session_state["process_file_spinner"], st.spinner(f"Processing {file.name}..."):
            try:
                st.session_state["assistant"].add_to_context(file_path)
            except Exception as e:
                st.error(f"Failed to process file {file.name}: {str(e)}")
        os.remove(file_path)

def main_page():
    """
    Main function to set up the Streamlit UI and handle user interactions.
    """
    # Initialize session state variables
    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    if "assistant" not in st.session_state:
        st.session_state["assistant"] = Model()

    if "user_input" not in st.session_state:
        st.session_state["user_input"] = ""

    if "temperature" not in st.session_state:
        st.session_state["temperature"] = 0.5

    if "max_tokens" not in st.session_state:
        st.session_state["max_tokens"] = 550

    if "model" not in st.session_state:
        st.session_state["model"] = "llama-3.1-8b-instant"

    # File uploader
    st.subheader("📤 Upload Your PDF Documents")
    st.file_uploader(
        "Choose PDF files to analyze",
        type=["pdf"],
        key="file_uploader",
        on_change=process_file,
        accept_multiple_files=True,
    )

    st.session_state["process_file_spinner"] = st.empty()

    # Document management section
    if st.session_state["assistant"].contexts:
        st.subheader("🗂️ Manage Uploaded Documents")
        for i, context in enumerate(st.session_state["assistant"].contexts):
            st.text_area(f"Document {i+1} Context", context[:500] + "..." if len(context) > 500 else context, height=100)
            if st.button(f"Remove Document {i+1}"):
                st.session_state["assistant"].remove_from_context(i)

    # Model settings
    with st.expander("⚙️ Customize AI Settings", expanded=True):
        st.slider("Sampling Temperature", min_value=0.0, max_value=1.0, step=0.1, key="temperature", help="Higher values make output more random.")
        st.slider("Max Tokens", min_value=50, max_value=1000, step=50, key="max_tokens", help="Limits the length of the response.")
        st.selectbox("Choose AI Model", ["llama-3.1-8b-instant", "llama3-70b-8192", "gemma-7b-it"], key="model")

    # Display messages and input box
    display_messages()
    st.text_input("Type your query and hit Enter", key="user_input", on_change=process_user_input, placeholder="Ask something about your documents...")

    # Developer info and bug report
    st.subheader("🐞 Bug Report")
    st.markdown("""
If you encounter any bugs or issues while using the app, please send a bug report to the developer. You can include a screenshot (optional) to help identify the problem.
""")
    st.subheader("💡 Suggestions")
    st.markdown("""
Suggestions to improve the app's UI and user experience are also welcome. Feel free to reach out to the developer with your suggestions.
""")
    st.subheader("👨‍💻 Developer Info")
    st.markdown("""
**Developer**: Jatin Mehra\n
**Email**: [email protected]\n
**Mobile**: 9910364780\n
""")

if __name__ == "__main__":
    main_page()
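The input handling above leans on Streamlit's callback pattern: each widget writes its value into st.session_state under its key, and the on_change handler runs before the next rerun, so it can read and then reset that value. A stripped-down sketch of just that mechanism (the "box" key, "log" key, and handle_input name are illustrative, not part of this commit):

import streamlit as st

if "log" not in st.session_state:
    st.session_state["log"] = []

def handle_input():
    # on_change fires before the script reruns, so the widget's value is
    # already in session_state and can be consumed and cleared here.
    text = st.session_state["box"].strip()
    if text:
        st.session_state["log"].append(text)
        st.session_state["box"] = ""  # resetting widget state is allowed inside its own callback

st.text_input("Say something", key="box", on_change=handle_input)
for entry in st.session_state["log"]:
    st.write(entry)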
preprocessing.py
ADDED
@@ -0,0 +1,128 @@
import os
import PyPDF2
from groq import Groq
import streamlit as st
from collections import defaultdict

class Model:
    """
    A class that represents a model for generating responses based on a given context and query.
    """

    def __init__(self):
        """
        Initializes the Model object and sets up the Groq client.
        """
        # api_key = os.getenv("GROQ_API_KEY")
        api_key = st.secrets["GROQ_API_KEY"]
        if not api_key:
            raise ValueError("GROQ_API_KEY is not set.")
        self.client = Groq(api_key=api_key)
        self.contexts = []
        self.cache = defaultdict(dict)  # Caching for repeated queries

    def extract_text_from_pdf(self, pdf_file):
        """
        Extracts text from a PDF file.
        Args:
        - pdf_file: The file-like object of the PDF.
        Returns:
        - text: The extracted text from the PDF file.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text() or ""  # extract_text() may return None for image-only pages
            return text
        except Exception as e:
            raise ValueError(f"Error extracting text: {str(e)}")

    def generate_response(self, context, query, temperature, max_tokens, model):
        """
        Generates a response based on the given context and query.
        Args:
        - context: The context for generating the response.
        - query: The query or question.
        - temperature: The sampling temperature for response generation.
        - max_tokens: The maximum number of tokens for the response.
        - model: The model ID to be used for generating the response.
        Returns:
        - response: The generated response.
        """
        # Caching check: a hit requires the same query *and* the same combined
        # context, so adding or removing a document invalidates old answers.
        if query in self.cache and self.cache[query]["context"] == context:
            return self.cache[query]["response"]

        messages = [
            {"role": "system", "content": f"Context: {context}"},
            {"role": "user", "content": query},
        ]
        try:
            completion = self.client.chat.completions.create(
                model=model,  # Model ID
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            response = completion.choices[0].message.content
            self.cache[query]["context"] = context
            self.cache[query]["response"] = response  # Cache the response
            return response
        except Exception as e:
            return f"API request failed: {str(e)}"

    def add_to_context(self, file_path: str):
        """
        Reads a PDF file and appends its content to the context for generating responses.
        Args:
        - file_path: The path to the PDF file.
        """
        try:
            with open(file_path, "rb") as pdf_file:
                context = self.extract_text_from_pdf(pdf_file)
                self.contexts.append(context)
        except Exception as e:
            raise ValueError(f"Error processing PDF: {str(e)}")

    def remove_from_context(self, index: int):
        """
        Removes a document from the context based on its index.
        Args:
        - index: The index of the document to remove.
        """
        if 0 <= index < len(self.contexts):
            self.contexts.pop(index)
        else:
            raise ValueError("Invalid index for removing context.")

    def get_combined_context(self):
        """
        Combines all contexts into a single context string.
        Returns:
        - combined_context: The combined context from all documents.
        """
        return "\n".join(self.contexts)

    def get_response(self, question: str, temperature: float, max_tokens: int, model: str):
        """
        Generates a response based on the given question and the current combined context.
        Args:
        - question: The user's question.
        - temperature: The sampling temperature for response generation.
        - max_tokens: The maximum number of tokens for the response.
        - model: The model ID to be used for generating the response.
        Returns:
        - response: The generated response or a prompt to upload a document.
        """
        if not self.contexts:
            return "Please upload a document."
        combined_context = self.get_combined_context()
        return self.generate_response(combined_context, question, temperature, max_tokens, model)

    def clear(self):
        """
        Clears the current context.
        """
        self.contexts = []
        self.cache.clear()
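A minimal usage sketch for the Model class outside the Streamlit UI. Assumptions: a .streamlit/secrets.toml providing GROQ_API_KEY is present (since __init__ reads st.secrets), and "report.pdf" is a hypothetical file; the parameter values mirror the app's defaults:

from preprocessing import Model

assistant = Model()                       # reads GROQ_API_KEY from st.secrets
assistant.add_to_context("report.pdf")    # hypothetical file; its text is extracted and stored
answer = assistant.get_response(
    "Summarize the document.",            # user question
    temperature=0.5,                      # app default
    max_tokens=550,                       # app default
    model="llama-3.1-8b-instant",         # app default model ID
)
print(answer)
assistant.clear()                         # drop all contexts and cached answers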
requirements.txt
ADDED
@@ -0,0 +1,5 @@
streamlit==1.31.1
streamlit-chat==0.1.1
langchain-community==0.0.24
PyPDF2==3.0.1
groq==0.9.0
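To reproduce the Space locally (a sketch, assuming a working Python environment and a Groq API key): install the pins with "pip install -r requirements.txt", put GROQ_API_KEY = "..." in .streamlit/secrets.toml, then launch with "streamlit run app.py". Note that langchain-community is pinned here although neither app.py nor preprocessing.py imports it in this commit.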