Jatin Mehra committed on
Commit 52c6dbe · 0 Parent(s)
Files changed (3)
  1. app.py +154 -0
  2. preprocessing.py +128 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,154 @@
+ import os
+ import tempfile
+
+ import streamlit as st
+ from streamlit_chat import message
+
+ from preprocessing import Model
+
+ # Home page setup
+ st.set_page_config(
+     page_title="PDF Insight Pro",
+     page_icon="📄",
+     layout="centered",
+ )
+
+ # Custom CSS for a more polished look
+ st.markdown("""
+     <style>
+     .main {
+         background-color: #f5f5f5;
+     }
+     .stButton button {
+         background-color: #4CAF50;
+         color: white;
+         border-radius: 8px;
+     }
+     .stTextInput input {
+         border-radius: 8px;
+         padding: 10px;
+     }
+     .stFileUploader input {
+         border-radius: 8px;
+     }
+     .stMarkdown h1 {
+         color: #4CAF50;
+     }
+     </style>
+ """, unsafe_allow_html=True)
+
+ # Custom title and header
+ st.title("📄 PDF Insight Pro")
+ st.subheader("Empower Your Documents with AI-Driven Insights")
+
+ def display_messages():
+     """
+     Displays the chat history in the Streamlit UI.
+     """
+     st.subheader("🗨️ Conversation")
+     st.markdown("---")
+     for i, (msg, is_user) in enumerate(st.session_state["messages"]):
+         message(msg, is_user=is_user, key=str(i))
+     st.session_state["process_input_spinner"] = st.empty()
+
+ def process_user_input():
+     """
+     Processes the user input by generating a response from the assistant.
+     """
+     if st.session_state["user_input"] and len(st.session_state["user_input"].strip()) > 0:
+         user_input = st.session_state["user_input"].strip()
+         with st.session_state["process_input_spinner"], st.spinner("Analyzing..."):
+             agent_response = st.session_state["assistant"].get_response(
+                 user_input,
+                 st.session_state["temperature"],
+                 st.session_state["max_tokens"],
+                 st.session_state["model"],
+             )
+
+         st.session_state["messages"].append((user_input, True))
+         st.session_state["messages"].append((agent_response, False))
+         st.session_state["user_input"] = ""  # Clear the input box for the next query
+
+ def process_file():
+     """
+     Processes each uploaded PDF file and appends its content to the context.
+     """
+     for file in st.session_state["file_uploader"]:
+         # Write the upload to a named temporary file so it can be reopened by path.
+         with tempfile.NamedTemporaryFile(delete=False) as tf:
+             tf.write(file.getbuffer())
+             file_path = tf.name
+
+         with st.session_state["process_file_spinner"], st.spinner(f"Processing {file.name}..."):
+             try:
+                 st.session_state["assistant"].add_to_context(file_path)
+             except Exception as e:
+                 st.error(f"Failed to process file {file.name}: {str(e)}")
+             finally:
+                 os.remove(file_path)  # Always clean up the temporary file
+
+ def main_page():
+     """
+     Main function to set up the Streamlit UI and handle user interactions.
+     """
+     # Initialize session state variables
+     if "messages" not in st.session_state:
+         st.session_state["messages"] = []
+
+     if "assistant" not in st.session_state:
+         st.session_state["assistant"] = Model()
+
+     if "user_input" not in st.session_state:
+         st.session_state["user_input"] = ""
+
+     if "temperature" not in st.session_state:
+         st.session_state["temperature"] = 0.5
+
+     if "max_tokens" not in st.session_state:
+         st.session_state["max_tokens"] = 550
+
+     if "model" not in st.session_state:
+         st.session_state["model"] = "llama-3.1-8b-instant"
+
+     # File uploader
+     st.subheader("📤 Upload Your PDF Documents")
+     st.file_uploader(
+         "Choose PDF files to analyze",
+         type=["pdf"],
+         key="file_uploader",
+         on_change=process_file,
+         accept_multiple_files=True,
+     )
+
+     st.session_state["process_file_spinner"] = st.empty()
+
+     # Document management section
+     if st.session_state["assistant"].contexts:
+         st.subheader("🗂️ Manage Uploaded Documents")
+         for i, context in enumerate(st.session_state["assistant"].contexts):
+             preview = context[:500] + "..." if len(context) > 500 else context
+             st.text_area(f"Document {i+1} Context", preview, height=100)
+             if st.button(f"Remove Document {i+1}"):
+                 st.session_state["assistant"].remove_from_context(i)
+
+     # Model settings
+     with st.expander("⚙️ Customize AI Settings", expanded=True):
+         st.slider("Sampling Temperature", min_value=0.0, max_value=1.0, step=0.1, key="temperature", help="Higher values make output more random.")
+         st.slider("Max Tokens", min_value=50, max_value=1000, step=50, key="max_tokens", help="Limits the length of the response.")
+         st.selectbox("Choose AI Model", ["llama-3.1-8b-instant", "llama3-70b-8192", "gemma-7b-it"], key="model")
+
+     # Display messages and input box
+     display_messages()
+     st.text_input("Type your query and hit Enter", key="user_input", on_change=process_user_input, placeholder="Ask something about your documents...")
+
+     # Developer info and bug report
+     st.subheader("🐞 Bug Report")
+     st.markdown("If you encounter any bugs or issues while using the app, please send a bug report to the developer. A screenshot (optional) helps pinpoint the problem.")
+
+     st.subheader("💡 Suggestions")
+     st.markdown("Suggestions for improving the app's UI and overall user experience are also welcome. Feel free to reach out to the developer.")
+
+     st.subheader("👨‍💻 Developer Info")
+     st.markdown(
+         "**Developer**: Jatin Mehra  \n"
+         "**Email**: [email protected]  \n"
+         "**Mobile**: 9910364780"
+     )
+
+ if __name__ == "__main__":
+     main_page()
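
To try the app locally, the standard Streamlit entry point applies (assuming the dependencies pinned in requirements.txt below are installed): streamlit run app.py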
preprocessing.py ADDED
@@ -0,0 +1,128 @@
+ import os
+ from collections import defaultdict
+
+ import PyPDF2
+ import streamlit as st
+ from groq import Groq
+
+ class Model:
+     """
+     A model that generates responses based on a given context and query.
+     """
+
+     def __init__(self):
+         """
+         Initializes the Model object and sets up the Groq client.
+         """
+         # Look up the API key in Streamlit secrets first, then fall back
+         # to the environment variable.
+         try:
+             api_key = st.secrets["GROQ_API_KEY"]
+         except (KeyError, FileNotFoundError):
+             api_key = os.getenv("GROQ_API_KEY")
+         if not api_key:
+             raise ValueError("GROQ_API_KEY is not set in Streamlit secrets or the environment.")
+         self.client = Groq(api_key=api_key)
+         self.contexts = []
+         self.cache = defaultdict(dict)  # Caching for repeated queries
+
+     def extract_text_from_pdf(self, pdf_file):
+         """
+         Extracts text from a PDF file.
+         Args:
+         - pdf_file: The file-like object of the PDF.
+         Returns:
+         - text: The extracted text from the PDF file.
+         """
+         try:
+             pdf_reader = PyPDF2.PdfReader(pdf_file)
+             text = ""
+             for page in pdf_reader.pages:
+                 # extract_text() can return None for pages without a text layer.
+                 text += page.extract_text() or ""
+             return text
+         except Exception as e:
+             raise ValueError(f"Error extracting text: {str(e)}")
+
+     def generate_response(self, context, query, temperature, max_tokens, model):
+         """
+         Generates a response based on the given context and query.
+         Args:
+         - context: The context for generating the response.
+         - query: The query or question.
+         - temperature: The sampling temperature for response generation.
+         - max_tokens: The maximum number of tokens for the response.
+         - model: The model ID to be used for generating the response.
+         Returns:
+         - response: The generated response.
+         """
+         # Serve a cached response if this query was already answered
+         # against the same context.
+         if query in self.cache and self.cache[query]["context"] == context:
+             return self.cache[query]["response"]
+
+         messages = [
+             {"role": "system", "content": f"Context: {context}"},
+             {"role": "user", "content": query},
+         ]
+         try:
+             completion = self.client.chat.completions.create(
+                 model=model,  # Model ID
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+             )
+             response = completion.choices[0].message.content
+             self.cache[query]["context"] = context
+             self.cache[query]["response"] = response  # Cache the response
+             return response
+         except Exception as e:
+             return f"API request failed: {str(e)}"
+
+     def add_to_context(self, file_path: str):
+         """
+         Reads a PDF file and appends its content to the context used for
+         generating responses.
+         Args:
+         - file_path: The path to the PDF file.
+         """
+         try:
+             with open(file_path, "rb") as pdf_file:
+                 context = self.extract_text_from_pdf(pdf_file)
+                 self.contexts.append(context)
+         except Exception as e:
+             raise ValueError(f"Error processing PDF: {str(e)}")
+
+     def remove_from_context(self, index: int):
+         """
+         Removes a document from the context based on its index.
+         Args:
+         - index: The index of the document to remove.
+         """
+         if 0 <= index < len(self.contexts):
+             self.contexts.pop(index)
+         else:
+             raise ValueError("Invalid index for removing context.")
+
+     def get_combined_context(self):
+         """
+         Combines all contexts into a single context string.
+         Returns:
+         - combined_context: The combined context from all documents.
+         """
+         return "\n".join(self.contexts)
+
+     def get_response(self, question: str, temperature: float, max_tokens: int, model: str):
+         """
+         Generates a response based on the given question and the current
+         combined context.
+         Args:
+         - question: The user's question.
+         - temperature: The sampling temperature for response generation.
+         - max_tokens: The maximum number of tokens for the response.
+         - model: The model ID to be used for generating the response.
+         Returns:
+         - response: The generated response or a prompt to upload a document.
+         """
+         if not self.contexts:
+             return "Please upload a document."
+         combined_context = self.get_combined_context()
+         return self.generate_response(combined_context, question, temperature, max_tokens, model)
+
+     def clear(self):
+         """
+         Clears the current contexts and the response cache.
+         """
+         self.contexts = []
+         self.cache.clear()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit==1.31.1
+ streamlit-chat==0.1.1
+ langchain-community==0.0.24
+ PyPDF2==3.0.1
+ groq==0.9.0