adrish committed
Commit bdf2b0d · 1 parent: 309441d

initial commit with chatbot

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +290 -2
  3. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
app.py CHANGED
@@ -1,4 +1,292 @@
  import streamlit as st

- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ import os
+ import PyPDF2
+ from io import BytesIO
+ from openai import OpenAI
+ from huggingface_hub import InferenceClient
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # ---------------------
+ # Utility Functions
+ # ---------------------
+
+
+ def authenticate():
+     """
+     A simple authentication mechanism using a password stored in an
+     environment variable (APP_PASSWORD).
+
+     Returns True if the user is authenticated; otherwise stops the
+     Streamlit script run.
+     """
+     app_password = os.getenv("APP_PASSWORD", None)
+     if not app_password:
+         st.warning("No password set for the app. Please set the 'APP_PASSWORD' environment variable.")
+         return True  # or return False to block access instead
+
+     if "authenticated" not in st.session_state:
+         st.session_state["authenticated"] = False
+
+     if not st.session_state["authenticated"]:
+         st.text_input("Enter your access code:", type="password", key="login_password")
+         if st.button("Login"):
+             if st.session_state["login_password"] == app_password:
+                 st.session_state["authenticated"] = True
+                 st.rerun()  # st.rerun() replaces the deprecated st.experimental_rerun()
+             else:
+                 st.error("Invalid password. Please try again.")
+         st.stop()
+
+     return st.session_state["authenticated"]
+
+
+ def read_pdf(file):
+     """
+     Reads a PDF file using PyPDF2 and returns the extracted text.
+     """
+     pdf_reader = PyPDF2.PdfReader(file)
+     text = []
+     for page in pdf_reader.pages:
+         # extract_text() can come back empty for image-only pages
+         text.append(page.extract_text() or "")
+     return "\n".join(text)
+
+ def call_gpt_4o_api(
+     messages,
+     model,
+     temperature,
+     max_tokens,
+     stream
+ ):
+     """
+     Calls a GPT-4o-compatible API (via an OpenAI-style client).
+     Expects a list of messages (dicts with "role" and "content" keys),
+     with the system message(s) first and user/assistant messages after.
+
+     Yields partial (streaming) or complete text.
+     """
+     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
+
+     # Drop the second message (the canned assistant greeting that opens
+     # the conversation) so it is not resent to the model on every turn.
+     messages = [messages[0]] + messages[2:]
+
+     if stream:
+         response = client.chat.completions.create(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             stream=True
+         )
+         partial_text = ""
+         for chunk in response:
+             delta = chunk.choices[0].delta
+             if hasattr(delta, "content") and delta.content:
+                 partial_text += delta.content
+                 yield partial_text
+     else:
+         response = client.chat.completions.create(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             stream=False
+         )
+         complete_text = response.choices[0].message.content
+         yield complete_text
+
+ def call_hf_inference(
+     messages,
+     model_repo,
+     temperature=0.7,
+     max_tokens=200,
+     stream=False
+ ):
+     """
+     Calls a Hugging Face open-source LLM via the InferenceClient's chat endpoint.
+     Expects a list of messages (dicts with "role" and "content"), including
+     system and user/assistant roles.
+
+     Yields partial (streaming) or complete text.
+     """
+     HF_TOKEN = os.getenv("HF_TOKEN", None)
+     if not HF_TOKEN:
+         raise ValueError("Please set your HF_TOKEN environment variable.")
+
+     client = InferenceClient(api_key=HF_TOKEN)
+
+     # Drop the second message (the canned assistant greeting), as above.
+     messages = [messages[0]] + messages[2:]
+
+     response = client.chat.completions.create(
+         model=model_repo,
+         messages=messages,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         stream=stream
+     )
+
+     if stream:
+         partial_text = ""
+         for chunk in response:
+             # The streamed delta's content may be None on some chunks.
+             delta = chunk.choices[0].delta
+             partial_text += getattr(delta, "content", None) or ""
+             yield partial_text
+     else:
+         complete_text = response.choices[0].message.content
+         yield complete_text
+
+ # ---------------------
+ # Streamlit App
+ # ---------------------
+
+ def main():
+     # st.set_page_config must be the first Streamlit call in the script,
+     # so it runs before authenticate() renders any widgets.
+     st.set_page_config(page_title="CVI-GPT", layout="centered")
+
+     if not authenticate():
+         st.stop()  # or just `return` to end early
+
+     st.title("CVI-GPT: Conversational Interface")
+
+     # ---------------------
+     # Sidebar: Model & Params
+     # ---------------------
+     st.sidebar.header("Model & Parameters")
+
+     # Model selection
+     model_choice = st.sidebar.selectbox(
+         "Select Model",
+         [
+             "gpt-4o",
+             "gpt-4o-mini",
+             "meta-llama/Llama-3.3-70B-Instruct",
+             "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+         ]
+     )
+
+     # Temperature & max_tokens
+     temperature = st.sidebar.slider("Temperature", 0.0, 1.5, 0.7, 0.1)
+     max_tokens = st.sidebar.slider("Max Tokens", 50, 2000, 500, 50)
+
+     # Store the selected model in session_state to detect changes
+     if "selected_model" not in st.session_state:
+         st.session_state.selected_model = model_choice
+
+     # If the user changes the model, reset the conversation
+     if model_choice != st.session_state.selected_model:
+         st.session_state.selected_model = model_choice
+         st.session_state["messages"] = [
+             {"role": "assistant", "content": f"Model changed to `{model_choice}`. How can I help you?"}
+         ]
+
+     # System / instruction message
+     base_instructions = st.sidebar.text_area(
+         "System / Instruction Message",
+         value=(
+             "You are a helpful assistant. Respond in a concise, helpful, and markdown-friendly format.\n\n"
+             "Formatting Instructions:\n"
+             "- Responses should be in markdown.\n"
+             "- Use headings, bullet points, bold, italics, etc. for clarity.\n"
+             "- Use triple backticks for code blocks.\n"
+             "- Provide references or disclaimers when needed."
+         ),
+         height=200
+     )
+
+     # Clear Chat button
+     if st.sidebar.button("Clear Chat"):
+         st.session_state["messages"] = [
+             {"role": "assistant", "content": "Chat cleared. How can I help you now?"}
+         ]
+
+     # ---------------------
+     # PDF Upload
+     # ---------------------
+     st.sidebar.header("Optional: PDF Upload")
+     uploaded_file = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])
+     pdf_text = ""
+     if uploaded_file is not None:
+         pdf_text = read_pdf(uploaded_file)
+         # The PDF content itself is not printed; just confirm it loaded.
+         st.sidebar.write("PDF content loaded (not displayed).")
+
+     st.sidebar.divider()
+     with st.sidebar:
+         st.subheader("👨‍💻 Author: *Adrish Maity*", anchor=False)
+
+     # ---------------------
+     # Initialize conversation if not present
+     # ---------------------
+     if "messages" not in st.session_state:
+         st.session_state["messages"] = [
+             {"role": "assistant", "content": "Hello! How can I help you today?"}
+         ]
+
+     # ---------------------
+     # Display Conversation
+     # ---------------------
+     for msg in st.session_state["messages"]:
+         with st.chat_message(msg["role"]):
+             st.markdown(msg["content"])
+
+     # ---------------------
+     # Chat Input
+     # ---------------------
+     if user_text := st.chat_input("Type your question..."):
+         st.session_state["messages"].append({"role": "user", "content": user_text})
+
+         # Display the user's message
+         with st.chat_message("user"):
+             st.markdown(user_text)
+
+         # Build the full conversation:
+         # 1) a system message (instructions), with the PDF context appended
+         #    to it when a PDF was uploaded (kept hidden from the UI),
+         # 2) followed by all prior conversation turns.
+         full_conversation = [{"role": "system", "content": base_instructions}]
+
+         if pdf_text:
+             full_conversation[0]["content"] += "\n\nAdditional PDF context (user provided):\n" + pdf_text
+
+         full_conversation.extend(st.session_state["messages"])
+
+         # Placeholder for the assistant's streaming response
+         with st.chat_message("assistant"):
+             response_placeholder = st.empty()
+             streamed_text = ""
+
+             # Decide how to call the model
+             if model_choice in ["gpt-4o", "gpt-4o-mini"]:
+                 stream_response = call_gpt_4o_api(
+                     messages=full_conversation,
+                     model=model_choice,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                     stream=True
+                 )
+                 for partial_output in stream_response:
+                     streamed_text = partial_output
+                     response_placeholder.markdown(streamed_text)
+             else:
+                 hf_stream = call_hf_inference(
+                     messages=full_conversation,
+                     model_repo=model_choice,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                     stream=True
+                 )
+                 for partial_output in hf_stream:
+                     streamed_text = partial_output
+                     response_placeholder.markdown(streamed_text)
+
+         # Once done, store the final assistant message
+         st.session_state["messages"].append({"role": "assistant", "content": streamed_text})
+
+
+ if __name__ == "__main__":
+     main()
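
Note: app.py reads three environment variables (APP_PASSWORD, OPENAI_API_KEY, HF_TOKEN), loaded from the git-ignored .env file by load_dotenv(). A minimal .env sketch, with placeholder values only:

APP_PASSWORD=choose-an-access-code
OPENAI_API_KEY=sk-...
HF_TOKEN=hf_...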
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ streamlit==1.38.0
+ PyPDF2==3.0.1
+ requests==2.31.0
+ huggingface_hub==0.24.5
+ openai==1.60.0
+ python-dotenv==1.0.1  # required by app.py's load_dotenv(); the pin is a suggestion
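
To run locally, install the pinned dependencies and launch the app with Streamlit's CLI:

pip install -r requirements.txt
streamlit run app.py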