File size: 21,751 Bytes
d7dc7e0 c114fc1 d7dc7e0 7e2c8a0 d7dc7e0 c114fc1 91e2918 c114fc1 67e558e 25414e5 c114fc1 25414e5 c114fc1 d7dc7e0 c114fc1 d7dc7e0 07bfeb2 d7dc7e0 c114fc1 d7dc7e0 c114fc1 d7dc7e0 c114fc1 d7dc7e0 c114fc1 25414e5 07bfeb2 c114fc1 25414e5 d7dc7e0 25414e5 dafc314 07bfeb2 d7dc7e0 c114fc1 d7dc7e0 c114fc1 d7dc7e0 07bfeb2 d7dc7e0 25414e5 07bfeb2 d7dc7e0 b4bff08 d7dc7e0 25414e5 d7dc7e0 07bfeb2 d7dc7e0 07bfeb2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 |
# import streamlit as st
# import os
# import pandas as pd
# import random
# from os.path import join
# from src import preprocess_and_load_df, load_agent, ask_agent, decorate_with_code, show_response, get_from_user, load_smart_df, ask_question
# from dotenv import load_dotenv
# from langchain_groq.chat_models import ChatGroq
# load_dotenv("Groq.txt")
# Groq_Token = os.environ["GROQ_API_KEY"]
# models = {"llama3":"llama3-70b-8192","mixtral": "mixtral-8x7b-32768", "llama2": "llama2-70b-4096", "gemma": "gemma-7b-it"}
# self_path = os.path.dirname(os.path.abspath(__file__))
# # Using HTML and CSS to center the title
# st.write(
# """
# <style>
# .title {
# text-align: center;
# color: #17becf;
# }
# """,
# unsafe_allow_html=True,
# )
# # Displaying the centered title
# st.markdown("<h2 class='title'>VayuBuddy</h2>", unsafe_allow_html=True)
# st.markdown("<div style='text-align:center; padding: 20px;'>VayuBuddy makes pollution monitoring easier by bridging the gap between users and datasets.<br>No coding required—just meaningful insights at your fingertips!</div>", unsafe_allow_html=True)
# # Center-aligned instruction text with bold formatting
# st.markdown("<div style='text-align:center;'>Choose a query from <b>Select a prompt</b> or type a query in the <b>chat box</b>, select a <b>LLM</b> (Large Language Model), and press enter to generate a response.</div>", unsafe_allow_html=True)
# # os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
# # with open(join(self_path, "context1.txt")) as f:
# # context = f.read().strip()
# # agent = load_agent(join(self_path, "app_trial_1.csv"), context)
# # df = preprocess_and_load_df(join(self_path, "Data.csv"))
# # inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
# # inference_server = "https://api-inference.huggingface.co/models/codellama/CodeLlama-13b-hf"
# # inference_server = "https://api-inference.huggingface.co/models/pandasai/bamboo-llm"
# model_name = st.sidebar.selectbox("Select LLM:", ["llama3","mixtral", "gemma"])
# questions = ('Custom Prompt',
# 'Plot the monthly average PM2.5 for the year 2023.',
# 'Which month in which year has the highest average PM2.5 overall?',
# 'Which month in which year has the highest PM2.5 overall?',
# 'Which month has the highest average PM2.5 in 2023 for Mumbai?',
# 'Plot and compare monthly timeseries of pollution for Mumbai and Bengaluru.',
# 'Plot the yearly average PM2.5.',
# 'Plot the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
# 'Which month has the highest pollution?',
# 'Which city has the highest PM2.5 level in July 2022?',
# 'Plot and compare monthly timeseries of PM2.5 for Mumbai and Bengaluru.',
# 'Plot and compare the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
# 'Plot the monthly average PM2.5.',
# 'Plot the monthly average PM10 for the year 2023.',
# 'Which (month, year) has the highest PM2.5?',
# 'Plot the monthly average PM2.5 of Delhi for the year 2022.',
# 'Plot the monthly average PM2.5 of Bengaluru for the year 2022.',
# 'Plot the monthly average PM2.5 of Mumbai for the year 2022.',
# 'Which state has the highest average PM2.5?',
# 'Plot monthly PM2.5 in Gujarat for 2023.',
# 'What is the name of the month with the highest average PM2.5 overall?')
# waiting_lines = ("Thinking...", "Just a moment...", "Let me think...", "Working on it...", "Processing...", "Hold on...", "One moment...", "On it...")
# # agent = load_agent(df, context="", inference_server=inference_server, name=model_name)
# # Initialize chat history
# if "responses" not in st.session_state:
# st.session_state.responses = []
# # Display chat responses from history on app rerun
# for response in st.session_state.responses:
# if not response["no_response"]:
# show_response(st, response)
# show = True
# if prompt := st.sidebar.selectbox("Select a Prompt:", questions):
# # add a note "select custom prompt to ask your own question"
# st.sidebar.info("Select 'Custom Prompt' to ask your own question.")
# if prompt == 'Custom Prompt':
# show = False
# # React to user input
# prompt = st.chat_input("Ask me anything about air quality!", key=10)
# if prompt : show = True
# if show :
# # Add user input to chat history
# response = get_from_user(prompt)
# response["no_response"] = False
# st.session_state.responses.append(response)
# # Display user input
# show_response(st, response)
# no_response = False
# # select random waiting line
# with st.spinner(random.choice(waiting_lines)):
# ran = False
# for i in range(1):
# print(f"Attempt {i+1}")
# llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0)
# df_check = pd.read_csv("Data.csv")
# df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
# df_check = df_check.head(5)
# new_line = "\n"
# parameters = {"font.size": 12}
# template = f"""```python
# import pandas as pd
# import matplotlib.pyplot as plt
# # plt.rcParams.update({parameters})
# df = pd.read_csv("Data.csv")
# df["Timestamp"] = pd.to_datetime(df["Timestamp"])
# import geopandas as gpd
# india = gpd.read_file("https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson")
# india.loc[india['ST_NM'].isin(['Ladakh', 'Jammu & Kashmir']), 'ST_NM'] = 'Jammu and Kashmir'
# # df.dtypes
# {new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
# # {prompt.strip()}
# # <your code here>
# ```
# """
# query = f"""I have a pandas dataframe data of PM2.5 and PM10.
# * The columns are 'Timestamp', 'station', 'PM2.5', 'PM10', 'address', 'city', 'latitude', 'longitude',and 'state'.
# * Frequency of data is daily.
# * `pollution` generally means `PM2.5`.
# * You already have df, so don't read the csv file
# * Don't print anything, but save result in a variable `answer` and make it global.
# * Unless explicitly mentioned, don't consider the result as a plot.
# * PM2.5 guidelines: India: 60, WHO: 15.
# * PM10 guidelines: India: 100, WHO: 50.
# * If result is a plot, show the India and WHO guidelines in the plot.
# * If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
# * If result is a plot, rotate x-axis tick labels by 45 degrees,
# * If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
# * I have a geopandas.geodataframe india containining the coordinates required to plot Indian Map with states.
# * If the query asks you to plot on India Map, use that geodataframe to plot and then add more points as per the requirements using the similar code as follows : v = ax.scatter(df['longitude'], df['latitude']). If the colorbar is required, use the following code : plt.colorbar(v)
# * If the query asks you to plot on India Map plot the India Map in Beige color
# * Whenever you do any sort of aggregation, report the corresponding standard deviation, standard error and the number of data points for that aggregation.
# * Whenever you're reporting a floating point number, round it to 2 decimal places.
# * Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
# Complete the following code.
# {template}
# """
# answer = None
# code = None
# try:
# answer = llm.invoke(query)
# code = f"""
# {template.split("```python")[1].split("```")[0]}
# {answer.content.split("```python")[1].split("```")[0]}
# """
# # update variable `answer` when code is executed
# exec(code)
# ran = True
# no_response = False
# except Exception as e:
# no_response = True
# exception = e
# if code is not None:
# answer = f"!!!Faced an error while working on your query. Please try again!!!"
# if type(answer) != str:
# answer = f"!!!Faced an error while working on your query. Please try again!!!"
# response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": prompt, "no_response": no_response}
# # Get response from agent
# # response = ask_question(model_name=model_name, question=prompt)
# # response = ask_agent(agent, prompt)
# if ran:
# break
# # Display agent response
# if code is not None:
# # Add agent response to chat history
# print("Adding response")
# st.session_state.responses.append(response)
# show_response(st, response)
# if no_response:
# print("No response")
# st.error(f"Failed to generate right output due to the following error:\n\n{exception}")
# prompt = 'Custom Prompt'
####################################################Added User Feedback###################################################
import streamlit as st
import os
import pandas as pd
import random
from os.path import join
from src import preprocess_and_load_df, load_agent, ask_agent, decorate_with_code, show_response, get_from_user, load_smart_df, ask_question
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
from datasets import Dataset, load_dataset, concatenate_datasets
import streamlit as st
from streamlit_feedback import streamlit_feedback
import uuid
from huggingface_hub import login, HfFolder
import os
# Read the Hugging Face access token from the HF_TOKEN environment variable.
token = os.getenv("HF_TOKEN")
# Only log in when a token is actually present. With token=None,
# huggingface_hub's login() falls back to an interactive prompt, which
# cannot be answered from inside a Streamlit server process.
if token:
    login(token=token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login.")

# Sidebar selector; the chosen label is later used as a key into `models`.
model_name = st.sidebar.selectbox("Select LLM:", ["llama3","mixtral", "gemma"])

# Markdown block with the maintainers' contact links.
contact_details = """
**Feel free to reach out to us:**
- [Nipun Batra](mailto:[email protected])
- [Zeel B Patel](mailto:[email protected])
- [Yash J Bachwana](mailto:[email protected])
"""

# Twelve blank markdown rows act as vertical spacing, pushing the contact
# section further down the sidebar.
for _ in range(12):
    st.sidebar.markdown(" ")

# Display contact details with message
st.sidebar.markdown("<hr>", unsafe_allow_html=True)
st.sidebar.markdown(contact_details, unsafe_allow_html=True)
# Function to push feedback data to Hugging Face Hub dataset
def push_to_dataset(feedback, comments, output, code, error):
    """Append one feedback record to the ``YashB1/Feedbacks_eoc`` Hub dataset.

    The existing ``evaluation`` split is loaded (or an empty dataset with the
    same five columns is created when loading raises ``FileNotFoundError``),
    the new row is concatenated to it, and the result is pushed back to the
    Hub under the same repository and split.

    Args:
        feedback: Feedback score supplied by the feedback widget.
        comments: Optional free-text comment supplied with the feedback.
        output: The answer that was shown to the user.
        code: The generated code that produced ``output`` (may be ``None``).
        error: The error raised while answering, if any. May be an exception
            instance, a string, or ``None``.
    """
    repo_id = "YashB1/Feedbacks_eoc"
    split_name = "evaluation"

    # Load existing dataset or create a new one if it doesn't exist
    try:
        ds = load_dataset(repo_id, split=split_name)
    except FileNotFoundError:
        ds = Dataset.from_dict({"feedback": [], "comments": [], "error": [], "output": [], "code": []})

    # The caller hands over the caught exception object itself; Arrow cannot
    # store arbitrary Python objects, so keep only its text. None stays None.
    error_text = None if error is None else str(error)

    # Each value is wrapped in a list because from_dict expects columns.
    new_data = Dataset.from_dict({
        "feedback": [feedback],
        "comments": [comments],
        "error": [error_text],
        "output": [output],
        "code": [code],
    })
    ds = concatenate_datasets([ds, new_data])

    # Push the updated dataset to Hugging Face Hub
    ds.push_to_hub(repo_id, split=split_name)
# Load variables from the local "Groq.txt" env file, then read the Groq key.
# Indexing os.environ (instead of .get) raises KeyError right here when the
# key is missing.
load_dotenv("Groq.txt")
Groq_Token = os.environ["GROQ_API_KEY"]

# Sidebar label -> Groq model identifier.
models = {
    "llama3": "llama3-70b-8192",
    "mixtral": "mixtral-8x7b-32768",
    "llama2": "llama2-70b-4096",
    "gemma": "gemma-7b-it",
}

# Directory that contains this script.
_script_file = os.path.abspath(__file__)
self_path = os.path.dirname(_script_file)

# CSS rule used by the page title: centred text in a teal colour.
_title_css = """
<style>
.title {
text-align: center;
color: #17becf;
}
"""
st.write(_title_css, unsafe_allow_html=True)

# Page header, rendered top to bottom: title, tagline, usage instructions.
_header_html = (
    "<h2 class='title'>VayuBuddy</h2>",
    "<div style='text-align:center; padding: 20px;'>VayuBuddy makes pollution monitoring easier by bridging the gap between users and datasets.<br>No coding required—just meaningful insights at your fingertips!</div>",
    "<div style='text-align:center;'>Choose a query from <b>Select a prompt</b> or type a query in the <b>chat box</b>, select a <b>LLM</b> (Large Language Model), and press enter to generate a response.</div>",
)
for _fragment in _header_html:
    st.markdown(_fragment, unsafe_allow_html=True)
# os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
# with open(join(self_path, "context1.txt")) as f:
# context = f.read().strip()
# agent = load_agent(join(self_path, "app_trial_1.csv"), context)
# df = preprocess_and_load_df(join(self_path, "Data.csv"))
# inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
# inference_server = "https://api-inference.huggingface.co/models/codellama/CodeLlama-13b-hf"
# inference_server = "https://api-inference.huggingface.co/models/pandasai/bamboo-llm"
# model_name = st.sidebar.selectbox("Select LLM:", ["llama3","mixtral", "gemma"])
# Seed the per-session values this script relies on. Each default is produced
# lazily by a factory and only stored when the key is absent, so values that
# already exist survive Streamlit reruns untouched.
_session_defaults = {
    'question_state': lambda: False,       # True once a question has been asked
    'fbk': lambda: str(uuid.uuid4()),      # key of the current feedback widget
    'feedback': lambda: None,              # last value returned by the widget
    "chat_history": list,                  # question/answer entries
}
for _state_key, _make_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
def display_answer():
    """Render every stored question/answer pair in history order.

    Iterates over ``st.session_state.chat_history``. For each entry the
    question is written inside a "human" chat message and the stored answer
    is handed to ``show_response`` together with the ``st`` module.
    """
    history = st.session_state.chat_history
    for turn in history:
        user_bubble = st.chat_message("human")
        with user_bubble:
            st.write(turn["question"])
        # NOTE(review): the answer is rendered after the "human" message
        # rather than inside it; the file's indentation was lost, so confirm
        # this nesting against the deployed app.
        show_response(st, turn["answer"])
def fbcb(response):
    """Feedback-submit callback: redraw the chat and rotate the widget key.

    The question and answer are already stored in the chat history when this
    runs. ``response`` is the value handed over by the feedback component; it
    is not read here.
    """
    # Show the stored conversation again.
    display_answer()

    # Storing a new random key means the next feedback component is created
    # under a different key, i.e. as a new feedback widget.
    next_key = uuid.uuid4()
    st.session_state.fbk = str(next_key)
question = st.chat_input(placeholder="Ask your question here .... !!!!")
if question:
    # chat_input hands back the text only on the run in which it was
    # submitted. When the user presses a feedback button Streamlit reruns the
    # script from the top and chat_input yields nothing, so the prompt and a
    # "there is a question" flag are parked in session state to let the code
    # below run again on that rerun.
    st.session_state.prompt = question
    st.session_state.question_state = True

# NOTE(review): the flag is never cleared, so every rerun (feedback click,
# sidebar change) repeats the LLM call and appends another history entry for
# the same prompt.
if st.session_state.question_state:
    waiting_lines = ("Thinking...", "Just a moment...", "Let me think...", "Working on it...", "Processing...", "Hold on...", "One moment...", "On it...")
    error_message = "!!!Faced an error while working on your query. Please try again!!!"

    with st.spinner(random.choice(waiting_lines)):
        # Everything up to the retry loop is the same for each attempt, so it
        # is built once instead of once per attempt.
        # The key is read from GROQ_API first; GROQ_API_KEY (the variable this
        # file loads from Groq.txt) is used as a fallback.
        llm = ChatGroq(
            model=models[model_name],
            api_key=os.getenv("GROQ_API") or os.getenv("GROQ_API_KEY"),
            temperature=0,
        )

        # Only the column dtypes of the data are shown to the model.
        df_check = pd.read_csv("Data.csv")
        df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
        df_check = df_check.head(5)

        new_line = "\n"
        parameters = {"font.size": 12}

        # If the query asks you to make a gif/animation, don't use savefig to save it. Instead use ani.save(answer, writer='pillow').
        # If the query asks you to make a gif/animation, don't use colormaps .

        # Code skeleton the model must complete. It is also the preamble that
        # is executed in front of the model's own code.
        template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt
# plt.rcParams.update({parameters})
df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
import geopandas as gpd
file_path = "india_states.geojson"
india = gpd.read_file("https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson")
india.loc[india['ST_NM'].isin(['Ladakh', 'Jammu & Kashmir']), 'ST_NM'] = 'Jammu and Kashmir'
# df.dtypes
{new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
# {st.session_state.prompt.strip()}
# <your code here>
```
"""
        query = f"""I have a pandas dataframe data of PM2.5 and PM10.
* The columns are 'Timestamp', 'station', 'PM2.5', 'PM10', 'address', 'city', 'latitude', 'longitude',and 'state'.
* Frequency of data is daily.
* `pollution` generally means `PM2.5`.
* You already have df, so don't read the csv file
* Don't print anything, but save result in a variable `answer` and make it global.
* Unless explicitly mentioned, don't consider the result as a plot.
* PM2.5 guidelines: India: 60, WHO: 15.
* PM10 guidelines: India: 100, WHO: 50.
* If query asks to plot calendarmap, use library calmap.
* If result is a plot, show the India and WHO guidelines in the plot.
* If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
* If result is a plot, rotate x-axis tick labels by 45 degrees,
* If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
* I have a geopandas.geodataframe india containining the coordinates required to plot Indian Map with states.
* If the query asks you to plot on India Map, use that geodataframe to plot and then add more points as per the requirements using the similar code as follows : v = ax.scatter(df['longitude'], df['latitude']). If the colorbar is required, use the following code : plt.colorbar(v)
* If the query asks you to plot on India Map plot the India Map in Beige color
* Whenever you do any sort of aggregation, report the corresponding standard deviation, standard error and the number of data points for that aggregation.
* Whenever you're reporting a floating point number, round it to 2 decimal places.
* Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
Complete the following code.
{template}
"""
        # Body of the fenced block in the template, without the fence itself.
        preamble = template.split("```python")[1].split("```")[0]

        ran = False
        for i in range(5):
            print(f"Attempt {i+1}")
            answer = None
            code = None
            exception = None
            try:
                reply = llm.invoke(query)
                # Raises IndexError when the reply has no ```python fence;
                # that is treated like any other failed attempt.
                generated = reply.content.split("```python")[1].split("```")[0]
                code = f"\n{preamble}\n{generated}\n"
                # SECURITY: this executes model-generated code with the full
                # privileges of the app process. It runs in its own namespace
                # so it cannot overwrite this script's variables; that is
                # isolation of names only, not a sandbox.
                namespace = {"__name__": "__main__"}
                exec(code, namespace)
                # The prompt asks the model to leave its result in `answer`.
                answer = namespace.get("answer")
                ran = True
                no_response = False
            except Exception as e:
                no_response = True
                exception = e

            # Anything other than a string (missing result, failed attempt,
            # non-text value) is replaced by the generic error text.
            if not isinstance(answer, str):
                answer = error_message

            response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": st.session_state.prompt, "no_response": no_response,"exception": exception}
            # print(response)
            if ran:
                break

    # Display agent response
    if code is not None:
        # Add agent response to chat history
        if response['content'].endswith(".gif"):
            # Show the gif directly and blank the stored content so the
            # history entry does not carry the gif path.
            st.image(response['content'], use_column_width=True)
            response['content'] = ""
        print("Adding response : ")
        message_id = len(st.session_state.chat_history)
        st.session_state.chat_history.append({
            "question": st.session_state.prompt,
            "answer": response,
            "message_id": message_id,
        })
        display_answer()

    if no_response:
        print("No response")
        st.error(f"Failed to generate right output due to the following error:\n\n{exception}")

    # display_answer()
    # Pressing a button in feedback reruns the code.
    st.session_state.feedback = streamlit_feedback(
        feedback_type="thumbs",
        optional_text_label="[Optional]",
        align="flex-start",
        key=st.session_state.fbk,
        on_submit=fbcb,
    )
    print("FeedBack",st.session_state.feedback)
    if st.session_state.feedback:
        # Send the error as text: the dataset cannot store an exception object.
        error_text = None if exception is None else str(exception)
        push_to_dataset(
            st.session_state.feedback['score'],
            st.session_state.feedback['text'],
            answer,
            code,
            error_text,
        )
        st.success("Feedback submitted successfully!")