Spaces:

SustainabilityLabIITGN
/

VayuBuddy

Running

App Files Community

YashB1 committed on May 6, 2024

Commit

25414e5

verified ·

1 Parent(s): 071b823

Update app.py

Browse files

Files changed (1) hide show

app.py +241 -70

app.py CHANGED Viewed

@@ -1,4 +1,200 @@
-import superimport
 import streamlit as st
 import os
 import pandas as pd
@@ -28,7 +224,10 @@ st.write(
 # Displaying the centered title
 st.markdown("<h2 class='title'>VayuBuddy</h2>", unsafe_allow_html=True)
 # os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
 # with open(join(self_path, "context1.txt")) as f:
@@ -44,20 +243,19 @@ model_name = st.sidebar.selectbox("Select LLM:", ["llama3","mixtral", "gemma"])
 questions = ('Custom Prompt',
              'Plot the monthly average PM2.5 for the year 2023.',
-             'Which month has the highest average PM2.5 overall?',
-             'Which month has the highest PM2.5 overall?',
              'Which month has the highest average PM2.5 in 2023 for Mumbai?',
              'Plot and compare monthly timeseries of pollution for Mumbai and Bengaluru.',
              'Plot the yearly average PM2.5.',
              'Plot the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
              'Which month has the highest pollution?',
-             'Plot the monthly average PM2.5 of Delhi for the year 2022.',
              'Which city has the highest PM2.5 level in July 2022?',
              'Plot and compare monthly timeseries of PM2.5 for Mumbai and Bengaluru.',
              'Plot and compare the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
              'Plot the monthly average PM2.5.',
              'Plot the monthly average PM10 for the year 2023.',
-             'Which month has the highest PM2.5?',
              'Plot the monthly average PM2.5 of Delhi for the year 2022.',
              'Plot the monthly average PM2.5 of Bengaluru for the year 2022.',
              'Plot the monthly average PM2.5 of Mumbai for the year 2022.',
@@ -105,8 +303,9 @@ if prompt := st.sidebar.selectbox("Select a Prompt:", questions):
         # select random waiting line
         with st.spinner(random.choice(waiting_lines)):
             ran = False
-            for i in range(5):
-                llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0.1)
                 df_check = pd.read_csv("Data.csv")
                 df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
@@ -114,62 +313,19 @@ if prompt := st.sidebar.selectbox("Select a Prompt:", questions):
                 new_line = "\n"
-                parameters = {"font.size": 18}
                 template = f"""```python
 import pandas as pd
 import matplotlib.pyplot as plt
-plt.rcParams.update({parameters})
 df = pd.read_csv("Data.csv")
 df["Timestamp"] = pd.to_datetime(df["Timestamp"])
-def calculator(Pollutant, concentration):
-    Calculator_index = Pollutant
-    breakpoints_low = {{
-        "O3": [0, 50, 100, 168, 208, 748],
-        "PM2.5": [0, 30, 60, 90, 120, 250],
-        "PM10": [0, 50, 100, 250, 350, 430],
-        "CO": [0, 1000, 2000, 10000, 17000, 34000],
-        "SO2": [0, 40, 80, 380, 800, 1600],
-        "NO2": [0, 40, 80, 180, 280, 400]
-    }}
-    breakpoints_high = {{
-        "O3": [50, 100, 168, 208, 748,1000],
-        "PM2.5": [30, 60, 90, 120, 250,1000],
-        "PM10": [50, 100, 250, 350, 430,1000],
-        "CO": [1000, 2000, 10000, 17000, 34000,50000],
-        "SO2": [40, 80, 380, 800, 1600,2000],
-        "NO2": [ 40, 80, 180, 280, 400,1000]
-    }}
-    # Define corresponding AQI categories
-    categories_low= [0, 50, 100, 200, 300, 400]
-    categories_high = [50, 100, 200, 300, 400,500]
-    # Find the appropriate AQI category based on concentration
-    for i in range(len(breakpoints_high[Calculator_index])):
-        if concentration <= breakpoints_high[Calculator_index][i]:
-            BPHI = breakpoints_high[Calculator_index][i]
-            IHI = categories_high[i]
-            # Calculate AQI using India formula
-            #AQI = ((categories[i] - categories[i-1]) / (breakpoints[Calculator_index][i] - breakpoints[Calculator_index][i-1])) * (concentration - breakpoints[Calculator_index][i-1]) + categories[i-1]
-            #st.sidebar.write(f"The Air Quality Index (AQI) for {{Calculator_index}} is: {{AQI}}")
-            break
-    for i in range(len(breakpoints_low[Calculator_index])):
-        if concentration >= breakpoints_low[Calculator_index][i]:
-            BPLI = breakpoints_low[Calculator_index][i]
-            ILI = categories_low[i]
-            # Calculate AQI using India formula
-            #AQI = ((categories[i] - categories[i-1]) / (breakpoints[Calculator_index][i] - breakpoints[Calculator_index][i-1])) * (concentration - breakpoints[Calculator_index][i-1]) + categories[i-1]
-            #st.sidebar.write(f"The Air Quality Index (AQI) for {{Calculator_index}} is: {{AQI}}")
-            break
-    AQI = ((IHI - ILI) / (BPHI - BPLI)) * (round(concentration) - BPLI) + ILI
-    return AQI
 # df.dtypes
 {new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
@@ -180,16 +336,21 @@ def calculator(Pollutant, concentration):
 """
                 query = f"""I have a pandas dataframe data of PM2.5 and PM10.
                 * Frequency of data is daily.
                 * `pollution` generally means `PM2.5`.
                 * You already have df, so don't read the csv file
-                * Don't print, but save result in a variable `answer` and make it global.
                 * Unless explicitly mentioned, don't consider the result as a plot.
                 * PM2.5 guidelines: India: 60, WHO: 15.
                 * PM10 guidelines: India: 100, WHO: 50.
 				* If result is a plot, show the India and WHO guidelines in the plot.
                 * If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
                 * If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
                 * Whenever you do any sort of aggregation, report the corresponding standard deviation, standard error and the number of data points for that aggregation.
                 * Whenever you're reporting a floating point number, round it to 2 decimal places.
                 * Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
@@ -199,21 +360,27 @@ def calculator(Pollutant, concentration):
                 {template}
                 """
-                answer = llm.invoke(query)
-                code = f"""
-                {template.split("```python")[1].split("```")[0]}
-                {answer.content.split("```python")[1].split("```")[0]}
-                """
-                # update variable `answer` when code is executed
                 try:
                     exec(code)
                     ran = True
                     no_response = False
                 except Exception as e:
                     no_response = True
                     exception = e
                 response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": prompt, "no_response": no_response}
                 # Get response from agent
@@ -221,15 +388,19 @@ def calculator(Pollutant, concentration):
                 # response = ask_agent(agent, prompt)
                 if ran:
-                    break
         if no_response:
             st.error(f"Failed to generate right output due to the following error:\n\n{exception}")
-        # Add agent response to chat history
-        st.session_state.responses.append(response)
-        # Display agent response
-        if not no_response:
-            show_response(st, response)
-        del prompt

+# import streamlit as st
+# import os
+# import pandas as pd
+# import random
+# from os.path import join
+# from src import preprocess_and_load_df, load_agent, ask_agent, decorate_with_code, show_response, get_from_user, load_smart_df, ask_question
+# from dotenv import load_dotenv
+# from langchain_groq.chat_models import ChatGroq
+# load_dotenv("Groq.txt")
+# Groq_Token = os.environ["GROQ_API_KEY"]
+# models = {"llama3":"llama3-70b-8192","mixtral": "mixtral-8x7b-32768", "llama2": "llama2-70b-4096", "gemma": "gemma-7b-it"}
+# self_path = os.path.dirname(os.path.abspath(__file__))
+# # Using HTML and CSS to center the title
+# st.write(
+#     """
+#     <style>
+#     .title {
+#         text-align: center;
+#         color: #17becf;
+#     }
+# """,
+#     unsafe_allow_html=True,
+# )
+# # Displaying the centered title
+# st.markdown("<h2 class='title'>VayuBuddy</h2>", unsafe_allow_html=True)
+# # os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
+# # with open(join(self_path, "context1.txt")) as f:
+# #     context = f.read().strip()
+# # agent = load_agent(join(self_path, "app_trial_1.csv"), context)
+# # df = preprocess_and_load_df(join(self_path, "Data.csv"))
+# # inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+# # inference_server = "https://api-inference.huggingface.co/models/codellama/CodeLlama-13b-hf"
+# # inference_server = "https://api-inference.huggingface.co/models/pandasai/bamboo-llm"
+# model_name = st.sidebar.selectbox("Select LLM:", ["llama3","mixtral", "llama2", "gemma"])
+# questions = ('Custom Prompt',
+#              'Plot the monthly average PM2.5 for the year 2023.',
+#              'Which month has the highest average PM2.5 overall?',
+#              'Which month has the highest PM2.5 overall?',
+#              'Which month has the highest average PM2.5 in 2023 for Mumbai?',
+#              'Plot and compare monthly timeseries of pollution for Mumbai and Bengaluru.',
+#              'Plot the yearly average PM2.5.',
+#              'Plot the monthly average PM2.5 of Delhi',
+#              'Mumbai and Bengaluru for the year 2022.',
+#              'Which month has the highest pollution?',
+#              'Plot the monthly average PM2.5 of Delhi for the year 2022.',
+#              'Which city has the highest PM2.5 level in July 2022?',
+#              'Plot and compare monthly timeseries of PM2.5 for Mumbai and Bengaluru.',
+#              'Plot and compare the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
+#              'Plot the monthly average PM2.5.',
+#              'Plot the monthly average PM10 for the year 2023.',
+#              'Which month has the highest PM2.5?',
+#              'Plot the monthly average PM2.5 of Delhi for the year 2022.',
+#              'Plot the monthly average PM2.5 of Bengaluru for the year 2022.',
+#              'Plot the monthly average PM2.5 of Mumbai for the year 2022.',
+#              'Which state has the highest average PM2.5?',
+#              'Plot monthly PM2.5 in Gujarat for 2023.',
+#              'What is the name of the month with the highest average PM2.5 overall?')
+# waiting_lines = ("Thinking...", "Just a moment...", "Let me think...", "Working on it...", "Processing...", "Hold on...", "One moment...", "On it...")
+# # agent = load_agent(df, context="", inference_server=inference_server, name=model_name)
+# # Initialize chat history
+# if "responses" not in st.session_state:
+#     st.session_state.responses = []
+# # Display chat responses from history on app rerun
+# for response in st.session_state.responses:
+#     if not response["no_response"]:
+#         show_response(st, response)
+# show = True
+# prompt = st.sidebar.selectbox("Select a Prompt:", questions)
+# # add a note "select custom prompt to ask your own question"
+# if prompt == 'Custom Prompt':
+#     show = False
+#     # React to user input
+#     prompt = st.chat_input("Ask me anything about air quality!", key=10)
+#     if prompt:
+#         show = True
+# if show:
+#     # Add user input to chat history
+#     response = get_from_user(prompt)
+#     response["no_response"] = False
+#     st.session_state.responses.append(response)
+#     # Display user input
+#     show_response(st, response)
+#     no_response = False
+#     # select random waiting line
+#     with st.spinner(random.choice(waiting_lines)):
+#         ran = False
+#         for i in range(5):
+#             llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0.1)
+#             df_check = pd.read_csv("Data.csv")
+#             df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
+#             df_check = df_check.head(5)
+#             new_line = "\n"
+#             template = f"""```python
+# import pandas as pd
+# import matplotlib.pyplot as plt
+# df = pd.read_csv("Data.csv")
+# df["Timestamp"] = pd.to_datetime(df["Timestamp"])
+# # df.dtypes
+# {new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
+# # {prompt.strip()}
+# # <your code here>
+# ```
+# """
+#             query = f"""I have a pandas dataframe data of PM2.5 and PM10.
+#                 * Frequency of data is daily.
+#                 * `pollution` generally means `PM2.5`.
+#                 * You already have df, so don't read the csv file
+#                 * Don't print, but save result in a variable `answer` and make it global.
+#                 * Unless explicitly mentioned, don't consider the result as a plot.
+#                 * PM2.5 guidelines: India: 60, WHO: 15.
+#                 * PM10 guidelines: India: 100, WHO: 50.
+# 				* If result is a plot, show the India and WHO guidelines in the plot.
+#                 * If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
+#                 * If result is a plot, rotate x-axis tick labels by 45 degrees,
+#                 * If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
+#                 * Whenever you do any sort of aggregation, report the corresponding standard deviation, standard error and the number of data points for that aggregation.
+#                 * Whenever you're reporting a floating point number, round it to 2 decimal places.
+#                 * Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
+#                 Complete the following code.
+#             {template}
+#             """
+#             answer = llm.invoke(query)
+#             code = f"""
+#             {template.split("```python")[1].split("```")[0]}
+#             {answer.content.split("```python")[1].split("```")[0]}
+#             """
+#             # update variable `answer` when code is executed
+#             try:
+#                 exec(code)
+#                 ran = True
+#                 no_response = False
+#             except Exception as e:
+#                 no_response = True
+#                 exception = e
+#             response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": prompt, "no_response": no_response}
+#             # Get response from agent
+#             # response = ask_question(model_name=model_name, question=prompt)
+#             # response = ask_agent(agent, prompt)
+#             if ran:
+#                 break
+#     if no_response:
+#         st.error(f"Failed to generate right output due to the following error:\n\n{exception}")
+#     # Add agent response to chat history
+#     st.session_state.responses.append(response)
+#     # Display agent response
+#     if not no_response:
+#         show_response(st, response)
+#     del prompt
 import streamlit as st
 import os
 import pandas as pd
 # Displaying the centered title
 st.markdown("<h2 class='title'>VayuBuddy</h2>", unsafe_allow_html=True)
+st.markdown("<div style='text-align:center; padding: 20px;'>VayuBuddy makes pollution monitoring easier by bridging the gap between users and datasets.<br>No coding required—just meaningful insights at your fingertips!</div>", unsafe_allow_html=True)
+# Center-aligned instruction text with bold formatting
+st.markdown("<div style='text-align:center;'>Choose a query from <b>Select a prompt</b> or type a query in the <b>chat box</b>, select a <b>LLM</b> (Large Language Model), and press enter to generate a response.</div>", unsafe_allow_html=True)
 # os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
 # with open(join(self_path, "context1.txt")) as f:
 questions = ('Custom Prompt',
              'Plot the monthly average PM2.5 for the year 2023.',
+             'Which month in which year has the highest average PM2.5 overall?',
+             'Which month in which year has the highest PM2.5 overall?',
              'Which month has the highest average PM2.5 in 2023 for Mumbai?',
              'Plot and compare monthly timeseries of pollution for Mumbai and Bengaluru.',
              'Plot the yearly average PM2.5.',
              'Plot the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
              'Which month has the highest pollution?',
              'Which city has the highest PM2.5 level in July 2022?',
              'Plot and compare monthly timeseries of PM2.5 for Mumbai and Bengaluru.',
              'Plot and compare the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
              'Plot the monthly average PM2.5.',
              'Plot the monthly average PM10 for the year 2023.',
+             'Which (month, year) has the highest PM2.5?',
              'Plot the monthly average PM2.5 of Delhi for the year 2022.',
              'Plot the monthly average PM2.5 of Bengaluru for the year 2022.',
              'Plot the monthly average PM2.5 of Mumbai for the year 2022.',
         # select random waiting line
         with st.spinner(random.choice(waiting_lines)):
             ran = False
+            for i in range(1):
+                print(f"Attempt {i+1}")
+                llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0)
                 df_check = pd.read_csv("Data.csv")
                 df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
                 new_line = "\n"
+                parameters = {"font.size": 12}
                 template = f"""```python
 import pandas as pd
 import matplotlib.pyplot as plt
+# plt.rcParams.update({parameters})
 df = pd.read_csv("Data.csv")
 df["Timestamp"] = pd.to_datetime(df["Timestamp"])
+import geopandas as gpd
+india = gpd.read_file("https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson")
 # df.dtypes
 {new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
 """
                 query = f"""I have a pandas dataframe data of PM2.5 and PM10.
+                * The columns are 'Timestamp', 'station', 'PM2.5', 'PM10', 'address', 'city', 'latitude', 'longitude',and 'state'.
                 * Frequency of data is daily.
                 * `pollution` generally means `PM2.5`.
                 * You already have df, so don't read the csv file
+                * Don't print anything, but save result in a variable `answer` and make it global.
                 * Unless explicitly mentioned, don't consider the result as a plot.
                 * PM2.5 guidelines: India: 60, WHO: 15.
                 * PM10 guidelines: India: 100, WHO: 50.
 				* If result is a plot, show the India and WHO guidelines in the plot.
                 * If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
+                * If result is a plot, rotate x-axis tick labels by 45 degrees,
                 * If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
+                * I have a geopandas.geodataframe india containining the coordinates required to plot Indian Map with states.
+                * If the query asks you to plot on India Map, use that geodataframe to plot and then add more points as per the requirements using the similar code as follows : v = ax.scatter(df['longitude'], df['latitude']). If the colorbar is required, use the following code : plt.colorbar(v)
+                * If the query asks you to plot on India Map plot the India Map in Beige color
                 * Whenever you do any sort of aggregation, report the corresponding standard deviation, standard error and the number of data points for that aggregation.
                 * Whenever you're reporting a floating point number, round it to 2 decimal places.
                 * Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
                 {template}
                 """
+                answer = None
+                code = None
                 try:
+                    answer = llm.invoke(query)
+                    code = f"""
+                    {template.split("```python")[1].split("```")[0]}
+                    {answer.content.split("```python")[1].split("```")[0]}
+                    """
+                    # update variable `answer` when code is executed
                     exec(code)
                     ran = True
                     no_response = False
                 except Exception as e:
                     no_response = True
                     exception = e
+                    if code is not None:
+                        answer = f"!!!Faced an error while working on your query. Please try again!!!"
+                if type(answer) != str:
+                    answer = f"!!!Faced an error while working on your query. Please try again!!!"
                 response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": prompt, "no_response": no_response}
                 # Get response from agent
                 # response = ask_agent(agent, prompt)
                 if ran:
+                    break
+        # Display agent response
+        if code is not None:
+            # Add agent response to chat history
+            print("Adding response")
+            st.session_state.responses.append(response)
+            show_response(st, response)
         if no_response:
+            print("No response")
             st.error(f"Failed to generate right output due to the following error:\n\n{exception}")
+        prompt = 'Custom Prompt'