File size: 10,671 Bytes
c114fc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d241de
c114fc1
 
0d241de
c114fc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import streamlit as st
import os
import pandas as pd
import random
from os.path import join
from src import preprocess_and_load_df, load_agent, ask_agent, decorate_with_code, show_response, get_from_user, load_smart_df, ask_question
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq

# Pull variables from the local Groq.txt file into the process environment,
# then read the Groq key from it (raises KeyError when the key is missing).
load_dotenv("Groq.txt")
Groq_Token = os.environ["GROQ_API_KEY"]

# Short model names offered in the sidebar -> model identifiers passed to ChatGroq.
models = {
    "llama3": "llama3-70b-8192",
    "mixtral": "mixtral-8x7b-32768",
    "llama2": "llama2-70b-4096",
    "gemma": "gemma-7b-it",
}

# Absolute path of the directory that contains this script.
_script_file = os.path.abspath(__file__)
self_path = os.path.dirname(_script_file)

# Using HTML and CSS to center the title.
# The <style> element is explicitly closed so the injected markup is
# well-formed and cannot swallow content that follows it.
st.write(
    """
    <style>
    .title {
        text-align: center;
        color: #17becf;
    }
    </style>
""",
    unsafe_allow_html=True,
)

# Displaying the centered title
st.markdown("<h2 class='title'>VayuBuddy</h2>", unsafe_allow_html=True)

# os.environ["PANDASAI_API_KEY"] = "<REDACTED>"  # NOTE: a literal key was committed here; rotate it and load it from the environment instead.

# with open(join(self_path, "context1.txt")) as f:
#     context = f.read().strip()

# agent = load_agent(join(self_path, "app_trial_1.csv"), context)
# df = preprocess_and_load_df(join(self_path, "Data.csv"))
# inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
# inference_server = "https://api-inference.huggingface.co/models/codellama/CodeLlama-13b-hf"
# inference_server = "https://api-inference.huggingface.co/models/pandasai/bamboo-llm"

# Sidebar choice of LLM; the options are the keys of `models`, in declaration order.
model_name = st.sidebar.selectbox("Select LLM:", list(models))

# Canned prompts offered in the sidebar. The first entry is a sentinel that
# switches the app to free-text chat input.
# Fixes over the previous list:
#   * the "Delhi, Mumbai and Bengaluru" prompt had been split into two
#     meaningless entries by a stray quote/comma;
#   * "Plot the monthly average PM2.5 of Delhi for the year 2022." was listed twice.
questions = (
    'Custom Prompt',
    'Plot the monthly average PM2.5 for the year 2023.',
    'Which month has the highest average PM2.5 overall?',
    'Which month has the highest PM2.5 overall?',
    'Which month has the highest average PM2.5 in 2023 for Mumbai?',
    'Plot and compare monthly timeseries of pollution for Mumbai and Bengaluru.',
    'Plot the yearly average PM2.5.',
    'Plot the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
    'Which month has the highest pollution?',
    'Plot the monthly average PM2.5 of Delhi for the year 2022.',
    'Which city has the highest PM2.5 level in July 2022?',
    'Plot and compare monthly timeseries of PM2.5 for Mumbai and Bengaluru.',
    'Plot and compare the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
    'Plot the monthly average PM2.5.',
    'Plot the monthly average PM10 for the year 2023.',
    'Which month has the highest PM2.5?',
    'Plot the monthly average PM2.5 of Bengaluru for the year 2022.',
    'Plot the monthly average PM2.5 of Mumbai for the year 2022.',
    'Which state has the highest average PM2.5?',
    'Plot monthly PM2.5 in Gujarat for 2023.',
    'What is the name of the month with the highest average PM2.5 overall?',
)

# Spinner captions; one is picked at random while the model is working.
waiting_lines = (
    "Thinking...",
    "Just a moment...",
    "Let me think...",
    "Working on it...",
    "Processing...",
    "Hold on...",
    "One moment...",
    "On it...",
)

# agent = load_agent(df, context="", inference_server=inference_server, name=model_name)

# Create the per-session chat history the first time the script runs.
if "responses" not in st.session_state:
    st.session_state["responses"] = []

# On every rerun, replay the stored messages, skipping entries that were
# recorded as failed attempts (no_response is truthy).
for past_response in st.session_state["responses"]:
    if past_response["no_response"]:
        continue
    show_response(st, past_response)

# Pick the question to answer: either one of the canned prompts or, when the
# 'Custom Prompt' sentinel is selected, whatever the user typed into the chat box.
prompt = st.sidebar.selectbox("Select a Prompt:", questions)

# add a note "select custom prompt to ask your own question"

if prompt == 'Custom Prompt':
    # React to user input; only proceed when the chat box returned something truthy.
    prompt = st.chat_input("Ask me anything about air quality!", key=10)
    show = bool(prompt)
else:
    show = True

if show:
    # Answer `prompt`: record and display the user message, ask the selected
    # Groq model to complete a pandas code template, execute the completed
    # code (retrying up to 5 times on failure), then record and display the
    # outcome.

    # Add user input to chat history
    response = get_from_user(prompt)
    response["no_response"] = False
    st.session_state.responses.append(response)

    # Display user input
    show_response(st, response)

    no_response = False
    exception = None

    # select random waiting line
    with st.spinner(random.choice(waiting_lines)):
        # Everything up to the retry loop does not change between attempts,
        # so it is built once instead of on every retry.

        # Use the key loaded at the top of the file (GROQ_API_KEY); the
        # previous lookup of "GROQ_API" returned None.
        llm = ChatGroq(model=models[model_name], api_key=Groq_Token, temperature=0.1)

        # Only the column dtypes of the data are shown to the model.
        df_check = pd.read_csv("Data.csv")
        df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
        df_check = df_check.head(5)

        # One "# <column> <dtype>" comment line per line of the dtypes listing.
        dtype_comments = "\n".join("# " + line for line in str(df_check.dtypes).split("\n"))

        template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# df.dtypes
{dtype_comments}

# {prompt.strip()}
# <your code here>
```
"""

        query = f"""I have a pandas dataframe data of PM2.5 and PM10.
            * Frequency of data is daily. 
            * Number of stations in a city is determined by finding the unique stations in the dataset along with their city
            * `pollution` generally means `PM2.5`.
            * PM2.5 guidelines: India: 60, WHO: 25.
            * PM10 guidelines: India: 100, WHO: 50.
            * You already have df, so don't read the csv file 
            * Don't print, but save result in a variable `answer` and make it global.
            * Unless explicitly mentioned, don't consider the result as a plot.
            * If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
            * If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
            * If result is not a plot, return a csv file containing the data and the corresponding answer, as well as the data samples used 
            * If result is a plot, show the India and WHO guidelines in the plot.
            * Whenever you do an aggregation, do it via mean and report the standard deviation and standard error, report the number of data points.
            * Whenever you're reporting a floating point number, round it to 2 decimal places.
            * Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
            * If the result is a plot, make it using tableau 20 colour scheme and big font size.
            * Consider station and sensor synonymously. 
            Complete the following code.

            {template}

            """

        # Setup code (imports + loading df) that is run ahead of the model's completion.
        template_code = template.split("```python")[1].split("```")[0]

        for _ in range(5):
            answer = llm.invoke(query)
            code = ""

            try:
                # A reply without a fenced python block counts as a failed
                # attempt (and is retried) instead of crashing the app.
                fenced_parts = answer.content.split("```python")
                if len(fenced_parts) < 2:
                    raise ValueError("The model reply did not contain a ```python code block.")
                generated_code = fenced_parts[1].split("```")[0]

                # Joined without extra indentation so the generated code can
                # never be mis-indented by the surrounding string.
                code = f"{template_code}\n{generated_code}\n"

                # SECURITY: this executes model-generated code with the app's
                # own privileges and namespace. It is expected to rebind the
                # variable `answer`, which is what gets displayed below.
                exec(code)
                no_response = False
            except Exception as e:
                no_response = True
                exception = e

            response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": prompt, "no_response": no_response}

            # Get response from agent
            # response = ask_question(model_name=model_name, question=prompt)
            # response = ask_agent(agent, prompt)

            if not no_response:
                break

    if no_response:
        st.error(f"Failed to generate right output due to the following error:\n\n{exception}")
    # Add agent response to chat history
    st.session_state.responses.append(response)

    # Display agent response
    if not no_response:
        show_response(st, response)

    del prompt



st.sidebar.info("\nCalculator")
Pollutant = ["O3", "PM2.5", "PM10", "CO", "SO2", "NO2"]

# Concentration breakpoints per pollutant, in the unit shown on the input
# widget (µg/m³). Consecutive values delimit one bracket: bracket k spans
# edges[k] .. edges[k + 1].
_AQI_BREAKPOINTS = {
    "O3": [0, 50, 100, 168, 208, 748, 1000],
    "PM2.5": [0, 30, 60, 90, 120, 250, 1000],
    "PM10": [0, 50, 100, 250, 350, 430, 1000],
    "CO": [0, 1000, 2000, 10000, 17000, 34000, 50000],
    "SO2": [0, 40, 80, 380, 800, 1600, 2000],
    "NO2": [0, 40, 80, 180, 280, 400, 1000],
}

# AQI values at the bracket edges; bracket k maps onto
# _AQI_CATEGORIES[k] .. _AQI_CATEGORIES[k + 1].
_AQI_CATEGORIES = [0, 50, 100, 200, 300, 400, 500]


def _aqi_sub_index(pollutant, concentration):
    """Return the AQI for *concentration* of *pollutant* by linear interpolation.

    The bracket is the first one whose upper breakpoint is not exceeded, and
    BOTH the lower and the upper breakpoint are taken from that same bracket.
    (Previously the lower breakpoint was always 0, which gave wrong values
    for every bracket except the first.)

    Returns None when the concentration is negative or above the highest
    supported breakpoint, instead of failing with a NameError.
    """
    if concentration < 0:
        return None

    edges = _AQI_BREAKPOINTS[pollutant]
    brackets = zip(edges, edges[1:], _AQI_CATEGORIES, _AQI_CATEGORIES[1:])
    for bp_lo, bp_hi, idx_lo, idx_hi in brackets:
        if concentration <= bp_hi:
            # India AQI formula; as before, the concentration is rounded to
            # the nearest integer before interpolating.
            return ((idx_hi - idx_lo) / (bp_hi - bp_lo)) * (round(concentration) - bp_lo) + idx_lo
    return None


# Label fixed: this box selects a pollutant, not a prompt.
Calculator_index = st.sidebar.selectbox("Select a Pollutant:", Pollutant)

if Calculator_index:
    # min_value stops negative concentrations at the widget itself.
    concentration = st.sidebar.number_input(
        f"Enter {Calculator_index} concentration (µg/m³):", min_value=0.0
    )
    calculate_button = st.sidebar.button("Calculate")
    # A concentration of 0 (the widget default) is treated as "nothing entered".
    if concentration and calculate_button:
        AQI = _aqi_sub_index(Calculator_index, concentration)
        if AQI is None:
            st.sidebar.error(
                f"{concentration} µg/m³ is outside the supported range for {Calculator_index} "
                f"(0 to {_AQI_BREAKPOINTS[Calculator_index][-1]} µg/m³)."
            )
        else:
            # Rounded to 2 decimals so the sidebar does not show float noise.
            st.sidebar.write(f"The Air Quality Index (AQI) for {Calculator_index} is: {round(AQI, 2)}")