# VayuChat / test_image.py
# Commit 95b3c75 (Nipun): Implement ultra-high DPI plots and fix UI responsiveness
# (Hugging Face file-viewer header: raw / history / blame, 5.46 kB)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import uuid
import calendar
import numpy as np
# Set professional matplotlib styling with high resolution
#plt.style.use('vayuchat.mplstyle')
# Reference tables: per-state statistics and NCAP funding per city.
states_df = pd.read_csv("states_data.csv")
ncap_df = pd.read_csv("ncap_funding_data.csv")
# Main observations table; the Timestamp column is converted to datetime
# right after loading so that `.dt` accessors work downstream.
df = pd.read_csv("AQ_met_data.csv").assign(
    Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"])
)
# df is a pandas DataFrame with air quality data from India. The data frequency is daily, from 2017 to 2024. It has the following columns and data types:
# Unnamed: 0 int64
# Timestamp datetime64[ns]
# State object
# City object
# Station object
# site_id object
# Year int64
# PM2.5 (µg/m³) float64
# PM10 (µg/m³) float64
# NO (µg/m³) float64
# NO2 (µg/m³) float64
# NOx (ppb) float64
# NH3 (µg/m³) float64
# SO2 (µg/m³) float64
# CO (mg/m³) float64
# Ozone (µg/m³) float64
# AT (°C) float64
# RH (%) float64
# WS (m/s) float64
# WD (deg) float64
# RF (mm) float64
# TOT-RF (mm) float64
# SR (W/mt2) float64
# BP (mmHg) float64
# VWS (m/s) float64
# dtype: object
# states_df is a pandas DataFrame with the population, area, and union-territory status of each state of India.
# state object
# population int64
# area (km2) int64
# isUnionTerritory bool
# dtype: object
# ncap_df is a pandas DataFrame of funding given to the cities of India from 2019-2022, under The National Clean Air Program (NCAP).
# S. No. int64
# state object
# city object
# Amount released during FY 2019-20 float64
# Amount released during FY 2020-21 float64
# Amount released during FY 2021-22 float64
# Total fund released float64
# Utilisation as on June 2022 float64
# dtype: object
# Question: Compare the wind speed and PM2.5 levels during Delhi’s most polluted week (highest PM2.5) in December 2024 with the previous 15 days and the following 15 days on a time series plot.
# Generate code to answer the question and save result in 'answer' variable
# If creating a plot, save it with a unique filename and store the filename in 'answer'
# If returning text/numbers, store the result directly in 'answer'
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import uuid
import numpy as np
# Plot Delhi's PM2.5 and wind speed around its most polluted week of
# December 2024 (the 7 consecutive days with the highest mean PM2.5), together
# with the 15 days before and the 15 days after that week.
#
# Input : `df` (module-level DataFrame; uses the 'City', 'Timestamp',
#         'PM2.5 (µg/m³)' and 'WS (m/s)' columns).
# Output: `answer` is set to the saved PNG filename on success, or to one of
#         the messages "No data available", "Insufficient data",
#         "Unable to complete analysis with available data".
if df.empty:
    answer = "No data available"
else:
    fig = None
    try:
        pm_col = 'PM2.5 (µg/m³)'
        ws_col = 'WS (m/s)'

        # Keep every Delhi row regardless of month: the context window around
        # the polluted week reaches back into November, so restricting to
        # December here would silently cut the "previous 15 days" off.
        delhi = df[df['City'].str.contains('Delhi', case=False, na=False)]

        # Collapse to one row per calendar day (mean over all matching rows,
        # e.g. several stations) so that "7 rows" really means "7 days".
        daily = (
            delhi.groupby(delhi['Timestamp'].dt.normalize())[[pm_col, ws_col]]
            .mean()
            .sort_index()
        )

        # Candidate weeks must lie entirely inside December 2024. Reindexing on
        # the full calendar makes missing days explicit NaNs, so a rolling
        # window of 7 rows is always exactly 7 consecutive dates.
        december_days = pd.date_range('2024-12-01', '2024-12-31', freq='D')
        december_pm = daily[pm_col].reindex(december_days)
        weekly_mean = december_pm.rolling(window=7, min_periods=7).mean().dropna()

        if weekly_mean.empty:
            # No run of 7 consecutive December days with PM2.5 readings.
            answer = "Insufficient data"
        else:
            # The rolling mean is labelled by the LAST day of each window.
            week_end = weekly_mean.idxmax()
            week_start = week_end - pd.Timedelta(days=6)

            context = pd.Timedelta(days=15)
            in_window = (
                (daily.index >= week_start - context)
                & (daily.index <= week_end + context)
            )
            # The window always contains the polluted week itself, so it cannot
            # be empty; it may be shorter than 37 days when the dataset ends
            # before the "following 15 days" are over, which is still plotted.
            plot_df = (
                daily.loc[in_window]
                .dropna(how='all')
                .rename_axis('Timestamp')
                .reset_index()
            )

            fig, ax_pm = plt.subplots(figsize=(9, 6))
            sns.lineplot(data=plot_df, x='Timestamp', y=pm_col, ax=ax_pm,
                         label='PM2.5 (µg/m³)', color='tab:red', legend=False)
            ax_pm.set_ylabel('PM2.5 (µg/m³)', color='tab:red')
            ax_pm.tick_params(axis='y', labelcolor='tab:red')
            # Label the primary axis: the twin's x-axis is hidden, so an
            # x-label set there would never be drawn.
            ax_pm.set_xlabel('Date')

            # Shade the most polluted week so it can be compared visually with
            # the periods before and after it.
            ax_pm.axvspan(week_start, week_end, color='tab:red', alpha=0.12,
                          label='Most polluted week')

            ax_ws = ax_pm.twinx()
            sns.lineplot(data=plot_df, x='Timestamp', y=ws_col, ax=ax_ws,
                         label='Wind Speed (m/s)', color='tab:blue', legend=False)
            ax_ws.set_ylabel('Wind Speed (m/s)', color='tab:blue')
            ax_ws.tick_params(axis='y', labelcolor='tab:blue')

            # One combined legend instead of two overlapping per-axis legends;
            # drawn on the twin axis because that one is rendered on top.
            pm_handles, pm_labels = ax_pm.get_legend_handles_labels()
            ws_handles, ws_labels = ax_ws.get_legend_handles_labels()
            ax_ws.legend(pm_handles + ws_handles, pm_labels + ws_labels,
                         loc='upper left')

            ax_pm.set_title('Delhi – PM2.5 and Wind Speed around Most Polluted Week (Dec 2024)')
            fig.autofmt_xdate()
            fig.tight_layout()

            # Unique filename so repeated runs do not overwrite each other.
            filename = f"plot_{uuid.uuid4().hex}.png"
            fig.savefig(filename, dpi=1200, bbox_inches='tight', facecolor='white')
            answer = filename
    except Exception:
        # Deliberate best-effort boundary: any failure is reported to the
        # caller through `answer` rather than raised.
        answer = "Unable to complete analysis with available data"
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)