nakas committed
Commit 24539ce · verified · 1 Parent(s): 669cefc

Create app.py

Files changed (1):
  app.py +388 -0
app.py ADDED
@@ -0,0 +1,388 @@
# app.py
import gradio as gr
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.gridspec import GridSpec
import json
import time
from bs4 import BeautifulSoup

# Selenium-related imports
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

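# Environment sketch (an assumption inferred from the imports above, not a pinned spec):
# a requirements.txt for this app would need roughly
#   gradio, requests, pandas, numpy, matplotlib, beautifulsoup4, selenium, webdriver-manager
# plus a system-level Chromium (e.g. chromium-browser installed via packages.txt on a
# Hugging Face Space) to match the binary_location used in scrape_snow_depth() below.
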
def get_raw_data(station_id, hours=72):
    """
    Get raw observation data from the NWS API for the last `hours` hours.
    """
    headers = {
        'User-Agent': '(Weather Data Viewer, [email protected])',
        'Accept': 'application/json'
    }

    # Calculate the date range for the requested window (default: last 72 hours)
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(hours=hours)

    params = {
        'start': start_time.isoformat() + 'Z',
        'end': end_time.isoformat() + 'Z'
    }

    url = f"https://api.weather.gov/stations/{station_id}/observations"

    try:
        print("\nFetching observations...")
        print(f"URL: {url}")
        print(f"Time range: {start_time} to {end_time}")
        response = requests.get(url, headers=headers, params=params)
        print(f"Response status: {response.status_code}")

        if response.status_code != 200:
            print(f"Response content: {response.text}")
            response.raise_for_status()

        data = response.json()

        if 'features' in data:
            print(f"\nNumber of observations: {len(data['features'])}")
            if len(data['features']) > 0:
                print("\nFirst observation properties:")
                print(json.dumps(data['features'][0]['properties'], indent=2))
                keys = set()
                for feature in data['features']:
                    keys.update(feature['properties'].keys())
                print("\nAll available property keys:")
                print(sorted(list(keys)))

        return data
    except Exception as e:
        print(f"Error fetching data: {e}")
        import traceback
        traceback.print_exc()
        return None

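# Shape note (based on the fields read below, not an exhaustive schema): api.weather.gov
# returns a GeoJSON FeatureCollection; each feature's 'properties' dict holds a
# 'timestamp' string plus quantity dicts such as {'unitCode': 'wmoUnit:degC', 'value': ...}
# for 'temperature', 'windSpeed', 'windDirection', and station-dependent snow fields.
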
def scrape_snow_depth():
    """
    Uses Selenium with a headless browser to load the weather.gov timeseries page,
    waits until an element containing 'Snow Depth' is present, then extracts the table data.
    Returns a DataFrame with columns "timestamp" and "snowDepth".
    """
    url = ("https://www.weather.gov/wrh/timeseries?"
           "site=YCTIM&hours=720&units=english&chart=on&headers=on&"
           "obs=tabular&hourly=false&pview=standard&font=12&plot=")

    # Set up headless Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Tell Selenium where Chromium is located
    chrome_options.binary_location = "/usr/bin/chromium-browser"

    # Initialize Chrome using the Service object (Selenium 4+)
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    driver.get(url)

    try:
        # Wait up to 30 seconds for any element containing the text "Snow Depth" to appear
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Snow Depth')]"))
        )
    except Exception as e:
        print("Timeout waiting for 'Snow Depth' element to appear:", e)

    # Allow extra time for dynamic content to load
    time.sleep(5)

    page_source = driver.page_source
    driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # Look through all tables for one that contains "Snow Depth" in its text
    tables = soup.find_all("table")
    target_table = None
    for table in tables:
        table_text = table.get_text()
        print("Found table text snippet:", table_text[:100])
        if "Snow Depth" in table_text:
            target_table = table
            break

    if target_table is None:
        print("No table with 'Snow Depth' found in the page.")
        return pd.DataFrame()

    # Look for header cells in the table
    header_row = target_table.find("tr")
    if not header_row:
        print("No header row found in the table.")
        return pd.DataFrame()
    headers = [th.get_text(strip=True) for th in header_row.find_all("th")]
    print("Table headers found:", headers)

    # Identify column indices (using case-insensitive match)
    time_index = None
    snow_index = None
    for i, header in enumerate(headers):
        if "time" in header.lower():
            time_index = i
        if "snow" in header.lower():
            snow_index = i
    if time_index is None or snow_index is None:
        print("Required columns ('Time' and 'Snow Depth') not found in the table headers.")
        return pd.DataFrame()

    # Extract rows (skip header)
    data = []
    rows = target_table.find_all("tr")[1:]
    for row in rows:
        cells = row.find_all("td")
        if len(cells) <= max(time_index, snow_index):
            continue
        time_text = cells[time_index].get_text(strip=True)
        snow_text = cells[snow_index].get_text(strip=True)
        data.append((time_text, snow_text))

    df = pd.DataFrame(data, columns=["Time", "Snow Depth"])
    # Convert the "Time" column to datetime
    df["Time"] = pd.to_datetime(df["Time"], errors="coerce")
    # Convert "Snow Depth" to numeric (in inches)
    df["Snow Depth"] = pd.to_numeric(df["Snow Depth"], errors="coerce")
    print("Scraped snow depth data:")
    print(df.head())

    # Rename columns to match API data
    return df.rename(columns={"Time": "timestamp", "Snow Depth": "snowDepth"})

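# Result note: scrape_snow_depth() returns an empty DataFrame on failure, or a frame
# with columns ["timestamp", "snowDepth"], where timestamps are naive (site-local as
# rendered by the weather.gov table) and depths are in inches, matching the
# units=english parameter in the scraped URL.
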
def parse_raw_data(data):
    """
    Parse the raw JSON API data into a DataFrame.
    """
    if not data or 'features' not in data:
        return None

    records = []
    for feature in data['features']:
        props = feature['properties']
        # Extract any snow-related fields if present
        snow_fields = {k: v for k, v in props.items() if 'snow' in k.lower()}
        if snow_fields:
            print("\nFound snow-related fields:")
            for k, v in snow_fields.items():
                print(f"{k}: {v}")

        record = {
            'timestamp': props['timestamp'],
            'temperature': props.get('temperature', {}).get('value'),
            'wind_speed': props.get('windSpeed', {}).get('value'),
            'wind_direction': props.get('windDirection', {}).get('value')
        }
        # Add any snow fields
        for k, v in snow_fields.items():
            if isinstance(v, dict) and 'value' in v:
                record[k] = v['value']
            else:
                record[k] = v

        records.append(record)

    df = pd.DataFrame(records)
    print("\nDataFrame columns from API:")
    print(df.columns.tolist())
    print("\nSample of raw API data:")
    print(df.head())

    return df

def process_weather_data(df):
    """
    Process the weather DataFrame: normalize timestamps and convert units.
    """
    if df is None or df.empty:
        return None

    # Convert the timestamp column to datetime. The API returns ISO-8601 strings with a
    # UTC offset; normalize to naive UTC so the later merge_asof with the scraped
    # (naive, site-local) snow-depth timestamps does not fail on mismatched datetime types.
    df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True).dt.tz_localize(None)
    df['date'] = df['timestamp'].dt.date

    # Convert temperature from Celsius to Fahrenheit where values are present
    if df['temperature'].notna().any():
        df['temperature'] = (df['temperature'] * 9/5) + 32

    # Convert wind speed from km/h to mph where values are present (API unit is km/h)
    if df['wind_speed'].notna().any():
        df['wind_speed'] = df['wind_speed'] * 0.621371

    return df

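# Quick sanity check on the conversions above (illustrative arithmetic only):
# 0 °C -> 32 °F and 10 °C -> 50 °F via F = C * 9/5 + 32;
# 10 km/h -> about 6.21 mph via mph = km/h * 0.621371.
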
def create_wind_rose(ax, data, title):
    """
    Create a wind rose subplot.
    """
    if data.empty or data['wind_direction'].isna().all() or data['wind_speed'].isna().all():
        ax.text(0.5, 0.5, 'No wind data available',
                horizontalalignment='center',
                verticalalignment='center',
                transform=ax.transAxes)
        ax.set_title(title)
        return

    plot_data = data.copy()
    direction_bins = np.arange(0, 361, 45)
    directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
    mask = plot_data['wind_direction'].notna() & plot_data['wind_speed'].notna()
    plot_data = plot_data[mask]
    if plot_data.empty:
        ax.text(0.5, 0.5, 'No valid wind data',
                horizontalalignment='center',
                verticalalignment='center',
                transform=ax.transAxes)
        ax.set_title(title)
        return
    plot_data.loc[:, 'direction_bin'] = pd.cut(plot_data['wind_direction'],
                                               bins=direction_bins,
                                               labels=directions,
                                               include_lowest=True)
    wind_stats = plot_data.groupby('direction_bin', observed=True)['wind_speed'].mean()
    all_directions = pd.Series(0.0, index=directions)
    wind_stats = wind_stats.combine_first(all_directions)
    angles = np.linspace(0, 2*np.pi, len(directions), endpoint=False)
    values = [wind_stats[d] for d in directions]
    if any(v > 0 for v in values):
        ax.bar(angles, values, width=0.5, alpha=0.6)
        ax.set_xticks(angles)
        ax.set_xticklabels(directions)
    else:
        ax.text(0.5, 0.5, 'No significant wind',
                horizontalalignment='center',
                verticalalignment='center',
                transform=ax.transAxes)
    ax.set_title(title)

def create_visualizations(df):
    """
    Create static visualizations using matplotlib.
    Plots temperature, wind speed, and snow depth.
    """
    fig = plt.figure(figsize=(20, 24))
    gs = GridSpec(5, 2, figure=fig)

    ax1 = fig.add_subplot(gs[0, :])
    ax2 = fig.add_subplot(gs[1, :])
    ax3 = fig.add_subplot(gs[2, :])

    if not df['temperature'].isna().all():
        ax1.plot(df['timestamp'], df['temperature'], linewidth=2)
    ax1.set_title('Temperature Over Time')
    ax1.set_ylabel('Temperature (°F)')
    ax1.set_xlabel('')
    ax1.grid(True)

    if not df['wind_speed'].isna().all():
        ax2.plot(df['timestamp'], df['wind_speed'], linewidth=2)
    ax2.set_title('Wind Speed Over Time')
    ax2.set_ylabel('Wind Speed (mph)')
    ax2.set_xlabel('')
    ax2.grid(True)

    if 'snowDepth' in df.columns and not df['snowDepth'].isna().all():
        ax3.plot(df['timestamp'], df['snowDepth'], linewidth=2)
        ax3.set_ylim(0, 80)
    else:
        ax3.text(0.5, 0.5, 'No snow depth data available',
                 horizontalalignment='center',
                 verticalalignment='center',
                 transform=ax3.transAxes)
    ax3.set_title('Snow Depth')
    ax3.set_ylabel('Snow Depth (inches)')
    ax3.set_xlabel('')
    ax3.grid(True)

    for ax in [ax1, ax2, ax3]:
        ax.tick_params(axis='x', rotation=45)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))

    # Wind roses for the first two days of data, in the bottom row of the grid
    dates = sorted(df['date'].unique())
    for i, date in enumerate(dates):
        if i < 2:
            ax = fig.add_subplot(gs[4, i], projection='polar')
            day_data = df[df['date'] == date].copy()
            create_wind_rose(ax, day_data, pd.to_datetime(date).strftime('%Y-%m-%d'))

    plt.tight_layout()
    return fig

def get_weather_data(station_id, hours):
    """
    Main function to get and process weather data.
    Combines API data and scraped snow depth data.
    """
    try:
        raw_data = get_raw_data(station_id, hours)
        if raw_data is None:
            return None, "Failed to fetch data from API"

        df = parse_raw_data(raw_data)
        if df is None:
            return None, "Failed to parse API data"

        df = process_weather_data(df)
        if df is None:
            return None, "Failed to process API data"

        # Attempt to scrape snow depth data using Selenium
        snow_df = scrape_snow_depth()
        if not snow_df.empty:
            df = df.sort_values('timestamp')
            snow_df = snow_df.sort_values('timestamp')
            df = pd.merge_asof(df, snow_df, on='timestamp', tolerance=pd.Timedelta('30min'), direction='nearest')

        print("\nProcessed combined data sample:")
        print(df.head())

        return df, None
    except Exception as e:
        return None, f"Error: {str(e)}"

def fetch_and_display(station_id, hours):
    """
    Fetch data and create visualization.
    """
    df, error = get_weather_data(station_id, hours)
    if error:
        return None, error
    if df is not None and not df.empty:
        fig = create_visualizations(df)
        return fig, "Data fetched successfully!"
    return None, "No data available for the specified parameters."

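# Running notes (environment assumptions): when this script is executed directly,
# demo.launch() below starts the UI; run locally with `python app.py`, which serves
# Gradio at its default http://127.0.0.1:7860. The Selenium scrape above needs a
# working Chromium at /usr/bin/chromium-browser; if the driver fails to start, the
# exception is caught in get_weather_data() and surfaced in the Status box.
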
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Weather Data Viewer")
    gr.Markdown("Displays temperature, wind speed, and snow depth from NWS stations.")
    with gr.Row():
        station_id = gr.Textbox(label="Station ID", value="YCTIM")
        hours = gr.Slider(minimum=24, maximum=168, value=72, label="Hours of Data", step=24)
    fetch_btn = gr.Button("Fetch Data")
    plot_output = gr.Plot()
    message = gr.Textbox(label="Status")
    fetch_btn.click(
        fn=fetch_and_display,
        inputs=[station_id, hours],
        outputs=[plot_output, message]
    )

# Launch the app
demo.launch()