File size: 4,583 Bytes
43106f9
 
 
 
2e28476
f2e3576
 
 
 
 
 
938a35d
2e28476
f2e3576
1237c34
f2e3576
2e28476
 
 
f2e3576
 
 
 
2e28476
f6be049
2e28476
 
f6be049
2e28476
 
 
 
f6be049
2e28476
f6be049
f2e3576
f6be049
2e28476
 
 
f6be049
 
 
 
2e28476
f6be049
1237c34
 
 
 
 
2e28476
938a35d
2e28476
 
 
 
 
 
 
 
 
 
 
1237c34
f6be049
 
 
2e28476
 
f6be049
2e28476
 
 
 
 
f2e3576
 
 
 
938a35d
f2e3576
1237c34
 
 
 
2e28476
1237c34
 
2e28476
1237c34
2e28476
f6be049
 
 
2e28476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6be049
 
 
 
1237c34
f6be049
2e28476
 
43106f9
 
 
 
2e28476
43106f9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connection closed.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "\n",
      "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160344\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (3.28s) \n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.04s) \n"
     ]
    }
   ],
   "source": [
    "import streamlit as st\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import datetime\n",
    "import hopsworks\n",
    "from functions import figure, retrieve\n",
    "import os\n",
    "import pickle\n",
    "import plotly.express as px\n",
    "import json\n",
    "from datetime import datetime\n",
    "import os\n",
    "\n",
    "\n",
    "# Real data (as opposed to mock data): per the recorded output of this cell, the\n",
    "# load below logs in to Hopsworks and reads through the Feature Query Service.\n",
    "# NOTE: `datetime` is the class bound by `from datetime import datetime` above, which\n",
    "# shadows the earlier `import datetime` module import.\n",
    "# `today` is the current local date formatted as YYYY-MM-DD.\n",
    "today = datetime.today().strftime('%Y-%m-%d')\n",
    "# NOTE(review): the columns of the merged frame are not visible in this notebook;\n",
    "# check functions/retrieve.py before relying on a particular schema.\n",
    "df = retrieve.get_merged_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connection closed.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "\n",
      "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160344\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "No air_quality_fv feature view found\n",
      "No air_quality feature group found\n",
      "No weather feature group found\n",
      "No aq_predictions feature group found\n",
      "No air_quality_xgboost_model model found\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Deleted secret SENSOR_LOCATION_JSON\n"
     ]
    }
   ],
   "source": [
    "import hopsworks\n",
    "import os\n",
    "\n",
    "from functions import util\n",
    "# WARNING: destructive cell. Per the recorded output, util.purge_project() looks for the\n",
    "# air_quality_fv feature view, the air_quality / weather / aq_predictions feature groups\n",
    "# and the air_quality_xgboost_model model, and deletes the SENSOR_LOCATION_JSON secret.\n",
    "# Do not include this cell in a routine \"Run All\".\n",
    "# Credentials are read from the environment; os.getenv() returns None when a variable\n",
    "# is unset.\n",
    "# NOTE(review): how hopsworks.login() behaves when given None is not visible here - confirm.\n",
    "api_key = os.getenv('HOPSWORKS_API_KEY')\n",
    "project_name = os.getenv('HOPSWORKS_PROJECT')\n",
    "project = hopsworks.login(project=project_name, api_key_value=api_key)\n",
    "util.purge_project(project)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def backfill_predictions_for_monitoring(weather_fg, air_quality_df, monitor_fg, model, n_days=10):\n",
    "    \"\"\"Predict PM2.5 for the latest weather rows and log them to the monitoring feature group.\n",
    "\n",
    "    Weather rows are left-joined with the `past_air_quality` values taken from both\n",
    "    `air_quality_df` and `monitor_fg`, the last `n_days` rows are scored with `model`,\n",
    "    and the scored rows are inner-joined with the observed `pm25` values.\n",
    "\n",
    "    Args:\n",
    "        weather_fg: Feature group whose `read()` returns a frame with a `date` column\n",
    "            and the weather columns listed in `feature_columns` below.\n",
    "        air_quality_df: DataFrame with `date`, `past_air_quality`, `pm25`, `street`\n",
    "            and `country` columns. It is not modified.\n",
    "        monitor_fg: Feature group that is read for `date` / `past_air_quality` and\n",
    "            that receives the new prediction rows through `insert()`.\n",
    "        model: Object with a `predict()` method that accepts the feature columns.\n",
    "        n_days: Number of most recent weather rows to score. Defaults to 10.\n",
    "\n",
    "    Returns:\n",
    "        The scored rows including the observed `pm25` column. The rows inserted into\n",
    "        `monitor_fg` are the same rows without `pm25`.\n",
    "    \"\"\"\n",
    "    feature_columns = ['past_air_quality', 'temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']\n",
    "\n",
    "    def _to_naive_utc(dates):\n",
    "        # Normalise a date-like column to timezone-naive UTC datetime64[ns] so that every\n",
    "        # merge below compares keys of the same dtype.\n",
    "        return pd.to_datetime(dates, utc=True).dt.tz_convert(None).astype('datetime64[ns]')\n",
    "\n",
    "    weather_df = weather_fg.read().sort_values(by=['date'], ascending=True)\n",
    "    weather_df['date'] = _to_naive_utc(weather_df['date'])\n",
    "\n",
    "    # Lagged air-quality values come from both the historical frame and the monitoring group.\n",
    "    # NOTE(review): a date present in both sources yields duplicate rows after the left\n",
    "    # merge below - confirm whether the two sources can overlap.\n",
    "    past_aq_df = pd.concat(\n",
    "        [\n",
    "            air_quality_df[['date', 'past_air_quality']],\n",
    "            monitor_fg.read()[['date', 'past_air_quality']],\n",
    "        ],\n",
    "        ignore_index=True,\n",
    "    )\n",
    "    past_aq_df['date'] = _to_naive_utc(past_aq_df['date'])\n",
    "\n",
    "    features_df = pd.merge(weather_df, past_aq_df, on='date', how='left')\n",
    "    # .copy() so the new column is written to an independent frame, not a slice view.\n",
    "    features_df = features_df.tail(n_days).copy()\n",
    "    features_df['predicted_pm25'] = model.predict(features_df[feature_columns])\n",
    "\n",
    "    # Work on a copy so the caller's air_quality_df keeps its original `date` column.\n",
    "    outcomes_df = air_quality_df[['date', 'pm25', 'street', 'country']].copy()\n",
    "    outcomes_df['date'] = _to_naive_utc(outcomes_df['date'])\n",
    "\n",
    "    hindcast_df = pd.merge(features_df, outcomes_df, on=\"date\")\n",
    "    hindcast_df['days_before_forecast_day'] = 1\n",
    "    # The observed pm25 is returned to the caller but not written to the monitoring group.\n",
    "    monitor_fg.insert(hindcast_df.drop('pm25', axis=1), write_options={\"wait_for_job\": True})\n",
    "    return hindcast_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}