In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import sklearn as sk
import datetime
import calendar
from jupyter_dash import JupyterDash
import dash
from dash import Dash, html, dcc, Input, Output
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import requests
import io


In [2]:
url = "https://raw.githubusercontent.com/tappyness1/causion/main/data/counts_dataset.csv"

download = requests.get(url).content
df = pd.read_csv(io.StringIO(download.decode('utf-8')))
display(df.head())
print(df.shape)

Unnamed: 0,date,time,view,car,motorcycle,large_vehicle
0,2023-02-14,22:36:03,View_from_Second_Link_at_Tuas,0,0,1
1,2023-02-14,22:36:03,View_from_Tuas_Checkpoint,2,0,0
2,2023-02-14,22:36:03,View_from_Woodlands_Causeway_Towards_Johor,2,0,0
3,2023-02-14,22:36:03,View_from_Woodlands_Checkpoint_Towards_BKE,3,0,1
4,2023-02-14,23:14:34,View_from_Second_Link_at_Tuas,0,0,6


(6960, 6)


In [3]:
#Data manipulation

df['date'] = pd.to_datetime(df['date'], format = "%Y-%m-%d")
df['day'] = df['date'].dt.day_name()
df['hour'] = df['time'].str[:2] + ':00'
df.drop(columns=['motorcycle'], axis=1, inplace=True)
df['vehicle'] = df['car'] + df['large_vehicle']

In [4]:
display(df.head())

Unnamed: 0,date,time,view,car,large_vehicle,day,hour,vehicle
0,2023-02-14,22:36:03,View_from_Second_Link_at_Tuas,0,1,Tuesday,22:00,1
1,2023-02-14,22:36:03,View_from_Tuas_Checkpoint,2,0,Tuesday,22:00,2
2,2023-02-14,22:36:03,View_from_Woodlands_Causeway_Towards_Johor,2,0,Tuesday,22:00,2
3,2023-02-14,22:36:03,View_from_Woodlands_Checkpoint_Towards_BKE,3,1,Tuesday,22:00,4
4,2023-02-14,23:14:34,View_from_Second_Link_at_Tuas,0,6,Tuesday,23:00,6


In [5]:
cat = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday','Saturday', 'Sunday']
new_df = df.groupby(['day']).sum().reset_index()
new_df = new_df.reindex([1,5,6,4,0,2,3])
new_df.head(10)

  new_df = df.groupby(['day']).sum().reset_index()


Unnamed: 0,day,car,large_vehicle,vehicle
1,Monday,2406,1064,3470
5,Tuesday,2003,811,2814
6,Wednesday,1942,864,2806
4,Thursday,1976,903,2879
0,Friday,2070,762,2832
2,Saturday,2117,578,2695
3,Sunday,1515,428,1943


In [6]:
new = df.groupby(['hour','day']).sum().drop(columns=['car', "large_vehicle"]).reset_index()
display(new)

  new = df.groupby(['hour','day']).sum().drop(columns=['car', "large_vehicle"]).reset_index()


Unnamed: 0,hour,day,vehicle
0,00:00,Friday,44
1,00:00,Monday,52
2,00:00,Saturday,50
3,00:00,Sunday,61
4,00:00,Thursday,22
...,...,...,...
163,23:00,Saturday,57
164,23:00,Sunday,62
165,23:00,Thursday,51
166,23:00,Tuesday,49


In [7]:
#Pivot the table

table = pd.pivot_table(new, values='vehicle', index=['day'], columns=['hour']).reset_index()
table = table.reindex([1,5,6,4,0,2,3])
display(table)

hour,day,00:00,01:00,02:00,03:00,04:00,05:00,06:00,07:00,08:00,...,14:00,15:00,16:00,17:00,18:00,19:00,20:00,21:00,22:00,23:00
1,Monday,52,82,35,40,29,60,77,233,34,...,268,148,227,253,214,256,69,58,30,35
5,Tuesday,58,30,19,14,15,35,85,144,47,...,186,202,243,207,265,168,49,40,46,49
6,Wednesday,28,41,18,17,16,26,57,96,23,...,182,226,192,280,271,163,35,42,32,80
4,Thursday,22,38,18,18,36,44,75,249,66,...,111,130,197,225,184,163,57,45,45,51
0,Friday,44,37,31,33,28,36,65,143,0,...,281,245,255,218,215,191,58,45,56,69
2,Saturday,50,43,24,21,30,48,53,189,37,...,170,214,170,209,271,164,61,54,48,57
3,Sunday,61,38,16,21,15,28,45,149,4,...,132,100,171,98,96,105,50,50,56,62


In [8]:
t = table.T
t.drop('day', inplace=True)
t.columns = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                        "Saturday", "Sunday"]
t = t.reset_index()
display(t.head())

Unnamed: 0,hour,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
0,00:00,52,58,28,22,44,50,61
1,01:00,82,30,41,38,37,43,38
2,02:00,35,19,18,18,31,24,16
3,03:00,40,14,17,18,33,21,21
4,04:00,29,15,16,36,28,30,15


In [9]:
fig = px.bar(new_df, x = 'day', y='vehicle', color='day', 
             text_auto=True, labels={'day':'Day of the Week','vehicle':'Vehicle Count'})
fig.show()

In [10]:
new_table = table.iloc[:,1:].to_numpy()
fig1 = px.imshow(new_table, labels=dict(x="Hour of the Day", y = 'Day of the Week', color='Causeway Traffic'),
                x=['12am', '1am', '2am', '3am', '4am', '5am', '6am', '7am', '8am', '9am', '10am', '11am', '12pm',
                '1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm', '9pm', '10pm', "11pm"],
                y=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                        "Saturday", "Sunday"], text_auto=True)
fig1.update_xaxes(side='top')

In [11]:
#fig = make_subplots(rows=1, cols=2, specs=[[{'type':'bar'},{'type':'bar'}]],
                   #subplot_titles=('Hours', 'Days'))

In [12]:
app_new = JupyterDash(__name__)

app_new.title = 'CSE6242 Dashboard'
app_new.layout = html.Div([
    html.Div(html.H2("Causian Dashboard"), style={'width':'250px', 'height':'60px', 'padding-left':'2%',
                                                 'display':'inline-block'}),
    html.Div([
    html.Label("Hours"), dcc.Dropdown(id='hours_dropdown_id',
                                     options=['00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00', '07:00', '08:00', '09:00', 
                         '10:00', '11:00', '12:00', '13:00', '14:00', '15:00', '16:00', '17:00', '18:00','19:00',
                         '20:00', '21:00', '22:00', '23:00'],
                                     value='07:00', clearable=False)],
    style={'width':'20%','height':'60px', 'padding-left':'2%',
                                                 'display':'inline-block'}),
                      html.Div([html.Label("Day of the Week"), dcc.Dropdown(id='days_dropdown_id', value='Monday',
                                                                            options=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday","Saturday", "Sunday"],
                                                                           clearable=False)],
                              style={'width':'20%','height':'60px', 'padding-left':'2%',
                                                 'display':'inline-block'}),
                      html.Div(dcc.Graph(id='fig_hours')),
                      html.Div(dcc.Graph(id='fig_days')),
                      html.Div(dcc.Graph(id='fig_heatplot', figure=fig1))])

@app_new.callback(Output('fig_hours', "figure"), Input('hours_dropdown_id', "value"))
def update_hour_bar_chart(Hours):
    fig_hours = px.bar(table, x='day', y=str(Hours), color='day', text_auto=True, labels={'day':"Day of the Week"})
    return fig_hours
@app_new.callback(Output('fig_days', "figure"), Input('days_dropdown_id', "value"))
def update_day_bar_chart(day):
    fig_days = px.bar(t, x='hour', y = str(day), color=str(day), text_auto=True, labels={'hour':"Count of Each Hour"})
    return fig_days

app_new.run_server(mode='inline')

Dash is running on http://127.0.0.1:8050/



In [13]:
#Trial basic Linear Regression Model
new['hour'] = new['hour'].astype('category')
new['day'] = new['day'].astype('category')
print(new.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype   
---  ------   --------------  -----   
 0   hour     168 non-null    category
 1   day      168 non-null    category
 2   vehicle  168 non-null    int64   
dtypes: category(2), int64(1)
memory usage: 2.8 KB
None


In [14]:
from sklearn.linear_model import LinearRegression

X = new.loc[:,['hour', 'day']]
y = new.loc[:,'vehicle']
n = pd.get_dummies(X)
display(n.head())
print(n.shape)
print(y.shape)

Unnamed: 0,hour_00:00,hour_01:00,hour_02:00,hour_03:00,hour_04:00,hour_05:00,hour_06:00,hour_07:00,hour_08:00,hour_09:00,...,hour_21:00,hour_22:00,hour_23:00,day_Friday,day_Monday,day_Saturday,day_Sunday,day_Thursday,day_Tuesday,day_Wednesday
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


(168, 31)
(168,)


In [15]:
reg = LinearRegression().fit(n,y)
print(reg.score(n,y))
y_pred = reg.predict(n)

new_pred = []
for i in y_pred:
    if i < 0:
        new_pred.append(0)
    else:
        new_pred.append(i)


0.8552520624553817


In [16]:
import sklearn.metrics as metrics
def regression_results(y_true, y_pred):

    # Regression metrics
    explained_variance=metrics.explained_variance_score(y_true, y_pred)
    mean_absolute_error=metrics.mean_absolute_error(y_true, y_pred) 
    mse=metrics.mean_squared_error(y_true, y_pred) 
    mean_squared_log_error=metrics.mean_squared_log_error(y_true, y_pred)
    median_absolute_error=metrics.median_absolute_error(y_true, y_pred)
    r2=metrics.r2_score(y_true, y_pred)

    print('explained_variance: ', round(explained_variance,4))    
    print('mean_squared_log_error: ', round(mean_squared_log_error,4))
    print('r2: ', round(r2,4))
    print('MAE: ', round(mean_absolute_error,4))
    print('MSE: ', round(mse,4))
    print('RMSE: ', round(np.sqrt(mse),4))

In [17]:
regression_results(y, new_pred)

explained_variance:  0.8567
mean_squared_log_error:  0.3976
r2:  0.8566
MAE:  23.7455
MSE:  1029.6179
RMSE:  32.0877
