# Spaces: Sleeping
import pandas as pd | |
from prophet import Prophet | |
import gradio as gr | |
import plotly.graph_objs as go | |
import numpy as np | |
# Forecast horizon, in days, for each time frame offered to the user.
_FORECAST_PERIODS = {
    'Next Day': 1,
    '7 days': 7,
    '10 days': 10,
    '15 days': 15,
    '1 month': 30,
}

# Raw sales export that the model is trained on.
_SALES_CSV_PATH = '/content/All sales - House of Pizza.csv'

# Forecast columns shown in the output tables.
_FORECAST_COLUMNS = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']


def _clean_total_paid(val):
    """Convert one raw 'Total paid' cell into a numeric amount.

    A string cell may hold several amounts separated by the '₨' symbol,
    each possibly containing thousands separators; the amounts are parsed
    and summed. Missing values become 0.0. Any other value (already
    numeric) is returned unchanged.
    """
    if isinstance(val, str):
        return sum(
            float(part.replace(',', '').strip())
            for part in val.split('₨')
            if part.strip()
        )
    if pd.isna(val):
        return 0.0
    return val


def _load_daily_sales(csv_path):
    """Read the sales CSV and return per-day totals as columns 'ds' and 'total_sales'."""
    sales = pd.read_csv(csv_path)
    sales['Total paid'] = sales['Total paid'].apply(_clean_total_paid)

    # Unparseable dates are coerced to NaT and the affected rows discarded.
    sales['Date'] = pd.to_datetime(
        sales['Date'], format='%m/%d/%Y %H:%M', errors='coerce'
    )
    sales = sales.dropna(subset=['Date'])

    # normalize() truncates to midnight while keeping a datetime64 column;
    # assign() builds a new frame instead of writing into the dropna() result.
    # groupby sorts its keys, so the returned rows are in chronological order.
    return (
        sales.assign(ds=sales['Date'].dt.normalize())
        .groupby('ds')
        .agg(total_sales=('Total paid', 'sum'))
        .reset_index()
    )


def _build_forecast_figure(forecast):
    """Plot the forecast and its lower/upper bounds as a Plotly figure."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=forecast['ds'], y=forecast['yhat'],
        mode='lines+markers',
        name='Forecasted Sales',
        line=dict(color='orange'),
        marker=dict(size=6),
        hovertemplate='Date: %{x}<br>Forecasted Sales: %{y}<extra></extra>'
    ))
    fig.add_trace(go.Scatter(
        x=forecast['ds'], y=forecast['yhat_lower'],
        mode='lines',
        name='Lower Bound',
        line=dict(color='red', dash='dash')
    ))
    fig.add_trace(go.Scatter(
        x=forecast['ds'], y=forecast['yhat_upper'],
        mode='lines',
        name='Upper Bound',
        line=dict(color='green', dash='dash')
    ))
    fig.update_layout(
        title='Sales Forecast using Prophet',
        xaxis_title='Date',
        yaxis_title='Sales Price',
        xaxis=dict(tickformat="%Y-%m-%d"),
        yaxis=dict(autorange=True)
    )
    return fig


def predict_sales(time_frame):
    """Train a Prophet model on daily sales and forecast the chosen horizon.

    Args:
        time_frame: One of 'Next Day', '7 days', '10 days', '15 days' or
            '1 month'.

    Returns:
        A tuple ``(forecast_table, weekend_forecast_table, fig)``: the
        forecast for every day of the horizon, the subset of those days
        falling on Saturday or Sunday, and a Plotly figure of the forecast
        with its lower and upper bounds.

    Raises:
        KeyError: If ``time_frame`` is not one of the supported options.
        ValueError: If the CSV yields fewer than two days of usable data.
    """
    # Resolve the horizon first so that a bad selection fails immediately
    # instead of after the CSV has been loaded and the model fitted.
    try:
        periods = _FORECAST_PERIODS[time_frame]
    except KeyError:
        raise KeyError(
            f"Unknown time frame {time_frame!r}; "
            f"expected one of {list(_FORECAST_PERIODS)}"
        ) from None

    daily_sales = _load_daily_sales(_SALES_CSV_PATH)
    if len(daily_sales) < 2:
        raise ValueError(
            f"Need at least two days of sales with valid dates to fit the "
            f"model; found {len(daily_sales)} in {_SALES_CSV_PATH!r}"
        )

    # The model is fitted on log1p(sales); log1p rather than log so that
    # zero-sales days are representable.
    training = pd.DataFrame({
        'ds': daily_sales['ds'],
        'y': np.log1p(daily_sales['total_sales']),
    })
    model = Prophet(weekly_seasonality=True)
    model.fit(training)

    # make_future_dataframe also returns the historical dates, so keep only
    # the days after the last observed one.
    forecast_start_date = daily_sales['ds'].max() + pd.Timedelta(days=1)
    future_time = model.make_future_dataframe(periods=periods, freq='D')
    future_only = future_time[future_time['ds'] >= forecast_start_date]
    forecast = model.predict(future_only)

    # Undo the log1p transform to get back to the original sales scale.
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[column] = np.expm1(forecast[column])

    forecast['day_of_week'] = forecast['ds'].dt.day_name()
    weekends = forecast[forecast['day_of_week'].isin(['Saturday', 'Sunday'])]

    forecast_table = forecast[_FORECAST_COLUMNS].head(periods)
    weekend_forecast_table = weekends[_FORECAST_COLUMNS]
    fig = _build_forecast_figure(forecast)

    return forecast_table, weekend_forecast_table, fig
# Gradio interface
def run_gradio():
    """Build the forecasting UI and launch it with Gradio in debug mode.

    The interface offers a dropdown of forecast horizons and passes the
    selection to ``predict_sales``; its three return values are shown as
    two tables and one plot.
    """
    time_options = ['Next Day', '7 days', '10 days', '15 days', '1 month']

    # User input: the forecast horizon.
    horizon_dropdown = gr.components.Dropdown(
        time_options, label="Select Forecast Time Range"
    )

    # Outputs, in the same order as the values returned by predict_sales.
    result_components = [
        gr.components.Dataframe(label="Forecasted Sales Table"),
        gr.components.Dataframe(label="Weekend Forecasted Sales Table"),
        gr.components.Plot(label="Sales Forecast Plot"),
    ]

    interface = gr.Interface(
        fn=predict_sales,
        inputs=horizon_dropdown,
        outputs=result_components,
        title="Sales Forecasting with Prophet",
        description="Select a time range for the forecast and click on the button to train the model and see the results.",
    )
    interface.launch(debug=True)
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_gradio()