Spaces:
Build error
Build error
import matplotlib.pyplot as plt | |
from pylab import rcParams | |
from statsmodels.tsa.seasonal import seasonal_decompose | |
import streamlit as st | |
import pandas as pd | |
from datasets import load_dataset | |
# Fetch the dataset from the Hugging Face Hub using the stored credentials.
# NOTE(review): `dataset` is not referenced again in the visible script (the
# data shown below is read from a local CSV) - confirm whether it is needed.
dataset = load_dataset("shouzen/final_data_sale", use_auth_token=True)

# Page title and a one-paragraph statement of the project goal.
st.title('Project Canada Goose')
st.write(
    'Mempertahankan brand "canada goose" agar tetap menjadi penjualan tertinggi '
    '(untuk 1 tahun kedepan) dengan metode time series forecasting'
)

# Heading for the raw-data section that follows.
st.markdown('# All Data')
def load_csv_data():
    """Load the first 50,000 rows of ``Final_Data_Sales.csv`` with parsed dates.

    The file is read in chunks of 1,000 rows and concatenated into a single
    frame. The five timestamp columns are converted to datetimes; values that
    cannot be parsed (e.g. ``0000-0000``) become ``NaT``. Finally every
    missing value, in any column, is replaced by the next valid value below
    it in the same column (backward fill). Trailing gaps with no later value
    stay missing.

    Returns:
        pandas.DataFrame: the loaded and back-filled data.
    """
    date_columns = ('sold_at', 'created_at', 'shipped_at', 'delivered_at', 'returned_at')

    # `iterator=True` + `chunksize` yields a TextFileReader that pd.concat consumes.
    reader = pd.read_csv('Final_Data_Sales.csv', iterator=True, chunksize=1000, nrows=50000)
    data = pd.concat(reader, ignore_index=True)

    # errors='coerce' turns unparseable date strings into NaT instead of raising.
    for column in date_columns:
        data[column] = pd.to_datetime(data[column], errors='coerce')

    # Take missing values from the following row. `fillna(method='bfill')` is
    # deprecated in recent pandas releases; `bfill()` is the equivalent call.
    data = data.bfill()
    return data
# Placeholder text shown while the CSV is being read.
data_load_state = st.text('Loading data...')
data = load_csv_data()
st.dataframe(data)

# Replace the placeholder once the table has been rendered.
data_load_state.text("Ini adalah data keseluruhan dari data csv")

# Report the (rows, columns) shape of the loaded table.
total_data = data.shape
st.write('Total Datanya adalah : {}'.format(total_data))

# Data cleaning: drop every row that still contains a missing value.
data = data.dropna(axis=0, how='any')
st.write("Jumlah data setelah menghapus missing value:", len(data))
# Descriptive statistics for the two monetary columns.
st.markdown('## Statistika Deskriptif')
analisis = data.copy()[['sale_price', 'cost']]
summary_table = analisis.describe()
st.table(summary_table)
# Share of orders per status (Shipped, Processing, Cancelled, Complete, Returned).
st.markdown("## Perbandingan Shipped, Processing, Cancelled, Complete dan Returned")

# value_counts() orders statuses by frequency, so the labels must come from
# the same Series. Taking them from data['status'].unique() (order of first
# appearance) can attach a label to the wrong wedge.
status_counts = data['status'].value_counts()

fig1, ax1 = plt.subplots()
ax1.pie(status_counts, labels=status_counts.index, autopct='%.2f%%')
st.pyplot(fig1)
# Best sellers: total sale_price per (product_id, product_brand) pair, highest first.
# NOTE(review): because product_id is part of the grouping key, each row is a
# product within a brand rather than a whole brand - confirm this is intended.
st.markdown("## Brand Terlaris")
st.write("Ini adalah top 5 brand terlaris ")
brand = (
    data[['product_id', 'product_brand', 'sale_price']]
    .groupby(['product_id', 'product_brand'], as_index=False)['sale_price']
    .sum()
    .sort_values('sale_price', ascending=False)
)
st.table(brand.head(5))

# Heading for the highest/lowest sales comparison charts.
st.markdown("## Penjualan Tertinggi Berdasarkan Product Brand")
def perbandingan(w, a, x, y, z):
    """Render two side-by-side bar charts of ``sale_price`` in Streamlit.

    Args:
        w: Table plotted on the left; read as ``w[a]`` and ``w['sale_price']``.
        a: Name of the column used for the bar categories in both tables.
        x: Table plotted on the right; read as ``x[a]`` and ``x['sale_price']``.
        y: Title of the left chart.
        z: Title of the right chart.
    """
    fig = plt.figure(figsize=(20, 8))

    # Positions 1 and 2 of a 2x2 grid: both charts sit in the top row.
    left_ax = fig.add_subplot(221)
    left_ax.grid()
    left_ax.bar(w[a], w['sale_price'], label="Sale Price")
    left_ax.set_title(y)

    right_ax = fig.add_subplot(222)
    right_ax.grid()
    right_ax.bar(x[a], x['sale_price'], label="Sale Price")
    right_ax.set_title(z)

    # Hand Streamlit the figure itself rather than the pyplot module, then
    # close it so figures do not pile up across script reruns.
    st.pyplot(fig)
    plt.close(fig)
# Compare the five largest and five smallest sale_price totals.
product_brand = brand
pb = product_brand[['product_brand', 'sale_price']]
ascending_sales = pb.sort_values('sale_price')
sh = ascending_sales.tail(5)  # five highest totals
sl = ascending_sales.head(5)  # five lowest totals
perbandingan(
    w=sh,
    a='product_brand',
    x=sl,
    y='Penjualan Tertinggi Berdasarkan Product Brand',
    z='Penjualan Terendah Berdasarkan Product Brand',
)
# Sale price data restricted to the Canada Goose brand.
st.markdown(' # Visualisasi Data Sale Price Khusus Untuk Canada Goose')
cg = data.copy()[['created_at', 'product_brand', 'sale_price']]

# Keep only Canada Goose rows, ordered by creation date.
is_canada_goose = cg['product_brand'] == 'Canada Goose'
cg_f = cg.loc[is_canada_goose].sort_values('created_at')

st.write('Sorting berdasarkan tanggal pada created_at')
st.dataframe(cg_f)
# Resample the Canada Goose sales to one value per month.
st.markdown('## Resampling data perbulan')
st.write('Data sale_price disini ditampilkan dalam perbulan')

# Only the timestamp and the price are needed from here on.
cg_e = cg_f[['created_at', 'sale_price']].sort_values('created_at')

# Monthly mean of sale_price ('M' = month-end frequency) to smooth out noise;
# months without data are dropped and the index is renamed to 'Date'.
# NOTE(review): newer pandas releases prefer the alias 'ME' over 'M' - confirm
# against the pinned pandas version before changing it.
y = (
    cg_e.set_index('created_at')
    .resample('M')
    .mean()
    .dropna()
    .rename_axis(None, axis=1)
    .rename_axis('Date', axis=0)
)

# Show only the first ten months.
st.dataframe(y.head(10))
# Classic time series decomposition into trend, seasonal and residual parts.
st.markdown('## Classic Time Series Decomposition -> 1920')
# The explanation below previously read "movie average"; the technique it
# describes is the moving average.
st.markdown('''
Teknik untuk memisahkan time series menjadi trend, seasonal, dan residual menggunakan moving average, ada 2 tipe:
*Additive = Trend + Seasonal + Residual*\n
*Multiplicative = Trend * Seasonal * Residual*\n
Additive dipakai **untuk trend dan seasonal yang tidak terlalu bervariasi**\n
Multiplicative dipakai **untuk trend dan seasonal yang berubah seiring jalannya waktu**
''')

# Default figure size used by the decomposition plot.
rcParams['figure.figsize'] = 10, 5

# Additive decomposition of the monthly series with a 12-month season.
decomposition = seasonal_decompose(y.copy(), model='additive', period=12)
fig = decomposition.plot()
st.pyplot(fig)
# Model
# Chronological 80% / 20% train/test split. The cut point is derived from the
# series length (integer arithmetic, no float rounding) so the two parts never
# overlap or leave a gap; for a 35-month series this is still y[:28] / y[28:].
split_at = len(y) * 4 // 5
y_train, y_test = y[:split_at], y[split_at:]
st.markdown('# Model')
st.markdown('## ProphetFB Model')
# NOTE(review): the package was published as `prophet` from version 1.0 on -
# confirm which distribution the deployment actually installs.
from fbprophet import Prophet  # Prophet forecasting model

m = Prophet()

# Prophet expects a frame with a 'ds' (date) column and a 'y' (value) column.
d = y.copy().reset_index().rename(columns={'Date': 'ds', 'sale_price': 'y'})
model = m.fit(d)

# Extend the timeline by 14 month-end periods; `periods` controls how far
# ahead (in months) the forecast reaches.
future = m.make_future_dataframe(periods=14, freq='M')
forecast = m.predict(future).set_index('ds')

# Index both series by date so they share the x-axis.
d = d.set_index('ds')
final_forecast = forecast['yhat']

fig = plt.figure(figsize=(15, 5))
ax = fig.gca()
ax.set_title("Prediksi untuk 1 tahun kedepan dengan ProphetFB Model")
ax.plot(d, label="Actual")
ax.plot(final_forecast, label="Predicted")
ax.legend(loc='upper left')
st.pyplot(fig)
# ARIMA model
st.markdown("## ARIMA Model")
from pmdarima import auto_arima

# Stepwise search for a seasonal ARIMA (12-month season), fitted on the
# training split only.
arima = auto_arima(
    y_train,
    start_p=1, start_q=1, max_p=3, max_q=3, m=12,
    start_P=0, seasonal=True, d=1, D=1, trace=True,
    error_action='ignore',   # don't want to know if an order does not work
    suppress_warnings=True,  # don't want convergence warnings
    stepwise=True,
)

# Forecast across the test window plus 8 further months. The model orders are
# fixed when fitting, so no order arguments are passed to predict().
n_forecast = len(y_test) + 8
pred = arima.predict(n_forecast)

# The model was fitted on y_train, so its first forecast belongs to the month
# right after the last training month, not to the end of the test set.
month_end = pd.offsets.MonthEnd()
dates = pd.date_range(y_train.index[-1] + month_end, periods=n_forecast, freq=month_end)

# list() discards any index the forecast may already carry; otherwise
# pd.Series would re-align on it and could produce NaN values.
pred = pd.Series(list(pred), index=dates)

fig = plt.figure(figsize=(15, 5))
plt.title("Prediksi menurut arima untuk 1 tahun kedepan")
plt.plot(y_train, label="Training")
plt.plot(y_test, label="Test")
plt.plot(pred, label="Pred")
plt.legend(loc='upper left')
st.pyplot(fig)