import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import gradio as gr
import plotly.graph_objects as go
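# Requires pandas, numpy, torch, gradio, and plotly
# (e.g. pip install pandas numpy torch gradio plotly).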
# Function to create a sliding-window dataset for time series prediction
def create_dataset(dataset, lookback):
    X, y = [], []
    for i in range(len(dataset) - lookback):
        feature = dataset[i:i + lookback]
        target = dataset[i + 1:i + lookback + 1]
        X.append(feature)
        y.append(target)
    X = np.array(X).reshape(-1, lookback, 1)  # Reshape to 3D (samples, lookback, features)
    y = np.array(y).reshape(-1, lookback, 1)  # Reshape to 3D (samples, lookback, features)
    return torch.tensor(X).float(), torch.tensor(y).float()
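# Example: with lookback=3, a series [10, 20, 30, 40, 50] produces windows
#   X[0] = [[10], [20], [30]]  ->  y[0] = [[20], [30], [40]]
#   X[1] = [[20], [30], [40]]  ->  y[1] = [[30], [40], [50]]
# i.e. each target window is the input window shifted forward by one step.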
# Define the LSTM model
class AirModel(nn.Module):
    def __init__(self):
        super(AirModel, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 1)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x
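# The linear layer is applied at every time step, so the model output has shape
# (batch, lookback, 1) and lines up with the shifted target windows above.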
# Training and prediction function
def train_and_predict(csv_file, lookback, epochs, batch_size):
    # Load CSV
    df = pd.read_csv(csv_file.name)

    # Extract the time series column
    timeseries = df[["AmtNet Sales USD"]].values.astype('float32')

    # Train-test split
    train_size = int(len(timeseries) * 0.67)
    test_size = len(timeseries) - train_size
    train, test = timeseries[:train_size], timeseries[train_size:]

    # Create windowed datasets
    X_train, y_train = create_dataset(train, lookback=lookback)
    X_test, y_test = create_dataset(test, lookback=lookback)
    if len(X_train) == 0 or len(X_test) == 0:
        # Return an empty plot slot plus a message so both Gradio outputs are filled
        return None, "The lookback value is too large for the dataset. Please reduce the lookback value."
    # DataLoader for batching
    train_loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=batch_size)

    # Initialize model, optimizer, and loss function
    model = AirModel()
    optimizer = optim.Adam(model.parameters())
    loss_fn = nn.MSELoss()

    # Training loop
    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Prediction
    model.eval()
    with torch.no_grad():
        train_plot = np.ones_like(timeseries) * np.nan
        train_plot[lookback:train_size] = model(X_train)[:, -1, :].numpy()
        test_plot = np.ones_like(timeseries) * np.nan
        test_plot[train_size + lookback:len(timeseries)] = model(X_test)[:, -1, :].numpy()

    # Plot results with Plotly
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=timeseries.squeeze(), mode='lines', name='Original Data'))
    fig.add_trace(go.Scatter(y=train_plot.squeeze(), mode='lines', name='Train Prediction', line=dict(color='red')))
    fig.add_trace(go.Scatter(y=test_plot.squeeze(), mode='lines', name='Test Prediction', line=dict(color='green')))
    fig.update_layout(title="Time Series Prediction", xaxis_title="Time", yaxis_title="Sales")

    # Calculate Mean Absolute Error (MAE)
    mae = np.mean(np.abs(test_plot[train_size + lookback:len(timeseries)] - timeseries[train_size + lookback:len(timeseries)]))

    return fig, f"Mean Absolute Error (MAE) on Test Data: {mae:.4f}"
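# Note: only the last step of each prediction window is plotted, so the first
# `lookback` points of both the train and test segments have no prediction,
# and the MAE is computed only over the predicted part of the test segment.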
# Gradio app interface using the current component API
interface = gr.Interface(
    fn=train_and_predict,
    inputs=[
        gr.File(label="Upload your CSV file"),
        gr.Slider(10, 365, step=1, value=100, label="Lookback"),
        gr.Slider(100, 5000, step=100, value=1000, label="Epochs"),
        gr.Slider(4, 32, step=1, value=8, label="Batch size")
    ],
    outputs=[
        gr.Plot(label="Prediction Plot"),
        gr.Textbox(label="Error Metrics")
    ],
    title="Time Series Prediction with LSTM",
    description="Upload a CSV file with an 'AmtNet Sales USD' column and get time series predictions from an LSTM model.",
)
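# The uploaded CSV must contain a numeric "AmtNet Sales USD" column. A minimal
# test file could look like this (hypothetical values, for illustration only):
#   Date,AmtNet Sales USD
#   2023-01-01,1250.00
#   2023-01-02,1318.50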
# Launch the app
interface.launch()