"""Gradio app: timeseries anomaly detection with a pretrained Keras autoencoder.

Loads normalization parameters from ``scaler.json``, cleans an uploaded CSV
into a two-column (timestamp, value) series, scores overlapping 288-step
windows with a reconstruction-error autoencoder from the Hugging Face Hub,
and plots the anomalous points in red over the raw series.
"""

import json

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import from_pretrained_keras
from matplotlib import pyplot as plt

# Training-time normalization constants ("mean", "std") and the anomaly
# decision "threshold".  Use a context manager so the file handle is
# closed (the original left it open).
with open("scaler.json") as f:
    scaler = json.load(f)

# Window length the autoencoder was trained on.
TIME_STEPS = 288

# Lazily-loaded model, cached so we hit the Hub at most once per process
# instead of re-downloading on every request.
_model = None


def _get_model():
    """Return the pretrained autoencoder, downloading it on first use."""
    global _model
    if _model is None:
        _model = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
    return _model


def create_sequences(values, time_steps=TIME_STEPS):
    """Slice ``values`` into overlapping windows of length ``time_steps``.

    Returns an array of shape ``(len(values) - time_steps + 1, time_steps, ...)``.
    """
    return np.stack(
        [values[i : i + time_steps] for i in range(len(values) - time_steps + 1)]
    )


def normalize_data(data):
    """Standardize ``data`` with the training-time mean and std from the scaler."""
    return (data - scaler["mean"]) / scaler["std"]


def plot_test_data(df_test_value):
    """Plot the normalized input series and return the matplotlib figure."""
    fig, ax = plt.subplots(figsize=(12, 6))
    df_test_value.plot(legend=False, ax=ax)
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Input Test Data")
    return fig


def get_anomalies(df_test_value):
    """Return a boolean array marking windows whose reconstruction MAE
    exceeds the training-time threshold."""
    x_test = create_sequences(df_test_value.values)
    model = _get_model()
    x_test_pred = model.predict(x_test)
    # Mean absolute reconstruction error per window, flattened to 1-D.
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1).reshape(-1)
    return test_mae_loss > scaler["threshold"]


def plot_anomalies(df_test_value, data, anomalies):
    """Overlay anomalous points (red) on the raw series and return the figure.

    A data point is flagged only when all windows covering it are anomalous,
    which suppresses isolated false positives at window edges.
    """
    anomalous_data_indices = [
        idx
        for idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1)
        if np.all(anomalies[idx - TIME_STEPS + 1 : idx])
    ]
    df_subset = data.iloc[anomalous_data_indices]
    fig, ax = plt.subplots(figsize=(12, 6))
    data.plot(legend=False, ax=ax)
    df_subset.plot(legend=False, ax=ax, color="r")
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Anomalous Data Points")
    return fig


def clean_data(df):
    """Normalize one of three known CSV layouts to columns [timestamp, value].

    Supported layouts: a ready-made (timestamp, value) pair, an hourly labor
    sheet (Date, Hour, Hourly_Labor_Hours_Total), or an hourly sales sheet
    (Date_CY, Hour, Net_Sales_CY).

    Raises:
        ValueError: when none of the expected column sets is present.
    """
    if "timestamp" in df.columns and "value" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        return df

    if {"Date", "Hour", "Hourly_Labor_Hours_Total"}.issubset(df.columns):
        # to_timedelta rolls Hour == 24 into the next day automatically, so
        # the original's extra day-offset for dt.hour == 24 could never fire
        # (dt.hour is always 0-23) and is dropped here.
        df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(
            df["Hour"].astype(int), unit="h"
        )
        df["timestamp"] = df["timestamp"].dt.floor("h")
        df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
        df = df.rename(columns={"Hourly_Labor_Hours_Total": "value"})
        # Drop missing readings and RETURN — the original fell through here
        # and implicitly returned None, crashing the caller.  Dropna added
        # for consistency with the sales branch.
        return df.dropna(subset=["value"])

    if {"Date_CY", "Hour", "Net_Sales_CY"}.issubset(df.columns):
        df["timestamp"] = pd.to_datetime(df["Date_CY"]) + pd.to_timedelta(
            df["Hour"].astype(int), unit="h"
        )
        df["timestamp"] = df["timestamp"].dt.floor("h")
        df = df[["timestamp", "Net_Sales_CY"]]
        df = df.rename(columns={"Net_Sales_CY": "value"})
        return df.dropna(subset=["value"])

    raise ValueError("Dataframe does not contain necessary columns.")


def master(file):
    """Gradio handler: read the uploaded CSV and return the anomaly plot.

    Returns a plain error string (rendered by Gradio) when the series is
    too short to form a single window.
    """
    data = pd.read_csv(file.name)
    data = clean_data(data)
    data["timestamp"] = pd.to_datetime(data["timestamp"])
    data.set_index("timestamp", inplace=True)
    if len(data) < TIME_STEPS:
        return "Not enough data to create sequences. Need at least {} records.".format(
            TIME_STEPS
        )
    df_test_value = normalize_data(data)
    # The original also built an unused input-data figure here (plot1);
    # plot_test_data is kept available but no longer called.
    anomalies = get_anomalies(df_test_value)
    return plot_anomalies(df_test_value, data, anomalies)


# gr.inputs / gr.outputs were removed in Gradio 3.x; use the top-level
# component classes.  gr.Plot is the correct output for a matplotlib
# Figure (gr.outputs.Image cannot render a Figure object).
iface = gr.Interface(
    fn=master,
    inputs=gr.File(label="CSV File"),
    outputs=gr.Plot(),
    examples=[
        "art_daily_jumpsup.csv",
        "labor_hourly_short.csv",
        "sales_hourly_short.csv",
    ],
    title="Timeseries Anomaly Detection Using an Autoencoder",
    description="Anomaly detection of timeseries data.",
)

iface.launch()