Spaces:
Runtime error
Runtime error
File size: 4,453 Bytes
1233062 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tsfeatures import (
tsfeatures, acf_features, arch_stat, crossing_points,
entropy, flat_spots, heterogeneity, holt_parameters,
lumpiness, nonlinearity, pacf_features, stl_features,
stability, hw_parameters, unitroot_kpss, unitroot_pp,
series_length, sparsity, hurst, statistics
)
# Configuration read from the process environment at import time.
# Subscripting os.environ raises KeyError when a variable is missing, so all
# three variables must be set before this module is imported.
# FILE_CATALOGUE is the path handed to pd.read_parquet in get_catalogue().
FILE_CATALOGUE = os.environ['FILE_CATALOGUE']
# NOTE(review): BUCKET_TIMENET and KEY_TIMENET are not referenced in the
# visible part of this file; presumably used elsewhere -- confirm.
BUCKET_TIMENET = os.environ['BUCKET_TIMENET']
KEY_TIMENET = os.environ['KEY_TIMENET']
# Columns selected, in this exact order, by tsfeatures_vector() to build the
# feature vector. The order determines the position of each feature in the
# vector, so do not reorder or edit the names without care.
FEATS_COLS = ['hurst', 'series_length', 'unitroot_pp', 'unitroot_kpss', 'hw_alpha',
'hw_beta', 'hw_gamma', 'stability', 'nperiods', 'seasonal_period',
'trend_strength', 'spike', 'linearity', 'curvature', 'e_acf1',
'e_acf10', 'seasonal_strength', 'peak', 'trough', 'x_pacf5',
'diff1x_pacf5', 'diff2x_pacf5', 'seas_pacf', 'nonlinearity',
'lumpiness', 'alpha', 'beta', 'flat_spots', 'entropy',
'crossing_points', 'arch_lm', 'x_acf1', 'x_acf10', 'diff1_acf1',
'diff1_acf10', 'diff2_acf1', 'diff2_acf10', 'seas_acf1', 'sparsity',
'total_sum', 'mean', 'variance', 'median', 'p2point5', 'p5', 'p25',
'p75', 'p95', 'p97point5', 'max', 'min']
def tsfeatures_vector(df: pd.DataFrame, seasonality: int) -> list:
    """Return a min-max scaled feature vector for the first series in ``df``.

    The ``unique_id``, ``ds`` and ``y`` columns of ``df`` are passed to
    ``tsfeatures`` with ``freq=seasonality`` and ``scale=False``. The columns
    listed in ``FEATS_COLS`` are then selected (in that order), missing values
    are replaced by 0, and only the first row of the result is used.

    Args:
        df: Frame containing at least ``unique_id``, ``ds`` and ``y``.
        seasonality: Value forwarded to ``tsfeatures`` as ``freq``.

    Returns:
        A list of floats scaled to ``[0, 1]`` across the features. When every
        feature has the same value the scaling is undefined (0/0), so a list
        of zeros is returned instead of NaNs.
    """
    ts_df = tsfeatures(
        ts=df[['unique_id', 'ds', 'y']],
        freq=seasonality,
        features=[sparsity, acf_features, crossing_points,
                  entropy, flat_spots, holt_parameters,
                  lumpiness, nonlinearity, pacf_features, stl_features,
                  stability, hw_parameters, unitroot_kpss, unitroot_pp,
                  series_length, hurst, arch_stat, statistics],
        scale=False,
    ).rename(columns={'trend': 'trend_strength'})
    if seasonality == 1:
        # This code treats the seasonal columns as absent from the tsfeatures
        # output when seasonality == 1; create them (as NaN) so that the
        # FEATS_COLS selection below does not fail.
        ts_df[['seasonal_strength', 'peak', 'trough', 'seas_pacf', 'seas_acf1']] = np.nan
    ts_df[['trend_strength', 'seasonal_strength']] = (
        ts_df[['trend_strength', 'seasonal_strength']].fillna(0)
    )
    vector = ts_df[FEATS_COLS].fillna(0).iloc[0].values
    lowest = vector.min()
    span = vector.max() - lowest
    if span == 0:
        # Constant vector: (v - min) / 0 would produce NaN for every entry.
        return [0.0] * len(vector)
    return ((vector - lowest) / span).tolist()
def get_closest_ids(x: list, top_k: int, index_pinecone):
    """Query ``index_pinecone`` with vector ``x`` and return the matches.

    Args:
        x: Query vector, forwarded as ``vector``.
        top_k: Number of results requested, forwarded as ``top_k``.
        index_pinecone: Object exposing a ``query(**kwargs)`` method whose
            result can be subscripted with ``'matches'``.

    Returns:
        The ``'matches'`` entry of the query response. Metadata is requested
        (``include_metadata=True``); vector values are not
        (``include_values=False``).
    """
    query_kwargs = {
        'top_k': top_k,
        'include_values': False,
        'include_metadata': True,
        'vector': x,
    }
    return index_pinecone.query(**query_kwargs)['matches']
def plot_best_models_count(ids, catalogue):
    """Plot, per metric, how often each model has the smallest value.

    Args:
        ids: Iterable of items subscriptable with ``'id'``.
        catalogue: Frame indexed by those ids with a ``file_evaluation``
            column holding paths readable by ``pd.read_parquet``.

    Returns:
        The object returned by ``sns.catplot``: one count panel per metric
        (rows whose metric is ``"mase"`` are excluded).
    """
    # NOTE: the local name ``uids`` is referenced by the query string below.
    uids = [match['id'] for match in ids]
    eval_files = catalogue['file_evaluation'].loc[uids].unique()

    # Load every referenced evaluation file, then keep only the given series.
    frames = []
    for eval_path in eval_files:
        frames.append(pd.read_parquet(eval_path))
    scores = pd.concat(frames).query('unique_id in @uids')

    # One row per (unique_id, metric), one column per model.
    wide = pd.pivot(
        scores,
        index=['unique_id', 'metric'],
        columns='model',
        values='value',
    ).reset_index()

    # The model columns are captured before 'BestModel' is added.
    models = wide.drop(columns=['unique_id', 'metric']).columns
    wide['BestModel'] = wide[models].idxmin(axis=1)

    without_mase = wide.query('metric != "mase"')
    fig = sns.catplot(without_mase, y='BestModel', kind='count', col='metric')
    return fig
def plot_closest_series(Y_df, id, catalogue):
    """Plot the uploaded series next to one catalogue series.

    Args:
        Y_df: Frame with ``unique_id``, ``ds`` and ``y`` columns; drawn on the
            left subplot and labelled with its first ``unique_id``.
        id: Catalogue index label of the series to compare against. It is
            also matched against the ``unique_id`` column of the parquet file
            named by the catalogue row's ``file_timenet`` attribute.
        catalogue: Frame indexed by series id, exposing ``file_timenet``,
            ``dataset``, ``subdataset`` and ``ts_name`` for the row.

    Returns:
        The matplotlib figure holding both subplots.
    """
    # Read the file referenced by file_timenet and keep only the requested
    # series. The query string refers to the ``id`` parameter by name.
    uid_catalogue = catalogue.loc[id]
    closest_df = pd.read_parquet(uid_catalogue.file_timenet).query('unique_id == @id')

    # One row, two columns: uploaded series on the left, match on the right.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

    # First unique_id of each frame, used as the legend label of its line.
    unique_id_Y_df = Y_df['unique_id'].unique()[0]
    unique_id_closest_df = closest_df['unique_id'].unique()[0]

    sns.lineplot(x='ds', y='y', ax=axes[0], data=Y_df, label=unique_id_Y_df)
    # Label the second line as well; without a label the legend() call on
    # this axis has no labelled artist to display.
    sns.lineplot(x='ds', y='y', ax=axes[1], data=closest_df, label=unique_id_closest_df)

    axes[0].set_title('Uploaded Dataset')
    axes[1].set_title(f'TimenetTimeSeries:{uid_catalogue.dataset},{uid_catalogue.subdataset},{uid_catalogue.ts_name}')

    axes[0].legend()
    axes[1].legend()

    plt.tight_layout()
    # Kept from the original: the figure is both shown and returned.
    plt.show()
    return fig
def get_catalogue():
    """Read the parquet file at ``FILE_CATALOGUE`` and return its contents."""
    catalogue = pd.read_parquet(FILE_CATALOGUE)
    return catalogue
|