diff --git "a/.ipynb_checkpoints/Time_Series_Forecasting-checkpoint.ipynb" "b/.ipynb_checkpoints/Time_Series_Forecasting-checkpoint.ipynb" new file mode 100644--- /dev/null +++ "b/.ipynb_checkpoints/Time_Series_Forecasting-checkpoint.ipynb" @@ -0,0 +1,1402 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lLC1wTwghgKr", + "outputId": "fadf5c4c-5ec6-496d-c0ee-3aadde7be24c" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[*********************100%***********************] 1 of 1 completed\n", + "\n", + "1 Failed download:\n", + "['AAPL']: ReadTimeout(ReadTimeoutError(\"HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)\"))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [Close_AAPL]\n", + "Index: []\n" + ] + } + ], + "source": [ + "import yfinance as yf\n", + "import pandas as pd\n", + "\n", + "# Download Apple stock data\n", + "data = yf.download(\"AAPL\", start=\"2015-01-01\", end=\"2023-01-01\", auto_adjust=True)\n", + "\n", + "# Flatten columns (in case of MultiIndex)\n", + "data.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in data.columns]\n", + "\n", + "# Keep only the Close column\n", + "data = data[['Close_AAPL']].copy() # Use the new name\n", + "\n", + "# Drop any missing values (just in case)\n", + "data.dropna(subset=['Close_AAPL'], inplace=True)\n", + "\n", + "# Show first 5 rows\n", + "print(data.head())\n", + "\n", + "# Save to CSV\n", + "data.to_csv(\"AAPL_stock_data.csv\", index_label=\"Date\")\n", + "\n", + "# Save to Excel\n", + "data.to_excel(\"AAPL_stock_data.xlsx\", index_label=\"Date\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IcCo4MeeI_kC", + "outputId": 
"85159790-5809-4f8d-b8cc-4f03efe5d992" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Close\n", + "Date \n", + "2015-01-02 24.261055\n", + "2015-01-05 23.577574\n", + "2015-01-06 23.579790\n", + "2015-01-07 23.910435\n", + "2015-01-08 24.829126\n", + "\n", + "DatetimeIndex: 2014 entries, 2015-01-02 to 2022-12-30\n", + "Data columns (total 1 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Close 2014 non-null float64\n", + "dtypes: float64(1)\n", + "memory usage: 31.5 KB\n", + "None\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Skip the first 2 rows that are junk\n", + "df = pd.read_csv(\"AAPL_stock_data.csv\", skiprows=0, parse_dates=['Date'], index_col='Date')\n", + "\n", + "# Rename the column properly\n", + "df.columns = ['Close']\n", + "\n", + "# Ensure numeric\n", + "df['Close'] = pd.to_numeric(df['Close'], errors='coerce')\n", + "\n", + "# Drop missing values\n", + "df = df.dropna(subset=['Close'])\n", + "\n", + "print(df.head())\n", + "print(df.info())" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "UFUNbSQ1JKSP", + "outputId": "4172b22d-75f2-40b6-dbd4-4555f4f387ee" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ADF Statistic: -0.6303067985116851\n", + "p-value: 0.8639858434129919\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "from statsmodels.tsa.stattools import adfuller\n", + "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n", + "\n", + "# Plot data\n", + "df['Close'].plot(figsize=(12,6), title=\"Apple Stock Closing Prices\")\n", + "plt.show()\n", + "\n", + "# Stationarity test\n", + "result = adfuller(df['Close'])\n", + "print(\"ADF Statistic:\", result[0])\n", + "print(\"p-value:\", result[1])\n", + "\n", + "# ACF & PACF\n", + "plot_acf(df['Close'], lags=40)\n", + "plot_pacf(df['Close'], lags=40)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 716 + }, + "id": "KaLRag6iJPzo", + "outputId": "33bbb26e-dbcf-4182-9f2d-0d2e5953904b" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.\n", + " self._init_dates(dates, freq)\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.\n", + " self._init_dates(dates, freq)\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.\n", + " self._init_dates(dates, freq)\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:836: ValueWarning: No supported index is available. 
Prediction results will be given with an integer index beginning at `start`.\n", + " return get_prediction_index(\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:836: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.\n", + " return get_prediction_index(\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from statsmodels.tsa.arima.model import ARIMA\n", + "\n", + "train = df['Close'][:-200]\n", + "test = df['Close'][-200:]\n", + "\n", + "model = ARIMA(train, order=(5,1,0)) # (p,d,q) — tune using AIC\n", + "model_fit = model.fit()\n", + "forecast = model_fit.forecast(steps=len(test))\n", + "\n", + "plt.figure(figsize=(12,6))\n", + "plt.plot(train.index, train, label='Train')\n", + "plt.plot(test.index, test, label='Test')\n", + "plt.plot(test.index, forecast, label='ARIMA Forecast')\n", + "plt.legend()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 564 + }, + "id": "4-SnQaMTJUfH", + "outputId": "7c33dfe8-2248-49aa-cb06-86e9a9a563f7" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "\n", + "# Read CSV with correct date parsing\n", + "df = pd.read_csv(\"AAPL_stock_data.csv\", parse_dates=['Date'], index_col='Date')\n", + "\n", + "# Rename column if needed\n", + "df.columns = ['Close']\n", + "\n", + "# Ensure numeric and drop any missing values\n", + "df['Close'] = pd.to_numeric(df['Close'], errors='coerce')\n", + "df = df.dropna(subset=['Close'])\n", + "\n", + "# Resample to daily frequency and forward fill missing values\n", + "df = df.resample('D').ffill()\n", + "\n", + "# Split train and test\n", + "train = df['Close'][:-200]\n", + "test = df['Close'][-200:]\n", + "\n", + "# Fit ARIMA model\n", + "model = ARIMA(train, order=(5,1,0))\n", + "model_fit = model.fit()\n", + "\n", + "# Forecast\n", + "forecast = model_fit.forecast(steps=len(test))\n", + "\n", + "# Plot\n", + "plt.figure(figsize=(12,6))\n", + "plt.plot(train.index, train, label='Train')\n", + "plt.plot(test.index, test, label='Test')\n", + "plt.plot(test.index, forecast, label='ARIMA Forecast')\n", + "plt.title(\"Apple Stock Price ARIMA Forecast\")\n", + "plt.xlabel(\"Date\")\n", + "plt.ylabel(\"Closing Price\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6cmWlsw9JuLP", + "outputId": "deaf96c5-a5ff-4985-d28e-6ff7e28b3d27" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: tensorflow in c:\\users\\hp\\anaconda3\\lib\\site-packages (2.20.0)\n", + "Requirement already satisfied: absl-py>=1.0.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (2.3.1)\n", + "Requirement already satisfied: astunparse>=1.6.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from 
tensorflow) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=24.3.25 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (25.9.23)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (0.6.0)\n", + "Requirement already satisfied: google_pasta>=0.1.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (0.2.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (18.1.1)\n", + "Requirement already satisfied: opt_einsum>=2.3.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (3.4.0)\n", + "Requirement already satisfied: packaging in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (23.2)\n", + "Requirement already satisfied: protobuf>=5.28.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (6.32.1)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (2.32.2)\n", + "Requirement already satisfied: setuptools in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (69.5.1)\n", + "Requirement already satisfied: six>=1.12.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (1.16.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (3.1.0)\n", + "Requirement already satisfied: typing_extensions>=3.6.6 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (4.15.0)\n", + "Requirement already satisfied: wrapt>=1.11.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (1.14.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (1.75.1)\n", + "Requirement already satisfied: tensorboard~=2.20.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (2.20.0)\n", + 
"Requirement already satisfied: keras>=3.10.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (3.11.3)\n", + "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (1.26.4)\n", + "Requirement already satisfied: h5py>=3.11.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (3.11.0)\n", + "Requirement already satisfied: ml_dtypes<1.0.0,>=0.5.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorflow) (0.5.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow) (2.2.2)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorflow) (2025.1.31)\n", + "Requirement already satisfied: markdown>=2.6.8 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorboard~=2.20.0->tensorflow) (3.4.1)\n", + "Requirement already satisfied: pillow in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorboard~=2.20.0->tensorflow) (10.3.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorboard~=2.20.0->tensorflow) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from tensorboard~=2.20.0->tensorflow) (3.0.3)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n", + "Requirement already satisfied: rich in c:\\users\\hp\\anaconda3\\lib\\site-packages (from 
keras>=3.10.0->tensorflow) (13.3.5)\n", + "Requirement already satisfied: namex in c:\\users\\hp\\anaconda3\\lib\\site-packages (from keras>=3.10.0->tensorflow) (0.1.0)\n", + "Requirement already satisfied: optree in c:\\users\\hp\\anaconda3\\lib\\site-packages (from keras>=3.10.0->tensorflow) (0.17.0)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from werkzeug>=1.0.1->tensorboard~=2.20.0->tensorflow) (2.1.3)\n", + "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from rich->keras>=3.10.0->tensorflow) (2.2.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from rich->keras>=3.10.0->tensorflow) (2.15.1)\n", + "Requirement already satisfied: mdurl~=0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->keras>=3.10.0->tensorflow) (0.1.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install tensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BxOrU-i6J0z4", + "outputId": "b91c6e1b-3985-4369-9de0-6868ec873464" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:199: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. 
When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", + " super().__init__(**kwargs)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m9s\u001b[0m 66ms/step - loss: 0.0018 - val_loss: 6.9563e-04\n", + "Epoch 2/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 55ms/step - loss: 1.8511e-04 - val_loss: 0.0010\n", + "Epoch 3/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 57ms/step - loss: 1.6209e-04 - val_loss: 8.2369e-04\n", + "Epoch 4/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 56ms/step - loss: 1.8279e-04 - val_loss: 5.5513e-04\n", + "Epoch 5/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 55ms/step - loss: 1.6396e-04 - val_loss: 9.0062e-04\n", + "Epoch 6/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 54ms/step - loss: 1.3207e-04 - val_loss: 4.7759e-04\n", + "Epoch 7/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 53ms/step - loss: 1.2545e-04 - val_loss: 5.0669e-04\n", + "Epoch 8/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 54ms/step - loss: 1.2764e-04 - val_loss: 4.2861e-04\n", + "Epoch 9/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 54ms/step - loss: 1.2959e-04 - val_loss: 4.0455e-04\n", + "Epoch 10/10\n", + "\u001b[1m65/65\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 53ms/step - loss: 1.4660e-04 - 
val_loss: 5.4251e-04\n", + "\u001b[1m18/18\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 46ms/step\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "import tensorflow as tf\n", + "from tensorflow.keras import Sequential\n", + "from tensorflow.keras.layers import LSTM, Dense\n", + "\n", + "# Scale data\n", + "scaler = MinMaxScaler()\n", + "scaled = scaler.fit_transform(df[['Close']])\n", + "\n", + "# Create sequences\n", + "def create_sequences(df, seq_len=60):\n", + " X, y = [], []\n", + " for i in range(len(df)-seq_len):\n", + " X.append(df[i:i+seq_len])\n", + " y.append(df[i+seq_len])\n", + " return np.array(X), np.array(y)\n", + "\n", + "seq_len = 60\n", + "X, y = create_sequences(scaled, seq_len)\n", + "\n", + "# Train-test split\n", + "split = int(0.8 * len(X))\n", + "X_train, X_test = X[:split], X[split:]\n", + "y_train, y_test = y[:split], y[split:]\n", + "\n", + "# LSTM model\n", + "model = Sequential([\n", + " LSTM(50, return_sequences=True, input_shape=(seq_len,1)),\n", + " LSTM(50),\n", + " Dense(1)\n", + "])\n", + "model.compile(optimizer='adam', loss='mse')\n", + "model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)\n", + "\n", + "# Forecast\n", + "pred = model.predict(X_test)\n", + "pred = scaler.inverse_transform(pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "sqjLrn3MKbwv" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ARIMA RMSE: 12.955494218221505\n", + "ARIMA MAPE: 0.08013550160453459\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "# ---- Robust metrics (handles NaNs, shape, zeros for MAPE) ----\n", + "def safe_metrics(y_true, y_pred, eps=1e-8):\n", + " y_true = np.asarray(y_true, 
dtype=float).ravel()\n", + " y_pred = np.asarray(y_pred, dtype=float).ravel()\n", + "\n", + " # align lengths if off-by-one happened\n", + " n = min(len(y_true), len(y_pred))\n", + " y_true, y_pred = y_true[:n], y_pred[:n]\n", + "\n", + " # drop NaNs/Infs\n", + " m = np.isfinite(y_true) & np.isfinite(y_pred)\n", + " y_true, y_pred = y_true[m], y_pred[m]\n", + " if y_true.size == 0:\n", + " return np.nan, np.nan\n", + "\n", + " rmse = np.sqrt(mean_squared_error(y_true, y_pred))\n", + " mape = np.mean(np.abs((y_true - y_pred) / np.maximum(np.abs(y_true), eps)))\n", + " return float(rmse), float(mape)\n", + "\n", + "# ===== Example: proper train/test split and ARIMA forecast =====\n", + "if \"Close\" not in df.columns:\n", + " raise ValueError(\"❌ 'Close' column not found.\")\n", + "\n", + "y = df[\"Close\"].astype(float)\n", + "\n", + "# Use a fixed-size holdout (e.g., last 60 points)\n", + "test_size = 60\n", + "train, test = y.iloc[:-test_size], y.iloc[-test_size:]\n", + "\n", + "# Fit ARIMA on TRAIN only; forecast exactly len(test) steps\n", + "model = ARIMA(train, order=(5,1,0), enforce_stationarity=False, enforce_invertibility=False)\n", + "res = model.fit(method_kwargs={\"warn_convergence\": False})\n", + "\n", + "forecast = res.forecast(steps=len(test)) # <-- length matches test\n", + "# If you get a pandas Series for both, the indices should be aligned already:\n", + "# test.index, forecast.index\n", + "\n", + "arima_rmse, arima_mape = safe_metrics(test.values, forecast.values)\n", + "\n", + "print(\"ARIMA RMSE:\", arima_rmse)\n", + "print(\"ARIMA MAPE:\", arima_mape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "5BpmMJkaMf1G" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LSTM RMSE: 4.381223059835496\n", + "LSTM MAPE: 0.022886644352858705\n" + ] + } + ], + "source": [ + "from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n", + "import numpy as np\n", 
+ "\n", + "# Ensure y_test and pred are aligned and scaled back\n", + "# Reshape y_test before inverse scaling\n", + "y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))\n", + "\n", + "# Align y_test_inv and pred to have the same number of samples\n", + "# Take the last 'len(pred)' elements of y_test_inv\n", + "y_test_aligned = y_test_inv[-len(pred):]\n", + "\n", + "lstm_rmse = np.sqrt(mean_squared_error(y_test_aligned, pred))\n", + "lstm_mape = mean_absolute_percentage_error(y_test_aligned, pred)\n", + "\n", + "print(\"LSTM RMSE:\", lstm_rmse)\n", + "print(\"LSTM MAPE:\", lstm_mape)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "v3cNcUXsMuJG" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\base\\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", + " warnings.warn(\"Maximum Likelihood optimization failed to \"\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\base\\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", + " warnings.warn(\"Maximum Likelihood optimization failed to \"\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\base\\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", + " warnings.warn(\"Maximum Likelihood optimization failed to \"\n", + "C:\\Users\\Hp\\anaconda3\\Lib\\site-packages\\statsmodels\\base\\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. 
Check mle_retvals\n", + " warnings.warn(\"Maximum Likelihood optimization failed to \"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ARIMA Rolling RMSE: 0.7900705786191281\n", + "ARIMA Rolling MAPE: 0.009881837288304923\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n", + "\n", + "def rolling_window_arima(series, window_size=200, forecast_horizon=1):\n", + " errors_rmse, errors_mape = [], []\n", + "\n", + " # Ensure input is a numpy array\n", + " series = np.array(series)\n", + "\n", + " for i in range(window_size, len(series) - forecast_horizon):\n", + " train = series[i-window_size:i]\n", + " test = series[i:i+forecast_horizon]\n", + "\n", + " try:\n", + " model = ARIMA(train, order=(5,1,0))\n", + " model_fit = model.fit()\n", + " forecast = model_fit.forecast(steps=forecast_horizon)\n", + "\n", + " rmse = np.sqrt(mean_squared_error(test, forecast))\n", + " mape = mean_absolute_percentage_error(test, forecast)\n", + "\n", + " errors_rmse.append(rmse)\n", + " errors_mape.append(mape)\n", + " except Exception as e:\n", + " # Optional: print errors for debugging\n", + " # print(f\"Iteration {i} failed: {e}\")\n", + " continue\n", + "\n", + " # Handle case where no errors collected\n", + " if len(errors_rmse) == 0:\n", + " return np.nan, np.nan\n", + "\n", + " return np.mean(errors_rmse), np.mean(errors_mape)\n", + "\n", + "# Example usage\n", + "arima_rmse_roll, arima_mape_roll = rolling_window_arima(df['Close'], window_size=200)\n", + "print(\"ARIMA Rolling RMSE:\", arima_rmse_roll)\n", + "print(\"ARIMA Rolling MAPE:\", arima_mape_roll)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "5rfxUys8nYqw" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processed window 1/2719\n", + 
"Processed window 20/2719\n", + "Processed window 40/2719\n", + "Processed window 60/2719\n", + "Processed window 80/2719\n", + "Processed window 100/2719\n", + "Processed window 120/2719\n", + "Processed window 140/2719\n", + "Processed window 160/2719\n", + "Processed window 180/2719\n", + "Processed window 200/2719\n", + "Processed window 220/2719\n", + "Processed window 240/2719\n", + "Processed window 260/2719\n", + "Processed window 280/2719\n", + "Processed window 300/2719\n", + "Processed window 320/2719\n", + "Processed window 340/2719\n", + "Processed window 360/2719\n", + "Processed window 380/2719\n", + "Processed window 400/2719\n", + "Processed window 420/2719\n", + "Processed window 440/2719\n", + "Processed window 460/2719\n", + "Processed window 480/2719\n", + "Processed window 500/2719\n", + "Processed window 520/2719\n", + "Processed window 540/2719\n", + "Processed window 560/2719\n", + "Processed window 580/2719\n", + "Processed window 600/2719\n", + "Processed window 620/2719\n", + "Processed window 640/2719\n", + "Processed window 660/2719\n", + "Processed window 680/2719\n", + "Processed window 700/2719\n", + "Processed window 720/2719\n", + "Processed window 740/2719\n", + "Processed window 760/2719\n", + "Processed window 780/2719\n", + "Processed window 800/2719\n", + "Processed window 820/2719\n", + "Processed window 840/2719\n", + "Processed window 860/2719\n", + "Processed window 880/2719\n", + "Processed window 900/2719\n", + "Processed window 920/2719\n", + "Processed window 940/2719\n", + "Processed window 960/2719\n", + "Processed window 980/2719\n", + "Processed window 1000/2719\n", + "Processed window 1020/2719\n", + "Processed window 1040/2719\n", + "Processed window 1060/2719\n", + "Processed window 1080/2719\n", + "Processed window 1100/2719\n", + "Processed window 1120/2719\n", + "Processed window 1140/2719\n", + "Processed window 1160/2719\n", + "Processed window 1180/2719\n", + "Processed window 1200/2719\n", + "Processed 
window 1220/2719\n", + "Processed window 1240/2719\n", + "Processed window 1260/2719\n", + "Processed window 1280/2719\n", + "Processed window 1300/2719\n", + "Processed window 1320/2719\n", + "Processed window 1340/2719\n", + "Processed window 1360/2719\n", + "Processed window 1380/2719\n", + "Processed window 1400/2719\n", + "Processed window 1420/2719\n", + "Processed window 1440/2719\n", + "Processed window 1460/2719\n", + "Processed window 1480/2719\n", + "Processed window 1500/2719\n", + "Processed window 1520/2719\n", + "Processed window 1540/2719\n", + "Processed window 1560/2719\n", + "Processed window 1580/2719\n", + "Processed window 1600/2719\n", + "Processed window 1620/2719\n", + "Processed window 1640/2719\n", + "Processed window 1660/2719\n", + "Processed window 1680/2719\n", + "Processed window 1700/2719\n", + "Processed window 1720/2719\n", + "Processed window 1740/2719\n", + "Processed window 1760/2719\n", + "Processed window 1780/2719\n", + "Processed window 1800/2719\n", + "Processed window 1820/2719\n", + "Processed window 1840/2719\n", + "Processed window 1860/2719\n", + "Processed window 1880/2719\n", + "Processed window 1900/2719\n", + "Processed window 1920/2719\n", + "Processed window 1940/2719\n", + "Processed window 1960/2719\n", + "Processed window 1980/2719\n", + "Processed window 2000/2719\n", + "Processed window 2020/2719\n", + "Processed window 2040/2719\n", + "Processed window 2060/2719\n", + "Processed window 2080/2719\n", + "Processed window 2100/2719\n", + "Processed window 2120/2719\n", + "Processed window 2140/2719\n", + "Processed window 2160/2719\n", + "Processed window 2180/2719\n", + "Processed window 2200/2719\n", + "Processed window 2220/2719\n", + "Processed window 2240/2719\n", + "Processed window 2260/2719\n", + "Processed window 2280/2719\n", + "Processed window 2300/2719\n", + "Processed window 2320/2719\n", + "Processed window 2340/2719\n", + "Processed window 2360/2719\n", + "Processed window 2380/2719\n", + 
"Processed window 2400/2719\n", + "Processed window 2420/2719\n", + "Processed window 2440/2719\n", + "Processed window 2460/2719\n", + "Processed window 2480/2719\n", + "Processed window 2500/2719\n", + "Processed window 2520/2719\n", + "Processed window 2540/2719\n", + "Processed window 2560/2719\n", + "Processed window 2580/2719\n", + "Processed window 2600/2719\n", + "Processed window 2620/2719\n", + "Processed window 2640/2719\n", + "Processed window 2660/2719\n", + "Processed window 2680/2719\n", + "Processed window 2700/2719\n", + "Processed window 2719/2719\n", + "✅ Rolling LSTM RMSE: 1.9061913541153364\n", + "✅ Rolling LSTM MAPE: 0.030438538947571786\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from numpy.lib.stride_tricks import sliding_window_view # Import the function\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "import tensorflow as tf\n", + "from tensorflow.keras import Sequential, Input # Also import Input\n", + "\n", + "# Scale data\n", + "# scaler = MinMaxScaler()\n", + "# scaled = scaler.fit_transform(data[['Close_AAPL']]) # 'data' might not be available here\n", + "\n", + "# Create sequences\n", + "def _make_seq_xy(arr_2d, seq_len, horizon=1):\n", + " \"\"\"\n", + " arr_2d: shape (N, 1)\n", + " Returns X: (N-seq_len-horizon+1, seq_len, 1), y: (N-seq_len-horizon+1, 1)\n", + " \"\"\"\n", + " if arr_2d.ndim != 2 or arr_2d.shape[1] != 1:\n", + " raise ValueError(\"arr_2d must have shape (N, 1)\")\n", + " if len(arr_2d) <= seq_len + horizon -1: # ensure enough data for at least one sequence and its horizon target\n", + " return np.empty((0, seq_len, 1)), np.empty((0, 1))\n", + "\n", + " # X: (N-seq_len+1, seq_len)\n", + " X = sliding_window_view(arr_2d[:, 0], seq_len)\n", + "\n", + " # y: (N-seq_len, 1) - target is the value right after the sequence\n", + " y = arr_2d[seq_len:]\n", + "\n", + " # Adjust X and y to align for the given horizon\n", + " # We want sequences ending at time t, and targets at time t + horizon\n", + 
" # So, X should end at N - horizon, and y should start at seq_len + horizon - 1\n", + " if horizon > 0:\n", + " X = X[:-horizon]\n", + " y = y[horizon-1:-horizon+1] # Need to adjust indexing here\n", + "\n", + " X = X[..., None] # (N-seq_len-horizon+1, seq_len, 1) -> add channel\n", + "\n", + " # Recalculate y to ensure it has the same number of samples as X\n", + " y = arr_2d[seq_len + horizon - 1: len(arr_2d) - (horizon - 1 if horizon > 1 else 0)]\n", + " y = y[:len(X)] # Trim y to match X length in case of remainder\n", + "\n", + " return X, y\n", + "\n", + "\n", + "def _make_dataset(X, y, batch_size=64, shuffle=True):\n", + " ds = tf.data.Dataset.from_tensor_slices((X.astype(np.float32), y.astype(np.float32)))\n", + " if shuffle:\n", + " ds = ds.shuffle(min(len(X), 2048), seed=42)\n", + " ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + " return ds\n", + "\n", + "def build_lstm(seq_len=60, units=32):\n", + " model = Sequential([\n", + " Input(shape=(seq_len, 1)),\n", + " LSTM(units, return_sequences=True),\n", + " LSTM(units),\n", + " Dense(1)\n", + " ])\n", + " model.compile(optimizer=\"adam\", loss=\"mse\")\n", + " return model\n", + "\n", + "def rolling_window_lstm_fast(\n", + " data, # np.array shape (N,1) or (N,) values\n", + " seq_len=60,\n", + " window_size=200,\n", + " horizon=1,\n", + " base_epochs=5, # more epochs for the very first window\n", + " update_epochs=1, # tiny updates for subsequent windows\n", + " train_every=10, # retrain every k windows instead of every step\n", + " batch_size=64,\n", + " units=32,\n", + " use_global_scaler=True, # FAST (may leak scale info across time)\n", + " verbose=False\n", + "):\n", + " \"\"\"\n", + " Returns: (avg_RMSE, avg_MAPE) over all rolling steps\n", + " Much faster than rebuilding a model per step.\n", + " \"\"\"\n", + "\n", + " # Ensure 2D (N,1)\n", + " data = np.asarray(data)\n", + " if data.ndim == 1:\n", + " data = data.reshape(-1, 1)\n", + " if data.shape[1] != 1:\n", + " raise 
def rolling_window_lstm_fast(
    data,                   # np.array shape (N,1) or (N,) values
    seq_len=60,
    window_size=200,
    horizon=1,
    base_epochs=5,          # more epochs for the very first window
    update_epochs=1,        # tiny updates for subsequent windows
    train_every=10,         # retrain every k windows instead of every step
    batch_size=64,
    units=32,
    use_global_scaler=True, # FAST (may leak scale info across time)
    verbose=False
):
    """
    Walk-forward evaluation of one warm-started LSTM over rolling windows.

    A single model is built once, trained for `base_epochs` on the first
    window, then fine-tuned for `update_epochs` on every `train_every`-th
    window. For each window the last `horizon` samples are held out, predicted
    and scored in the original (inverse-scaled) units.

    data: single-column series, shape (N,) or (N, 1)
    seq_len: length of each LSTM input sequence
    window_size: number of consecutive points in each rolling window
    horizon: forecast distance, also the number of held-out samples per window
    batch_size, units: passed to the dataset / model builders
    use_global_scaler: True fits one MinMaxScaler on the whole series (fast,
        but leaks scale information from the future); False fits a fresh
        scaler on each window, excluding its last `horizon` points
    verbose: print progress for the first window and every 20th window

    Returns: (avg_RMSE, avg_MAPE) over all rolling steps, or (nan, nan) when a
    window is too small to yield both training and test samples.

    Raises ValueError for non single-column data, too little data, or
    horizon / train_every below 1.
    """
    # Ensure 2D (N,1)
    data = np.asarray(data)
    if data.ndim == 1:
        data = data.reshape(-1, 1)
    if data.ndim != 2 or data.shape[1] != 1:
        raise ValueError("data must be a single column array of shape (N,1)")
    if horizon < 1:
        raise ValueError("horizon must be >= 1")
    if train_every < 1:
        raise ValueError("train_every must be >= 1")

    N = len(data)
    if N < window_size + seq_len + horizon:
        raise ValueError("Not enough data for the requested window_size/seq_len/horizon.")

    # The metric helpers are not imported in this cell; import them locally so
    # the function does not depend on which other cells have been run.
    from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error

    # Scaler(s): with the global option the whole series is scaled once up front.
    global_scaler = MinMaxScaler()
    scaled_all = global_scaler.fit_transform(data) if use_global_scaler else None

    def _window_samples(stop):
        """Scale data[stop - window_size:stop]; return its scaler and train/test split."""
        lo = stop - window_size
        if use_global_scaler:
            scaler = global_scaler
            window_scaled = scaled_all[lo:stop]
        else:
            window = data[lo:stop]
            scaler = MinMaxScaler()
            # fit only on the training portion of the window to avoid leakage
            scaler.fit(window[:len(window) - horizon])
            window_scaled = scaler.transform(window)

        X, y = _make_seq_xy(window_scaled, seq_len, horizon=horizon)
        if len(X) <= horizon:
            # Nothing would be left to train on after holding out the test part.
            return None
        return scaler, X[:-horizon], y[:-horizon], X[-horizon:], y[-horizon:]

    # Model (built once, then warm-started on later windows)
    model = build_lstm(seq_len=seq_len, units=units)

    errors_rmse, errors_mape = [], []
    total_steps = (N - horizon) - window_size  # number of rolling positions

    for step, stop in enumerate(range(window_size, N - horizon), start=1):
        samples = _window_samples(stop)
        if samples is None:
            if step == 1:
                return np.nan, np.nan
            continue
        scaler, X_train, y_train, X_test, y_test = samples

        if step == 1:
            # Base fit on the very first window.
            ds_train = _make_dataset(X_train, y_train, batch_size=batch_size, shuffle=True)
            cb = [tf.keras.callbacks.EarlyStopping(monitor="loss", patience=2, min_delta=1e-5, restore_best_weights=True)]
            model.fit(ds_train, epochs=base_epochs, verbose=0, callbacks=cb)
        elif (step - 1) % train_every == 0:
            # Only retrain every `train_every` steps with tiny epochs (warm start)
            ds_train = _make_dataset(X_train, y_train, batch_size=batch_size, shuffle=True)
            model.fit(ds_train, epochs=update_epochs, verbose=0)

        pred = model.predict(X_test, verbose=0)

        # Inverse-transform with the scaler that produced this window's values
        # (previously the per-window branch of the first window discarded the
        # result and left pred_inv undefined).
        pred_inv = scaler.inverse_transform(pred)
        y_inv = scaler.inverse_transform(y_test)

        errors_rmse.append(np.sqrt(mean_squared_error(y_inv, pred_inv)))
        errors_mape.append(mean_absolute_percentage_error(y_inv, pred_inv))

        if verbose and (step == 1 or step % 20 == 0 or step == total_steps):
            print(f"Processed window {step}/{total_steps}")

    if not errors_rmse:
        return np.nan, np.nan
    return float(np.mean(errors_rmse)), float(np.mean(errors_mape))
# ===== Example usage (keeps your original interface) =====
# Run the rolling LSTM evaluation on the 'Close' column and report the averaged
# errors; if the column is missing, only print an error message.
if "Close" not in df.columns:
    print("❌ Error: 'Close' column not found in data.")
else:
    lstm_rmse_roll, lstm_mape_roll = rolling_window_lstm_fast(
        df[['Close']].values,
        seq_len=60,
        window_size=200,
        horizon=1,
        # training schedule: a short base fit, then tiny periodic updates
        base_epochs=5,
        update_epochs=1,
        train_every=10,
        # model / batching
        batch_size=128,
        units=32,
        # one scaler for the whole series; set False to avoid scaling leakage
        use_global_scaler=True,
        verbose=True,
    )
    print("✅ Rolling LSTM RMSE:", lstm_rmse_roll)
    print("✅ Rolling LSTM MAPE:", lstm_mape_roll)
# -----------------------------
# Utility: safe fetch a variable if it exists
# -----------------------------
def _get_if_defined(name, default=np.nan):
    """
    Return the module-level (notebook) variable called `name`, or `default`.

    name: variable name to look up in this module's global namespace
    default: value returned when no such global exists

    A plain dictionary lookup is used instead of eval(), so `name` is never
    executed as code; anything that is not the name of an existing global
    simply yields `default`.
    """
    return globals().get(name, default)

# -----------------------------
# Rolling-window ARIMA
# -----------------------------
def rolling_window_arima(series, window_size=200, forecast_horizon=1, order=(5,1,0)):
    """
    Walks forward one step at a time.
    For each position: fit ARIMA on the last `window_size` points; forecast `forecast_horizon`.

    series: 1-D sequence of values (flattened and converted to float64)
    window_size: number of most recent points each model is fitted on
    forecast_horizon: number of steps forecast and scored at each position
    order: ARIMA (p, d, q) order

    Returns mean RMSE/MAPE across all steps (ignores steps that failed to fit),
    or (nan, nan) when the series is shorter than window_size + forecast_horizon
    or no step succeeded.
    """
    errors_rmse, errors_mape = [], []

    series = np.asarray(series, dtype=np.float64).ravel()
    if len(series) < window_size + forecast_horizon:
        return np.nan, np.nan

    for i in range(window_size, len(series) - forecast_horizon + 1):
        train = series[i-window_size:i]
        test = series[i:i+forecast_horizon]
        try:
            model = ARIMA(train, order=order, enforce_stationarity=False, enforce_invertibility=False)
            model_fit = model.fit(method_kwargs={"warn_convergence": False})
            forecast = model_fit.forecast(steps=forecast_horizon)
            rmse = np.sqrt(mean_squared_error(test, forecast))
            mape = mean_absolute_percentage_error(test, forecast)
            errors_rmse.append(rmse)
            errors_mape.append(mape)
        except Exception:
            # Deliberate best-effort: skip windows that fail to fit or score.
            # NOTE(review): this also hides programming errors (e.g. a missing
            # import), which then surface only as a NaN result.
            continue

    if not errors_rmse:
        return np.nan, np.nan
    return float(np.mean(errors_rmse)), float(np.mean(errors_mape))
# -----------------------------
# Holdout ARIMA (single fit)
# -----------------------------
def holdout_arima(series, test_size=60, order=(5,1,0)):
    """
    Single-split ARIMA evaluation.

    The model is fitted once on everything except the final `test_size`
    points and asked to forecast exactly that many steps ahead.

    Returns (RMSE, MAPE) measured on the held-out tail, or (nan, nan) when the
    series has at most `test_size + 5` points or fitting/forecasting fails.
    """
    values = np.asarray(series, dtype=np.float64).ravel()

    # Guard: leave a handful of points beyond the holdout to fit on.
    if not len(values) > test_size + 5:
        return np.nan, np.nan

    history = values[:-test_size]
    holdout = values[-test_size:]

    try:
        fitted = ARIMA(
            history,
            order=order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        ).fit(method_kwargs={"warn_convergence": False})
        predicted = fitted.forecast(steps=len(holdout))
        root_mse = np.sqrt(mean_squared_error(holdout, predicted))
        pct_err = mean_absolute_percentage_error(holdout, predicted)
    except Exception:
        # Any failure while fitting or scoring is reported as missing metrics.
        return np.nan, np.nan
    return float(root_mse), float(pct_err)
# =========================================================
# Compute metrics
# =========================================================
# Nothing below can run without the closing-price column.
if "Close" not in df.columns:
    raise ValueError("❌ Error: 'Close' column not found in data.")

close_series = df["Close"].astype(float).values

# ARIMA: one holdout fit plus the walk-forward evaluation, same order for both.
arima_rmse, arima_mape = holdout_arima(close_series, test_size=60, order=(5,1,0))
arima_rmse_roll, arima_mape_roll = rolling_window_arima(
    close_series,
    window_size=200,
    forecast_horizon=1,
    order=(5,1,0),
)

# LSTM: reuse whatever earlier cells left in the namespace; any metric that
# was never computed falls back to NaN so the table can still be rendered.
lstm_rmse, lstm_mape, lstm_rmse_roll, lstm_mape_roll = (
    _get_if_defined(metric_name, np.nan)
    for metric_name in ("lstm_rmse", "lstm_mape", "lstm_rmse_roll", "lstm_mape_roll")
)

# =========================================================
# Performance comparison table
# =========================================================
results = {
    "Model": ["ARIMA", "LSTM"],
    "RMSE": [arima_rmse, lstm_rmse],
    "MAPE": [arima_mape, lstm_mape],
    "Rolling RMSE": [arima_rmse_roll, lstm_rmse_roll],
    "Rolling MAPE": [arima_mape_roll, lstm_mape_roll],
}

df_results = pd.DataFrame(results)

# Display copy: every metric rendered with 4 decimals, missing values as "NaN".
df_results_fmt = df_results.copy()
for col in ["RMSE", "MAPE", "Rolling RMSE", "Rolling MAPE"]:
    df_results_fmt[col] = df_results_fmt[col].map(
        lambda value: "NaN" if pd.isnull(value) else f"{value:.4f}"
    )

print(df_results_fmt)
(2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests->huggingface_hub) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests->huggingface_hub) (2.2.2)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from requests->huggingface_hub) (2025.1.31)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "93e8b85b39384a07bf68e19f7eba097d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='