sycod
/

frugal_cviz

Model card Files Files and versions Community

sycod committed 28 days ago

Commit

6ebb6d1

1 Parent(s): d20ece8

eda begun

Browse files

Files changed (9) hide show

.gitignore +8 -10
EDA.ipynb +0 -0
config.yaml +8 -21
notebooks/template-audio.ipynb +0 -1351
notebooks/template-image.ipynb +0 -475
notebooks/template-text.ipynb +0 -1642
src/load_data.py +97 -0
src/models.py +395 -0
tasks/utils/load_data.py +0 -59

.gitignore CHANGED Viewed

@@ -1,19 +1,17 @@
-.ipynb_checkpoints/sandbox-checkpoint.ipynb
-.DS_Store
-auto_evals/
-venv/
 __pycache__/
 .env
 .ipynb_checkpoints
-.vscode/
 .venv
 eval-queue/
 eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
-emissions.csv
-data/
-pyro-sdis/

 __pycache__/
+.DS_Store
 .env
 .ipynb_checkpoints
+.ipynb_checkpoints/sandbox-checkpoint.ipynb
 .venv
+.vscode/
+auto_evals/
+data/
+emissions.csv
 eval-queue/
 eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
+pyro-sdis/
+venv/

EDA.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

config.yaml CHANGED Viewed

@@ -1,23 +1,10 @@
-data:
-  local_path: "data"
-  img_dir: "Images"
-  annot_dir: "Annotation"
-  img_db_uri: "img_db.csv"
-  train_dir: "train"
-  test_dir: "test"
-  checkpoint_dir : "model_chkpts"
-  app_dir: "app"
-log:
-  log_dir: "logs"
-models:
-  classes_3: ["pug", "Siberian_husky", "borzoi"]
-  classes_10: ["Leonberg", "basenji", "malamute", "papillon", "chow", "dhole", "dingo", "Cardigan", "Brabancon_griffon", "boxer"]
-app_data:
-  local_path: "app_data"
-  model: "EfficientNetB0_app.keras"
-  onnx: "EfficientNetB0_app.onnx"
-  # breeds are not in the same order as original classes
-  breeds: ['Brabancon_griffon', 'Cardigan', 'Leonberg', 'basenji', 'boxer', 'chow', 'dhole', 'dingo', 'malamute', 'papillon']

+data_dir: "data"
+db_info_uri: "data_info.csv"
+# log:
+#   log_dir: "logs"
+# app_data:
+#   local_path: "app_data"
+#   model: "EfficientNetB0_app.keras"
+#   onnx: "EfficientNetB0_app.onnx"

notebooks/template-audio.ipynb DELETED Viewed

@@ -1,1351 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Text task notebook template\n",
-    "## Loading the necessary libraries"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 19:48:07] Multiple instances of codecarbon are allowed to run at the same time.\n",
-      "[codecarbon INFO @ 19:48:07] [setup] RAM Tracking...\n",
-      "[codecarbon INFO @ 19:48:07] [setup] CPU Tracking...\n",
-      "[codecarbon WARNING @ 19:48:09] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
-      "[codecarbon WARNING @ 19:48:09] No CPU tracking mode found. Falling back on CPU constant mode. \n",
-      " Windows OS detected: Please install Intel Power Gadget to measure CPU\n",
-      "\n",
-      "[codecarbon WARNING @ 19:48:11] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
-      "[codecarbon INFO @ 19:48:11] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-1365U\n",
-      "[codecarbon WARNING @ 19:48:11] No CPU tracking mode found. Falling back on CPU constant mode.\n",
-      "[codecarbon INFO @ 19:48:11] [setup] GPU Tracking...\n",
-      "[codecarbon INFO @ 19:48:11] No GPU found.\n",
-      "[codecarbon INFO @ 19:48:11] >>> Tracker's metadata:\n",
-      "[codecarbon INFO @ 19:48:11]   Platform system: Windows-11-10.0.22631-SP0\n",
-      "[codecarbon INFO @ 19:48:11]   Python version: 3.12.7\n",
-      "[codecarbon INFO @ 19:48:11]   CodeCarbon version: 3.0.0_rc0\n",
-      "[codecarbon INFO @ 19:48:11]   Available RAM : 31.347 GB\n",
-      "[codecarbon INFO @ 19:48:11]   CPU count: 12\n",
-      "[codecarbon INFO @ 19:48:11]   CPU model: 13th Gen Intel(R) Core(TM) i7-1365U\n",
-      "[codecarbon INFO @ 19:48:11]   GPU count: None\n",
-      "[codecarbon INFO @ 19:48:11]   GPU model: None\n",
-      "[codecarbon INFO @ 19:48:11] Saving emissions data to file c:\\git\\submission-template\\notebooks\\emissions.csv\n"
-     ]
-    }
-   ],
-   "source": [
-    "from fastapi import APIRouter\n",
-    "from datetime import datetime\n",
-    "from datasets import load_dataset\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "import random\n",
-    "\n",
-    "import sys\n",
-    "sys.path.append('../tasks')\n",
-    "\n",
-    "from utils.evaluation import AudioEvaluationRequest\n",
-    "from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
-    "\n",
-    "\n",
-    "# Define the label mapping\n",
-    "LABEL_MAPPING = {\n",
-    "    \"chainsaw\": 0,\n",
-    "    \"environment\": 1\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Loading the datasets and splitting them"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "668da7bf85434e098b95c3ec447d78fe",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--QuotaClimat--frugalaichallenge-text-train. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
-      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
-      "  warnings.warn(message)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5b68d43359eb429395da8be7d4b15556",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "train.parquet:   0%|          | 0.00/1.21M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "140a304773914e9db8f698eabeb40298",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating train split:   0%|          | 0/6091 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6d04e8ab1906400e8e0029949dc523a5",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/6091 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "request = AudioEvaluationRequest()\n",
-    "\n",
-    "# Load and prepare the dataset\n",
-    "dataset = load_dataset(request.dataset_name)\n",
-    "\n",
-    "# Split dataset\n",
-    "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
-    "test_dataset = train_test[\"test\"]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Random Baseline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Start tracking emissions\n",
-    "tracker.start()\n",
-    "tracker.start_task(\"inference\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 6,\n",
-       " 3,\n",
-       " 6,\n",
-       " 6,\n",
-       " 5,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 3,\n",
-       " 6,\n",
-       " 4,\n",
-       " 2,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 2,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 4,\n",
-       " 4,\n",
-       " 1,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 1,\n",
-       " 2,\n",
-       " 2,\n",
-       " 2,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " 2,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 2,\n",
-       " 0,\n",
-       " 1,\n",
-       " 6,\n",
-       " 3,\n",
-       " 2,\n",
-       " 5,\n",
-       " 5,\n",
-       " 2,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 7,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 1,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 2,\n",
-       " 5,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 2,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 0,\n",
-       " 3,\n",
-       " 1,\n",
-       " 4,\n",
-       " 6,\n",
-       " 0,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 2,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 3,\n",
-       " 4,\n",
-       " 4,\n",
-       " 2,\n",
-       " 2,\n",
-       " 3,\n",
-       " 3,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 5,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 0,\n",
-       " 4,\n",
-       " 0,\n",
-       " 1,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 3,\n",
-       " 3,\n",
-       " 0,\n",
-       " 1,\n",
-       " 2,\n",
-       " 4,\n",
-       " 4,\n",
-       " 3,\n",
-       " 1,\n",
-       " 2,\n",
-       " 4,\n",
-       " 3,\n",
-       " 0,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 3,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 4,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 3,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 0,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 3,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 0,\n",
-       " 4,\n",
-       " 1,\n",
-       " 2,\n",
-       " 6,\n",
-       " 3,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 0,\n",
-       " 7,\n",
-       " 4,\n",
-       " 6,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 4,\n",
-       " 6,\n",
-       " 6,\n",
-       " 4,\n",
-       " 0,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 1,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 2,\n",
-       " 4,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 0,\n",
-       " 7,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 5,\n",
-       " 5,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 3,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 5,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 1,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 5,\n",
-       " 6,\n",
-       " 3,\n",
-       " 2,\n",
-       " 3,\n",
-       " 0,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 6,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 2,\n",
-       " 7,\n",
-       " 5,\n",
-       " 7,\n",
-       " 6,\n",
-       " 3,\n",
-       " 5,\n",
-       " 6,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 1,\n",
-       " 2,\n",
-       " 1,\n",
-       " 5,\n",
-       " 3,\n",
-       " 0,\n",
-       " 3,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 0,\n",
-       " 4,\n",
-       " 1,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 2,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 3,\n",
-       " 2,\n",
-       " 6,\n",
-       " 4,\n",
-       " 4,\n",
-       " 1,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 2,\n",
-       " 7,\n",
-       " 1,\n",
-       " 3,\n",
-       " 5,\n",
-       " 2,\n",
-       " 6,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 0,\n",
-       " 5,\n",
-       " 1,\n",
-       " 6,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 1,\n",
-       " 5,\n",
-       " 1,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 2,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 1,\n",
-       " 0,\n",
-       " 5,\n",
-       " 4,\n",
-       " 2,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 5,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 1,\n",
-       " 2,\n",
-       " 3,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 4,\n",
-       " 7,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 5,\n",
-       " 3,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 5,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 2,\n",
-       " 7,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 0,\n",
-       " 2,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 6,\n",
-       " 3,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 7,\n",
-       " 7,\n",
-       " 4,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 3,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 0,\n",
-       " 1,\n",
-       " 3,\n",
-       " 7,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 5,\n",
-       " 3,\n",
-       " 3,\n",
-       " 4,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 7,\n",
-       " 5,\n",
-       " 0,\n",
-       " 0,\n",
-       " 5,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 6,\n",
-       " 6,\n",
-       " 0,\n",
-       " 4,\n",
-       " 7,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 5,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 3,\n",
-       " 3,\n",
-       " 7,\n",
-       " 3,\n",
-       " 0,\n",
-       " 1,\n",
-       " 1,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 4,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 2,\n",
-       " 0,\n",
-       " 6,\n",
-       " 0,\n",
-       " 6,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 7,\n",
-       " 7,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 4,\n",
-       " 7,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " 0,\n",
-       " 6,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 0,\n",
-       " 5,\n",
-       " 0,\n",
-       " 5,\n",
-       " 7,\n",
-       " 2,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 4,\n",
-       " 0,\n",
-       " 7,\n",
-       " 1,\n",
-       " 4,\n",
-       " 5,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 2,\n",
-       " 0,\n",
-       " 2,\n",
-       " 5,\n",
-       " 5,\n",
-       " 6,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 2,\n",
-       " 3,\n",
-       " 2,\n",
-       " 5,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 5,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 1,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 0,\n",
-       " 7,\n",
-       " 6,\n",
-       " 2,\n",
-       " 4,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 3,\n",
-       " 0,\n",
-       " 7,\n",
-       " 3,\n",
-       " 1,\n",
-       " 2,\n",
-       " 1,\n",
-       " 6,\n",
-       " 4,\n",
-       " 7,\n",
-       " 1,\n",
-       " 7,\n",
-       " 1,\n",
-       " 5,\n",
-       " 1,\n",
-       " 6,\n",
-       " 3,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 6,\n",
-       " 2,\n",
-       " 4,\n",
-       " 4,\n",
-       " 6,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 6,\n",
-       " 1,\n",
-       " 2,\n",
-       " 0,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 5,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 2,\n",
-       " 1,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 7,\n",
-       " 5,\n",
-       " 2,\n",
-       " 5,\n",
-       " 4,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 1,\n",
-       " 3,\n",
-       " 3,\n",
-       " 6,\n",
-       " 4,\n",
-       " 6,\n",
-       " 4,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 2,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 4,\n",
-       " 2,\n",
-       " 2,\n",
-       " 3,\n",
-       " 0,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 6,\n",
-       " 5,\n",
-       " 3,\n",
-       " 2,\n",
-       " 3,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 1,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 2,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 4,\n",
-       " 7,\n",
-       " 0,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 3,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 4,\n",
-       " 7,\n",
-       " 5,\n",
-       " 0,\n",
-       " 4,\n",
-       " 0,\n",
-       " 0,\n",
-       " 1,\n",
-       " 0,\n",
-       " 6,\n",
-       " 4,\n",
-       " 0,\n",
-       " 5,\n",
-       " 4,\n",
-       " 6,\n",
-       " 6,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 3,\n",
-       " 2,\n",
-       " 2,\n",
-       " 1,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 5,\n",
-       " 5,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 5,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 0,\n",
-       " 5,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 0,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 2,\n",
-       " 4,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 7,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 0,\n",
-       " 3,\n",
-       " 1,\n",
-       " 1,\n",
-       " 0,\n",
-       " 5,\n",
-       " 3,\n",
-       " 1,\n",
-       " 2,\n",
-       " 5,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 7,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 3,\n",
-       " 1,\n",
-       " 5,\n",
-       " 4,\n",
-       " 2,\n",
-       " 4,\n",
-       " 6,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " ...]"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "# YOUR MODEL INFERENCE CODE HERE\n",
-    "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
-    "#--------------------------------------------------------------------------------------------   \n",
-    "\n",
-    "# Make random predictions (placeholder for actual model inference)\n",
-    "true_labels = test_dataset[\"label\"]\n",
-    "predictions = [random.randint(0, 1) for _ in range(len(true_labels))]\n",
-    "\n",
-    "predictions\n",
-    "\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "# YOUR MODEL INFERENCE STOPS HERE\n",
-    "#--------------------------------------------------------------------------------------------   "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 19:53:32] Background scheduler didn't run for a long period (47s), results might be inaccurate\n",
-      "[codecarbon INFO @ 19:53:32] Energy consumed for RAM : 0.000156 kWh. RAM Power : 11.755242347717285 W\n",
-      "[codecarbon INFO @ 19:53:32] Delta energy consumed for CPU with constant : 0.000564 kWh, power : 42.5 W\n",
-      "[codecarbon INFO @ 19:53:32] Energy consumed for All CPU : 0.000564 kWh\n",
-      "[codecarbon INFO @ 19:53:32] 0.000720 kWh of electricity used since the beginning.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "EmissionsData(timestamp='2025-01-21T19:53:32', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=47.736408500000834, emissions=4.032368007471064e-05, emissions_rate=8.444466886328872e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.0005636615353475565, gpu_energy=0, ram_energy=0.00015590305493261682, energy_consumed=0.0007195645902801733, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Stop tracking emissions\n",
-    "emissions_data = tracker.stop_task()\n",
-    "emissions_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.10090237899917966"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Calculate accuracy\n",
-    "accuracy = accuracy_score(true_labels, predictions)\n",
-    "accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'submission_timestamp': '2025-01-21T19:53:46.639165',\n",
-       " 'accuracy': 0.10090237899917966,\n",
-       " 'energy_consumed_wh': 0.7195645902801733,\n",
-       " 'emissions_gco2eq': 0.040323680074710634,\n",
-       " 'emissions_data': {'run_id': '908f2e7e-4bb2-4991-a0f6-56bf8d7eda21',\n",
-       "  'duration': 47.736408500000834,\n",
-       "  'emissions': 4.032368007471064e-05,\n",
-       "  'emissions_rate': 8.444466886328872e-07,\n",
-       "  'cpu_power': 42.5,\n",
-       "  'gpu_power': 0.0,\n",
-       "  'ram_power': 11.755242347717285,\n",
-       "  'cpu_energy': 0.0005636615353475565,\n",
-       "  'gpu_energy': 0,\n",
-       "  'ram_energy': 0.00015590305493261682,\n",
-       "  'energy_consumed': 0.0007195645902801733,\n",
-       "  'country_name': 'France',\n",
-       "  'country_iso_code': 'FRA',\n",
-       "  'region': 'île-de-france',\n",
-       "  'cloud_provider': '',\n",
-       "  'cloud_region': '',\n",
-       "  'os': 'Windows-11-10.0.22631-SP0',\n",
-       "  'python_version': '3.12.7',\n",
-       "  'codecarbon_version': '3.0.0_rc0',\n",
-       "  'cpu_count': 12,\n",
-       "  'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n",
-       "  'gpu_count': None,\n",
-       "  'gpu_model': None,\n",
-       "  'ram_total_size': 31.347312927246094,\n",
-       "  'tracking_mode': 'machine',\n",
-       "  'on_cloud': 'N',\n",
-       "  'pue': 1.0},\n",
-       " 'dataset_config': {'dataset_name': 'QuotaClimat/frugalaichallenge-text-train',\n",
-       "  'test_size': 0.2,\n",
-       "  'test_seed': 42}}"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Prepare results dictionary\n",
-    "results = {\n",
-    "    \"submission_timestamp\": datetime.now().isoformat(),\n",
-    "    \"accuracy\": float(accuracy),\n",
-    "    \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
-    "    \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
-    "    \"emissions_data\": clean_emissions_data(emissions_data),\n",
-    "    \"dataset_config\": {\n",
-    "        \"dataset_name\": request.dataset_name,\n",
-    "        \"test_size\": request.test_size,\n",
-    "        \"test_seed\": request.test_seed\n",
-    "    }\n",
-    "}\n",
-    "\n",
-    "results"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

notebooks/template-image.ipynb DELETED Viewed

@@ -1,475 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 🚧 Info\n",
-    "\n",
-    "https://huggingface.co/datasets/pyronear/pyro-sdis\n",
-    "\n",
-    "https://frugalaichallenge.org/participate/"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Image task notebook template\n",
-    "## Loading the necessary libraries"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'tasks'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[1], line 9\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mrandom\u001b[39;00m\n\u001b[1;32m      6\u001b[0m \u001b[38;5;66;03m# import sys\u001b[39;00m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;66;03m# sys.path.append('../')\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mevaluation\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ImageEvaluationRequest\n\u001b[1;32m     10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01memissions\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m tracker, clean_emissions_data, get_space_info\n\u001b[1;32m     11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mload_data\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_data\n",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tasks'"
-     ]
-    }
-   ],
-   "source": [
-    "from fastapi import APIRouter\n",
-    "from datetime import datetime\n",
-    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
-    "\n",
-    "import random\n",
-    "# import sys\n",
-    "# sys.path.append('../')\n",
-    "\n",
-    "from tasks.utils.evaluation import ImageEvaluationRequest\n",
-    "from tasks.utils.emissions import tracker, clean_emissions_data, get_space_info\n",
-    "from tasks.utils.load_data import load_data\n",
-    "from tasks.image import parse_boxes,compute_iou,compute_max_iou"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Loading the datasets and splitting them"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "request = ImageEvaluationRequest()\n",
-    "# Define paths\n",
-    "REPO_ID = request.dataset_name\n",
-    "OUTPUT_DIR = \"../pyro-sdis\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 🚧 Code JL"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Export Dataset**: Use the following function to save the dataset in Ultralytics format:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load and prepare dataset\n",
-    "ds = load_data(REPO_ID, OUTPUT_DIR)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Generating train split: 100%|██████████| 29537/29537 [00:03<00:00, 7616.82 examples/s]\n",
-      "Generating val split: 100%|██████████| 4099/4099 [00:00<00:00, 10697.80 examples/s]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# # Create the directory structure\n",
-    "# for split in [\"train\", \"val\"]:\n",
-    "#     os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)\n",
-    "#     os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)\n",
-    "\n",
-    "# # Load the dataset from the Hugging Face Hub\n",
-    "# dataset = load_dataset(REPO_ID)\n",
-    "\n",
-    "# # Save in Ultralytics format\n",
-    "# def save_ultralytics_format(dataset_split, split):\n",
-    "#     \"\"\"\n",
-    "#     Save a dataset split into the Ultralytics format.\n",
-    "#     Args:\n",
-    "#         dataset_split: The dataset split (e.g., dataset[\"train\"])\n",
-    "#         split: \"train\" or \"val\"\n",
-    "#     \"\"\"\n",
-    "#     for example in dataset_split:\n",
-    "#         # Save the image to the appropriate folder\n",
-    "#         image = example[\"image\"]  # PIL.Image.Image\n",
-    "#         image_name = example[\"image_name\"]  # Original file name\n",
-    "#         output_image_path = os.path.join(IMAGE_DIR, split, image_name)\n",
-    "\n",
-    "#         # Save the image object to disk\n",
-    "#         image.save(output_image_path)\n",
-    "\n",
-    "#         # Save label\n",
-    "#         annotations = example[\"annotations\"]\n",
-    "#         label_name = image_name.replace(\".jpg\", \".txt\").replace(\".png\", \".txt\")\n",
-    "#         output_label_path = os.path.join(LABEL_DIR, split, label_name)\n",
-    "        \n",
-    "#         with open(output_label_path, \"w\") as label_file:\n",
-    "#             label_file.write(annotations)\n",
-    "\n",
-    "# # Save train and validation splits\n",
-    "# save_ultralytics_format(dataset[\"train\"], \"train\")\n",
-    "# save_ultralytics_format(dataset[\"val\"], \"val\")\n",
-    "\n",
-    "# print(\"Dataset exported to Ultralytics format.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Training** with Ultralytics YOLO"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# from huggingface_hub import hf_hub_download\n",
-    "\n",
-    "# # Correctly set repo_id and repo_type\n",
-    "# repo_id = \"pyronear/pyro-sdis\"\n",
-    "# filename = \"data.yaml\"\n",
-    "\n",
-    "# # Download data.yaml to the current directory\n",
-    "# yaml_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type=\"dataset\", local_dir=\".\")\n",
-    "# print(f\"data.yaml downloaded to: {yaml_path}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Train with Yolo (command line)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# yolo task=detect mode=train data=data.yaml model=yolov8n.pt epochs=50 imgsz=640 single_cls=True"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 🚧 fin Code JL"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Split dataset\n",
-    "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
-    "test_dataset = train_test[\"test\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Dataset({\n",
-       "    features: ['image', 'annotations', 'image_name', 'partner', 'camera', 'date'],\n",
-       "    num_rows: 29537\n",
-       "})"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset[\"train\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "datasets.dataset_dict.DatasetDict"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "type(dataset)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Random Baseline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 17:11:39] Already started tracking\n",
-      "[codecarbon INFO @ 17:11:39] A task is already under measure\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Start tracking emissions\n",
-    "tracker.start()\n",
-    "tracker.start_task(\"inference\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "# YOUR MODEL INFERENCE CODE HERE\n",
-    "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "\n",
-    "# Make random predictions (placeholder for actual model inference)\n",
-    "\n",
-    "predictions = []\n",
-    "true_labels = []\n",
-    "pred_boxes = []\n",
-    "true_boxes_list = []  # List of lists, each inner list contains boxes for one image\n",
-    "\n",
-    "for example in test_dataset:\n",
-    "    # Parse true annotation (YOLO format: class_id x_center y_center width height)\n",
-    "    annotation = example.get(\"annotations\", \"\").strip()\n",
-    "    has_smoke = len(annotation) > 0\n",
-    "    true_labels.append(int(has_smoke))\n",
-    "    \n",
-    "    # Make random classification prediction\n",
-    "    pred_has_smoke = random.random() > 0.5\n",
-    "    predictions.append(int(pred_has_smoke))\n",
-    "    \n",
-    "    # If there's a true box, parse it and make random box prediction\n",
-    "    if has_smoke:\n",
-    "        # Parse all true boxes from the annotation\n",
-    "        image_true_boxes = parse_boxes(annotation)\n",
-    "        true_boxes_list.append(image_true_boxes)\n",
-    "        \n",
-    "        # For baseline, make one random box prediction per image\n",
-    "        # In a real model, you might want to predict multiple boxes\n",
-    "        random_box = [\n",
-    "            random.random(),  # x_center\n",
-    "            random.random(),  # y_center\n",
-    "            random.random() * 0.5,  # width (max 0.5)\n",
-    "            random.random() * 0.5   # height (max 0.5)\n",
-    "        ]\n",
-    "        pred_boxes.append(random_box)\n",
-    "\n",
-    "\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "# YOUR MODEL INFERENCE STOPS HERE\n",
-    "#--------------------------------------------------------------------------------------------   "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 17:12:24] Background scheduler didn't run for a long period (1885s), results might be inaccurate\n",
-      "[codecarbon INFO @ 17:12:24] Energy consumed for RAM : 0.003142 kWh. RAM Power : 6.0 W\n",
-      "[codecarbon INFO @ 17:12:24] Energy consumed for all CPUs : 0.002618 kWh. Total CPU Power : 5.0 W\n",
-      "[codecarbon INFO @ 17:12:24] 0.005760 kWh of electricity used since the beginning.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Stop tracking emissions\n",
-    "emissions_data = tracker.stop_task()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "# Calculate classification metrics\n",
-    "classification_accuracy = accuracy_score(true_labels, predictions)\n",
-    "classification_precision = precision_score(true_labels, predictions)\n",
-    "classification_recall = recall_score(true_labels, predictions)\n",
-    "\n",
-    "# Calculate mean IoU for object detection (only for images with smoke)\n",
-    "# For each image, we compute the max IoU between the predicted box and all true boxes\n",
-    "ious = []\n",
-    "for true_boxes, pred_box in zip(true_boxes_list, pred_boxes):\n",
-    "    max_iou = compute_max_iou(true_boxes, pred_box)\n",
-    "    ious.append(max_iou)\n",
-    "\n",
-    "mean_iou = float(np.mean(ious)) if ious else 0.0"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'submission_timestamp': '2025-01-23T17:13:47.158903',\n",
-       " 'classification_accuracy': 0.4974610697359513,\n",
-       " 'classification_precision': 0.8362892223738063,\n",
-       " 'classification_recall': 0.49625581866019025,\n",
-       " 'mean_iou': 0.0026954029097350594,\n",
-       " 'energy_consumed_wh': 5.759879923426909,\n",
-       " 'emissions_gco2eq': 0.2006914961719638,\n",
-       " 'emissions_data': {'run_id': 'fbab9dd9-2893-4216-91c4-232be358d4dd',\n",
-       "  'duration': 1885.054949500016,\n",
-       "  'emissions': 0.0002006914961719638,\n",
-       "  'emissions_rate': 1.0646457428260931e-07,\n",
-       "  'cpu_power': 5.0,\n",
-       "  'gpu_power': 0.0,\n",
-       "  'ram_power': 6.0,\n",
-       "  'cpu_energy': 0.002618128800231918,\n",
-       "  'gpu_energy': 0,\n",
-       "  'ram_energy': 0.0031417511231949906,\n",
-       "  'energy_consumed': 0.005759879923426909,\n",
-       "  'country_name': 'Switzerland',\n",
-       "  'country_iso_code': 'CHE',\n",
-       "  'region': 'zurich',\n",
-       "  'cloud_provider': '',\n",
-       "  'cloud_region': '',\n",
-       "  'os': 'macOS-15.2-arm64-arm-64bit',\n",
-       "  'python_version': '3.12.7',\n",
-       "  'codecarbon_version': '2.8.3',\n",
-       "  'cpu_count': 8,\n",
-       "  'cpu_model': 'Apple M1',\n",
-       "  'gpu_count': None,\n",
-       "  'gpu_model': None,\n",
-       "  'ram_total_size': 16.0,\n",
-       "  'tracking_mode': 'machine',\n",
-       "  'on_cloud': 'N',\n",
-       "  'pue': 1.0},\n",
-       " 'dataset_config': {'dataset_name': 'pyronear/pyro-sdis',\n",
-       "  'test_size': 0.2,\n",
-       "  'test_seed': 42}}"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "# Prepare results dictionary\n",
-    "results = {\n",
-    "    \"submission_timestamp\": datetime.now().isoformat(),\n",
-    "    \"classification_accuracy\": float(classification_accuracy),\n",
-    "    \"classification_precision\": float(classification_precision),\n",
-    "    \"classification_recall\": float(classification_recall),\n",
-    "    \"mean_iou\": mean_iou,\n",
-    "    \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
-    "    \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
-    "    \"emissions_data\": clean_emissions_data(emissions_data),\n",
-    "    \"dataset_config\": {\n",
-    "        \"dataset_name\": request.dataset_name,\n",
-    "        \"test_size\": request.test_size,\n",
-    "        \"test_seed\": request.test_seed\n",
-    "    }\n",
-    "}\n",
-    "results"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

notebooks/template-text.ipynb DELETED Viewed

@@ -1,1642 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Text task notebook template\n",
-    "## Loading the necessary libraries"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 19:48:07] Multiple instances of codecarbon are allowed to run at the same time.\n",
-      "[codecarbon INFO @ 19:48:07] [setup] RAM Tracking...\n",
-      "[codecarbon INFO @ 19:48:07] [setup] CPU Tracking...\n",
-      "[codecarbon WARNING @ 19:48:09] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
-      "[codecarbon WARNING @ 19:48:09] No CPU tracking mode found. Falling back on CPU constant mode. \n",
-      " Windows OS detected: Please install Intel Power Gadget to measure CPU\n",
-      "\n",
-      "[codecarbon WARNING @ 19:48:11] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
-      "[codecarbon INFO @ 19:48:11] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-1365U\n",
-      "[codecarbon WARNING @ 19:48:11] No CPU tracking mode found. Falling back on CPU constant mode.\n",
-      "[codecarbon INFO @ 19:48:11] [setup] GPU Tracking...\n",
-      "[codecarbon INFO @ 19:48:11] No GPU found.\n",
-      "[codecarbon INFO @ 19:48:11] >>> Tracker's metadata:\n",
-      "[codecarbon INFO @ 19:48:11]   Platform system: Windows-11-10.0.22631-SP0\n",
-      "[codecarbon INFO @ 19:48:11]   Python version: 3.12.7\n",
-      "[codecarbon INFO @ 19:48:11]   CodeCarbon version: 3.0.0_rc0\n",
-      "[codecarbon INFO @ 19:48:11]   Available RAM : 31.347 GB\n",
-      "[codecarbon INFO @ 19:48:11]   CPU count: 12\n",
-      "[codecarbon INFO @ 19:48:11]   CPU model: 13th Gen Intel(R) Core(TM) i7-1365U\n",
-      "[codecarbon INFO @ 19:48:11]   GPU count: None\n",
-      "[codecarbon INFO @ 19:48:11]   GPU model: None\n",
-      "[codecarbon INFO @ 19:48:11] Saving emissions data to file c:\\git\\submission-template\\notebooks\\emissions.csv\n"
-     ]
-    }
-   ],
-   "source": [
-    "from fastapi import APIRouter\n",
-    "from datetime import datetime\n",
-    "from datasets import load_dataset\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "import random\n",
-    "\n",
-    "import sys\n",
-    "sys.path.append('../tasks')\n",
-    "\n",
-    "from utils.evaluation import TextEvaluationRequest\n",
-    "from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
-    "\n",
-    "\n",
-    "# Define the label mapping\n",
-    "LABEL_MAPPING = {\n",
-    "    \"0_not_relevant\": 0,\n",
-    "    \"1_not_happening\": 1,\n",
-    "    \"2_not_human\": 2,\n",
-    "    \"3_not_bad\": 3,\n",
-    "    \"4_solutions_harmful_unnecessary\": 4,\n",
-    "    \"5_science_unreliable\": 5,\n",
-    "    \"6_proponents_biased\": 6,\n",
-    "    \"7_fossil_fuels_needed\": 7\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Loading the datasets and splitting them"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "668da7bf85434e098b95c3ec447d78fe",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--QuotaClimat--frugalaichallenge-text-train. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
-      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
-      "  warnings.warn(message)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5b68d43359eb429395da8be7d4b15556",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "train.parquet:   0%|          | 0.00/1.21M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "140a304773914e9db8f698eabeb40298",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating train split:   0%|          | 0/6091 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6d04e8ab1906400e8e0029949dc523a5",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/6091 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "request = TextEvaluationRequest()\n",
-    "\n",
-    "# Load and prepare the dataset\n",
-    "dataset = load_dataset(request.dataset_name)\n",
-    "\n",
-    "# Convert string labels to integers\n",
-    "dataset = dataset.map(lambda x: {\"label\": LABEL_MAPPING[x[\"label\"]]})\n",
-    "\n",
-    "# Split dataset\n",
-    "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
-    "test_dataset = train_test[\"test\"]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Random Baseline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Start tracking emissions\n",
-    "tracker.start()\n",
-    "tracker.start_task(\"inference\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 6,\n",
-       " 3,\n",
-       " 6,\n",
-       " 6,\n",
-       " 5,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 3,\n",
-       " 6,\n",
-       " 4,\n",
-       " 2,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 2,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 4,\n",
-       " 4,\n",
-       " 1,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 1,\n",
-       " 2,\n",
-       " 2,\n",
-       " 2,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " 2,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 2,\n",
-       " 0,\n",
-       " 1,\n",
-       " 6,\n",
-       " 3,\n",
-       " 2,\n",
-       " 5,\n",
-       " 5,\n",
-       " 2,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 7,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 1,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 2,\n",
-       " 5,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 2,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 0,\n",
-       " 3,\n",
-       " 1,\n",
-       " 4,\n",
-       " 6,\n",
-       " 0,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 2,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 3,\n",
-       " 4,\n",
-       " 4,\n",
-       " 2,\n",
-       " 2,\n",
-       " 3,\n",
-       " 3,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 5,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 0,\n",
-       " 4,\n",
-       " 0,\n",
-       " 1,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 3,\n",
-       " 3,\n",
-       " 0,\n",
-       " 1,\n",
-       " 2,\n",
-       " 4,\n",
-       " 4,\n",
-       " 3,\n",
-       " 1,\n",
-       " 2,\n",
-       " 4,\n",
-       " 3,\n",
-       " 0,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 3,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 4,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 3,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 0,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 3,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 0,\n",
-       " 4,\n",
-       " 1,\n",
-       " 2,\n",
-       " 6,\n",
-       " 3,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 0,\n",
-       " 7,\n",
-       " 4,\n",
-       " 6,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 4,\n",
-       " 6,\n",
-       " 6,\n",
-       " 4,\n",
-       " 0,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 1,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 2,\n",
-       " 4,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 0,\n",
-       " 7,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 5,\n",
-       " 5,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 3,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 5,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 1,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 5,\n",
-       " 6,\n",
-       " 3,\n",
-       " 2,\n",
-       " 3,\n",
-       " 0,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 6,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 2,\n",
-       " 7,\n",
-       " 5,\n",
-       " 7,\n",
-       " 6,\n",
-       " 3,\n",
-       " 5,\n",
-       " 6,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 1,\n",
-       " 2,\n",
-       " 1,\n",
-       " 5,\n",
-       " 3,\n",
-       " 0,\n",
-       " 3,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 0,\n",
-       " 4,\n",
-       " 1,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 2,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 3,\n",
-       " 2,\n",
-       " 6,\n",
-       " 4,\n",
-       " 4,\n",
-       " 1,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 2,\n",
-       " 7,\n",
-       " 1,\n",
-       " 3,\n",
-       " 5,\n",
-       " 2,\n",
-       " 6,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 0,\n",
-       " 5,\n",
-       " 1,\n",
-       " 6,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 1,\n",
-       " 5,\n",
-       " 1,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 2,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 1,\n",
-       " 0,\n",
-       " 5,\n",
-       " 4,\n",
-       " 2,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 5,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 1,\n",
-       " 2,\n",
-       " 3,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 5,\n",
-       " 5,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 4,\n",
-       " 7,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 0,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 5,\n",
-       " 3,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 5,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 2,\n",
-       " 7,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 0,\n",
-       " 2,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 6,\n",
-       " 3,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 7,\n",
-       " 7,\n",
-       " 4,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 3,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 0,\n",
-       " 1,\n",
-       " 3,\n",
-       " 7,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 5,\n",
-       " 3,\n",
-       " 3,\n",
-       " 4,\n",
-       " 0,\n",
-       " 1,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 7,\n",
-       " 5,\n",
-       " 0,\n",
-       " 0,\n",
-       " 5,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 6,\n",
-       " 6,\n",
-       " 0,\n",
-       " 4,\n",
-       " 7,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 5,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 0,\n",
-       " 0,\n",
-       " 3,\n",
-       " 3,\n",
-       " 7,\n",
-       " 3,\n",
-       " 0,\n",
-       " 1,\n",
-       " 1,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 6,\n",
-       " 7,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 4,\n",
-       " 5,\n",
-       " 4,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 2,\n",
-       " 0,\n",
-       " 6,\n",
-       " 0,\n",
-       " 6,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 7,\n",
-       " 7,\n",
-       " 5,\n",
-       " 5,\n",
-       " 1,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 5,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 4,\n",
-       " 7,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " 0,\n",
-       " 6,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 0,\n",
-       " 5,\n",
-       " 0,\n",
-       " 5,\n",
-       " 7,\n",
-       " 2,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 4,\n",
-       " 0,\n",
-       " 7,\n",
-       " 1,\n",
-       " 4,\n",
-       " 5,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 2,\n",
-       " 0,\n",
-       " 2,\n",
-       " 5,\n",
-       " 5,\n",
-       " 6,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 2,\n",
-       " 3,\n",
-       " 2,\n",
-       " 5,\n",
-       " 0,\n",
-       " 7,\n",
-       " 2,\n",
-       " 3,\n",
-       " 7,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 5,\n",
-       " 7,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 3,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 0,\n",
-       " 3,\n",
-       " 4,\n",
-       " 3,\n",
-       " 5,\n",
-       " 2,\n",
-       " 4,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 1,\n",
-       " 3,\n",
-       " 1,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 3,\n",
-       " 0,\n",
-       " 7,\n",
-       " 6,\n",
-       " 2,\n",
-       " 4,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 1,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 4,\n",
-       " 3,\n",
-       " 0,\n",
-       " 7,\n",
-       " 3,\n",
-       " 1,\n",
-       " 2,\n",
-       " 1,\n",
-       " 6,\n",
-       " 4,\n",
-       " 7,\n",
-       " 1,\n",
-       " 7,\n",
-       " 1,\n",
-       " 5,\n",
-       " 1,\n",
-       " 6,\n",
-       " 3,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 5,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 6,\n",
-       " 2,\n",
-       " 4,\n",
-       " 4,\n",
-       " 6,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 4,\n",
-       " 7,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 6,\n",
-       " 1,\n",
-       " 2,\n",
-       " 0,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 5,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 2,\n",
-       " 1,\n",
-       " 6,\n",
-       " 5,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 7,\n",
-       " 5,\n",
-       " 2,\n",
-       " 5,\n",
-       " 4,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 1,\n",
-       " 3,\n",
-       " 3,\n",
-       " 6,\n",
-       " 4,\n",
-       " 6,\n",
-       " 4,\n",
-       " 6,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 2,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 4,\n",
-       " 3,\n",
-       " 3,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 1,\n",
-       " 1,\n",
-       " 7,\n",
-       " 7,\n",
-       " 6,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 4,\n",
-       " 2,\n",
-       " 2,\n",
-       " 3,\n",
-       " 0,\n",
-       " 1,\n",
-       " 4,\n",
-       " 0,\n",
-       " 4,\n",
-       " 6,\n",
-       " 5,\n",
-       " 3,\n",
-       " 2,\n",
-       " 3,\n",
-       " 2,\n",
-       " 3,\n",
-       " 6,\n",
-       " 2,\n",
-       " 1,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 4,\n",
-       " 5,\n",
-       " 6,\n",
-       " 7,\n",
-       " 7,\n",
-       " 2,\n",
-       " 0,\n",
-       " 5,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 6,\n",
-       " 6,\n",
-       " 5,\n",
-       " 4,\n",
-       " 4,\n",
-       " 7,\n",
-       " 0,\n",
-       " 5,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 3,\n",
-       " 1,\n",
-       " 7,\n",
-       " 0,\n",
-       " 1,\n",
-       " 4,\n",
-       " 7,\n",
-       " 5,\n",
-       " 0,\n",
-       " 4,\n",
-       " 0,\n",
-       " 0,\n",
-       " 1,\n",
-       " 0,\n",
-       " 6,\n",
-       " 4,\n",
-       " 0,\n",
-       " 5,\n",
-       " 4,\n",
-       " 6,\n",
-       " 6,\n",
-       " 7,\n",
-       " 2,\n",
-       " 6,\n",
-       " 2,\n",
-       " 6,\n",
-       " 0,\n",
-       " 3,\n",
-       " 2,\n",
-       " 2,\n",
-       " 1,\n",
-       " 5,\n",
-       " 4,\n",
-       " 7,\n",
-       " 6,\n",
-       " 6,\n",
-       " 2,\n",
-       " 5,\n",
-       " 5,\n",
-       " 5,\n",
-       " 0,\n",
-       " 3,\n",
-       " 5,\n",
-       " 4,\n",
-       " 5,\n",
-       " 7,\n",
-       " 5,\n",
-       " 0,\n",
-       " 5,\n",
-       " 0,\n",
-       " 0,\n",
-       " 2,\n",
-       " 0,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 2,\n",
-       " 4,\n",
-       " 3,\n",
-       " 4,\n",
-       " 1,\n",
-       " 7,\n",
-       " 2,\n",
-       " 1,\n",
-       " 0,\n",
-       " 3,\n",
-       " 0,\n",
-       " 3,\n",
-       " 1,\n",
-       " 1,\n",
-       " 0,\n",
-       " 5,\n",
-       " 3,\n",
-       " 1,\n",
-       " 2,\n",
-       " 5,\n",
-       " 6,\n",
-       " 7,\n",
-       " 6,\n",
-       " 7,\n",
-       " 0,\n",
-       " 2,\n",
-       " 6,\n",
-       " 3,\n",
-       " 1,\n",
-       " 5,\n",
-       " 4,\n",
-       " 2,\n",
-       " 4,\n",
-       " 6,\n",
-       " 5,\n",
-       " 2,\n",
-       " 7,\n",
-       " ...]"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "# YOUR MODEL INFERENCE CODE HERE\n",
-    "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
-    "#--------------------------------------------------------------------------------------------   \n",
-    "\n",
-    "# Make random predictions (placeholder for actual model inference)\n",
-    "true_labels = test_dataset[\"label\"]\n",
-    "predictions = [random.randint(0, 7) for _ in range(len(true_labels))]\n",
-    "\n",
-    "predictions\n",
-    "\n",
-    "#--------------------------------------------------------------------------------------------\n",
-    "# YOUR MODEL INFERENCE STOPS HERE\n",
-    "#--------------------------------------------------------------------------------------------   "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 19:53:32] Background scheduler didn't run for a long period (47s), results might be inaccurate\n",
-      "[codecarbon INFO @ 19:53:32] Energy consumed for RAM : 0.000156 kWh. RAM Power : 11.755242347717285 W\n",
-      "[codecarbon INFO @ 19:53:32] Delta energy consumed for CPU with constant : 0.000564 kWh, power : 42.5 W\n",
-      "[codecarbon INFO @ 19:53:32] Energy consumed for All CPU : 0.000564 kWh\n",
-      "[codecarbon INFO @ 19:53:32] 0.000720 kWh of electricity used since the beginning.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "EmissionsData(timestamp='2025-01-21T19:53:32', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=47.736408500000834, emissions=4.032368007471064e-05, emissions_rate=8.444466886328872e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.0005636615353475565, gpu_energy=0, ram_energy=0.00015590305493261682, energy_consumed=0.0007195645902801733, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Stop tracking emissions\n",
-    "emissions_data = tracker.stop_task()\n",
-    "emissions_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.10090237899917966"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Calculate accuracy\n",
-    "accuracy = accuracy_score(true_labels, predictions)\n",
-    "accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'submission_timestamp': '2025-01-21T19:53:46.639165',\n",
-       " 'accuracy': 0.10090237899917966,\n",
-       " 'energy_consumed_wh': 0.7195645902801733,\n",
-       " 'emissions_gco2eq': 0.040323680074710634,\n",
-       " 'emissions_data': {'run_id': '908f2e7e-4bb2-4991-a0f6-56bf8d7eda21',\n",
-       "  'duration': 47.736408500000834,\n",
-       "  'emissions': 4.032368007471064e-05,\n",
-       "  'emissions_rate': 8.444466886328872e-07,\n",
-       "  'cpu_power': 42.5,\n",
-       "  'gpu_power': 0.0,\n",
-       "  'ram_power': 11.755242347717285,\n",
-       "  'cpu_energy': 0.0005636615353475565,\n",
-       "  'gpu_energy': 0,\n",
-       "  'ram_energy': 0.00015590305493261682,\n",
-       "  'energy_consumed': 0.0007195645902801733,\n",
-       "  'country_name': 'France',\n",
-       "  'country_iso_code': 'FRA',\n",
-       "  'region': 'île-de-france',\n",
-       "  'cloud_provider': '',\n",
-       "  'cloud_region': '',\n",
-       "  'os': 'Windows-11-10.0.22631-SP0',\n",
-       "  'python_version': '3.12.7',\n",
-       "  'codecarbon_version': '3.0.0_rc0',\n",
-       "  'cpu_count': 12,\n",
-       "  'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n",
-       "  'gpu_count': None,\n",
-       "  'gpu_model': None,\n",
-       "  'ram_total_size': 31.347312927246094,\n",
-       "  'tracking_mode': 'machine',\n",
-       "  'on_cloud': 'N',\n",
-       "  'pue': 1.0},\n",
-       " 'dataset_config': {'dataset_name': 'QuotaClimat/frugalaichallenge-text-train',\n",
-       "  'test_size': 0.2,\n",
-       "  'test_seed': 42}}"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Prepare results dictionary\n",
-    "results = {\n",
-    "    \"submission_timestamp\": datetime.now().isoformat(),\n",
-    "    \"accuracy\": float(accuracy),\n",
-    "    \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
-    "    \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
-    "    \"emissions_data\": clean_emissions_data(emissions_data),\n",
-    "    \"dataset_config\": {\n",
-    "        \"dataset_name\": request.dataset_name,\n",
-    "        \"test_size\": request.test_size,\n",
-    "        \"test_seed\": request.test_seed\n",
-    "    }\n",
-    "}\n",
-    "\n",
-    "results"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Development of the model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "90f50ab19698484489f36976745efad3",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\models--facebook--bart-large-mnli. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
-      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
-      "  warnings.warn(message)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6e3974d8ff284603821f7beca9bd353d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bc29cb379c644b00b1bdf61d5426d99d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "635503cf819747c9a83f22aa4f2f11db",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3a5f53e451e8483ca7c33f42245abd13",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "84f922d1b68a4a0faa5e920d004efca0",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Device set to use cpu\n"
-     ]
-    }
-   ],
-   "source": [
-    "from transformers import pipeline\n",
-    "classifier = pipeline(\"zero-shot-classification\",\n",
-    "                      model=\"facebook/bart-large-mnli\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sequence_to_classify = \"one day I will see the world\"\n",
-    "\n",
-    "candidate_labels = [\n",
-    "    \"Not related to climate change disinformation\",\n",
-    "    \"Climate change is not real and not happening\",\n",
-    "    \"Climate change is not human-induced\",\n",
-    "    \"Climate change impacts are not that bad\",\n",
-    "    \"Climate change solutions are harmful and unnecessary\",\n",
-    "    \"Climate change science is unreliable\",\n",
-    "    \"Climate change proponents are biased\",\n",
-    "    \"Fossil fuels are needed to address climate change\"\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'sequence': 'one day I will see the world',\n",
-       " 'labels': ['Fossil fuels are needed to address climate change',\n",
-       "  'Climate change science is unreliable',\n",
-       "  'Not related to climate change disinformation',\n",
-       "  'Climate change proponents are biased',\n",
-       "  'Climate change impacts are not that bad',\n",
-       "  'Climate change solutions are harmful and unnecessary',\n",
-       "  'Climate change is not human-induced',\n",
-       "  'Climate change is not real and not happening'],\n",
-       " 'scores': [0.16242119669914246,\n",
-       "  0.15683825314044952,\n",
-       "  0.1564282774925232,\n",
-       "  0.14603719115257263,\n",
-       "  0.12794046103954315,\n",
-       "  0.10180754214525223,\n",
-       "  0.0936085507273674,\n",
-       "  0.0549185685813427]}"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "classifier(sequence_to_classify, candidate_labels)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 11:00:07] Already started tracking\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5d66a13f76a4411d95b62d4a73012495",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "0it [00:00, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[codecarbon WARNING @ 11:05:57] Background scheduler didn't run for a long period (349s), results might be inaccurate\n",
-      "[codecarbon INFO @ 11:05:57] Energy consumed for RAM : 0.018069 kWh. RAM Power : 11.755242347717285 W\n",
-      "[codecarbon INFO @ 11:05:57] Delta energy consumed for CPU with constant : 0.004122 kWh, power : 42.5 W\n",
-      "[codecarbon INFO @ 11:05:57] Energy consumed for All CPU : 0.065327 kWh\n",
-      "[codecarbon INFO @ 11:05:57] 0.083395 kWh of electricity used since the beginning.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "EmissionsData(timestamp='2025-01-22T11:05:57', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=349.19709450000664, emissions=0.0002949120266226386, emissions_rate=8.445461750018632e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.004122396676597424, gpu_energy=0, ram_energy=0.0011402244733631148, energy_consumed=0.005262621149960539, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Start tracking emissions\n",
-    "tracker.start()\n",
-    "tracker.start_task(\"inference\")\n",
-    "\n",
-    "from tqdm.auto import tqdm\n",
-    "predictions = []\n",
-    "\n",
-    "\n",
-    "\n",
-    "# Option 1: Simple loop approach\n",
-    "\n",
-    "for i, text in tqdm(enumerate(test_dataset[\"quote\"])):\n",
-    "\n",
-    "    result = classifier(text, candidate_labels)\n",
-    "\n",
-    "    # Get index of highest scoring label\n",
-    "\n",
-    "    pred_label = candidate_labels.index(result[\"labels\"][0])\n",
-    "\n",
-    "    predictions.append(pred_label)\n",
-    "    if i == 100:\n",
-    "        break\n",
-    "\n",
-    "\n",
-    "# Stop tracking emissions\n",
-    "emissions_data = tracker.stop_task()\n",
-    "emissions_data\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.4"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Calculate accuracy\n",
-    "accuracy = accuracy_score(true_labels[:100], predictions[:100])\n",
-    "accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

src/load_data.py ADDED Viewed

	@@ -0,0 +1,97 @@

+"""Load dataset and save locally in Ultralytics format"""
+from datasets import load_dataset
+import logging
+import os
+import pandas as pd
+# Save in Ultralytics format
+def save_ultralytics_format(dataset_split, split, IMAGE_DIR, LABEL_DIR):
+    """Save a dataset split into the Ultralytics format.
+    Args:
+        dataset_split: The dataset split (e.g., dataset["train"])
+        split: "train" or "val"
+    """
+    image_split_dir = os.path.join(IMAGE_DIR, split)
+    label_split_dir = os.path.join(LABEL_DIR, split)
+    if len(os.listdir(image_split_dir)) > 0 or len(os.listdir(label_split_dir)) > 0:
+        logging.info(f"{image_split_dir} or {label_split_dir} not empty: passing")
+    else:
+        for example in dataset_split:
+            # Save image to appropriate folder
+            image = example["image"]  # PIL.Image.Image
+            image_name = example["image_name"]  # Original file name
+            output_image_path = os.path.join(image_split_dir, image_name)
+            # Save image object to disk
+            image.save(output_image_path)
+            # Save label
+            annotations = example["annotations"]
+            label_name = image_name.replace(".jpg", ".txt").replace(".png", ".txt")
+            output_label_path = os.path.join(label_split_dir, label_name)
+            # Save label file
+            with open(output_label_path, "w") as label_file:
+                label_file.write(annotations)
+        logging.info(f"Dataset {split} split exported to Ultralytics format")
+def create_df(ds, split_name, OUTPUT_DIR):
+    """Create dataframe from dataset"""
+    df = pd.DataFrame(
+        [[i.size[0], i.size[1], i.format, i.mode] for i in ds["image"]],
+        columns=["width", "height", "format", "mode"]
+    )
+    df["name"] = ds["image_name"]
+    df["uri"] = df['name'].apply(lambda x: os.path.join(OUTPUT_DIR, "images", split_name, x))
+    df["annotations"] = ds["annotations"]
+    df["partner"] = ds["partner"]
+    df["camera"] = ds["camera"]
+    df["timestamp"] = ds["date"]
+    return df
+def load_data(OUTPUT_DIR, REPO_ID, DB_INFO_URI):
+    """Load data and save to local directory in Ultralytics format
+    """
+    # Check if data information already exists before eventually loading model
+    db_info_path = os.path.join(OUTPUT_DIR, DB_INFO_URI)
+    if os.path.exists(db_info_path):
+        df = pd.read_csv(db_info_path, index_col=0)
+        return df
+    # Create the directory structure
+    IMAGE_DIR = os.path.join(OUTPUT_DIR, "images")
+    LABEL_DIR = os.path.join(OUTPUT_DIR, "labels")
+    for split in ["train", "val"]:
+        os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)
+        os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)
+    # Load the dataset from the Hugging Face Hub
+    dataset = load_dataset(REPO_ID)
+    logging.info("Dataset loaded in cache folder")
+    # Save train and validation splits
+    save_ultralytics_format(dataset["train"], "train", IMAGE_DIR, LABEL_DIR)
+    save_ultralytics_format(dataset["val"], "val", IMAGE_DIR, LABEL_DIR)
+    # Create global dataframe from splits
+    df_val = create_df(dataset["val"], "val", OUTPUT_DIR)
+    # Separate train to save memory
+    df_train_1 = create_df(dataset["train"][:10000], "train", OUTPUT_DIR)
+    df_train_2 = create_df(dataset["train"][10000:20000], "train", OUTPUT_DIR)
+    df_train_3 = create_df(dataset["train"][20000:], "train", OUTPUT_DIR)
+    # Save as one CSV
+    df = pd.concat([df_val, df_train_1, df_train_2, df_train_3], axis=0, ignore_index=True)
+    with open(db_info_path, "wb") as f:
+        df.to_csv(f)
+    return df
+if __name__ == "__main__":
+    # NOTE(review): help() without arguments starts Python's interactive help
+    # prompt; presumably a placeholder for a real entry point — confirm
+    help()

src/models.py ADDED Viewed

	@@ -0,0 +1,395 @@

+"""Training utilities"""
+# OS & env
+import os
+import logging
+import datetime
+import time
+# DS, ML & DL
+import numpy as np
+from sklearn.metrics import confusion_matrix, classification_report
+from keras.utils import image_dataset_from_directory
+from keras.layers import RandomFlip, RandomRotation, RandomZoom
+from keras.layers import GaussianNoise, RandomContrast, RandomBrightness
+from tensorflow.keras.callbacks import Callback, TensorBoard
+from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
+import tensorflow as tf
+# images & data viz
+import matplotlib.pyplot as plt
+import seaborn as sns
+class ConditionalAugmentation(tf.keras.layers.Layer):
+    def __init__(self, rate=0.2, **kwargs):
+        super(ConditionalAugmentation, self).__init__(**kwargs)
+        self.rate = rate
+        self.flip = RandomFlip("horizontal")
+        self.rotation = RandomRotation(0.25)
+        self.zoom = RandomZoom(0.1)
+        self.noise = GaussianNoise(0.1)
+        self.contrast = RandomContrast(0.1)
+        self.brightness = RandomBrightness(0.1)
+    def call(self, inputs, training=None):
+        if training:
+            x = inputs
+            x = tf.cond(
+                tf.random.uniform(()) < self.rate, lambda: self.flip(x), lambda: x
+            )
+            x = tf.cond(
+                tf.random.uniform(()) < self.rate, lambda: self.rotation(x), lambda: x
+            )
+            x = tf.cond(
+                tf.random.uniform(()) < self.rate, lambda: self.zoom(x), lambda: x
+            )
+            x = tf.cond(
+                tf.random.uniform(()) < self.rate, lambda: self.noise(x), lambda: x
+            )
+            x = tf.cond(
+                tf.random.uniform(()) < self.rate, lambda: self.contrast(x), lambda: x
+            )
+            x = tf.cond(
+                tf.random.uniform(()) < self.rate, lambda: self.brightness(x), lambda: x
+            )
+            return x
+        return inputs
+def evaluate_model(
+    model,
+    model_arch,
+    train_ds,
+    val_ds,
+    test_ds,
+    LOG_DIR,
+    CHKPT_DIR,
+    model_name="raw_model",
+    input_size=(224, 224),
+    batch_size=32,
+    n_epochs=10,
+    optimizer="adam",
+    loss="sparse_categorical_crossentropy",
+    metrics=["accuracy", "categorical_accuracy"],
+) -> tuple:
+    """Train, evaluate and log model from architecture and configuration
+    Return model, history and plot confusion matrix
+    """
+    if not os.path.exists(CHKPT_DIR):
+        os.makedirs(CHKPT_DIR)
+    chkpt_name = model_name + ".weights.h5"
+    chkpt_uri = os.path.join(CHKPT_DIR, chkpt_name)
+    model_config = f"""
+| Config | Value |
+|:---:|:---:|
+| **model name** | {model_name} |
+| **input size** | {input_size} |
+| **batch size** | {batch_size} |
+| **n epochs** | {n_epochs} |
+| **optimizer** | {optimizer} |
+| **loss** | {loss} |
+| **metrics** | {metrics} |
+| **best weights URI** | {chkpt_uri} |
+    """
+    # set log folder
+    log_dir = os.path.join(
+        LOG_DIR, model_name, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+    )
+    # COMPILE
+    logging.info("⚙️ compiling")
+    model.compile(
+        optimizer=optimizer,
+        loss=loss,
+        metrics=metrics,
+    )
+    # CALLBACKS
+    logging.info("🛎️ declaring callbacks")
+    class TimingCallback(Callback):
+        def __init__(self):
+            self.logs = []
+            self.start_time = None
+        def on_train_begin(self, logs={}):
+            self.start_time = time.time()
+        # log time by epoch
+        def on_epoch_end(self, epoch, logs={}):
+            self.logs.append(time.time() - self.start_time)
+        # log total time
+        def on_train_end(self, logs={}):
+            self.tot_time_sec = time.time() - self.start_time
+            self.total_time = f"Total train time: {self.tot_time_sec // 60 :.0f}'{self.tot_time_sec % 60 :.0f}s"
+    timing_callback = TimingCallback()
+    checkpoint = ModelCheckpoint(
+        chkpt_uri,
+        save_best_only=True,
+        save_weights_only=True,
+    )
+    early_stopping = EarlyStopping(
+        monitor="val_loss", patience=6, restore_best_weights=True
+    )
+    tensorboard_callback = TensorBoard(
+        log_dir=log_dir,
+        histogram_freq=0,  # do not save weights & biases (too much memory)
+        write_graph=True,
+        write_images=True,
+        update_freq="epoch",
+    )
+    # FIT
+    logging.info("💪 starting training")
+    model_history = model.fit(
+        train_ds,
+        validation_data=val_ds,
+        epochs=n_epochs,
+        callbacks=[timing_callback, checkpoint, early_stopping, tensorboard_callback],
+    )
+    # EVALUATE ON TEST DATASET
+    logging.info("🧐 evaluating model")
+    model.load_weights(chkpt_uri)
+    test_loss, *test_metrics = model.evaluate(test_ds)
+    predictions = model.predict(test_ds)
+    # CONFUSION MATRIX
+    logging.info("📈 plotting results")
+    # get true labels from test dataset
+    true_labels = np.concatenate([y for x, y in test_ds], axis=0)
+    # convert predictions to classes
+    predicted_classes = np.argmax(predictions, axis=1)
+    # compute confusion matrix
+    conf_matrix = confusion_matrix(true_labels, predicted_classes)
+    # precision & F1 score
+    report = classification_report(
+        true_labels,
+        predicted_classes,
+        target_names=test_ds.class_names,
+    )
+    report_dict = classification_report(
+        true_labels,
+        predicted_classes,
+        target_names=test_ds.class_names,
+        output_dict=True,
+    )
+    print(report)
+    # plot it
+    conf_mtx_plot = plt.figure(figsize=(6, 4))
+    sns.heatmap(
+        conf_matrix,
+        annot=True,
+        fmt="d",
+        cmap="Blues",
+        xticklabels=test_ds.class_names,
+        yticklabels=test_ds.class_names,
+    )
+    plt.suptitle(f"{model_name} model", color="blue", weight="bold")
+    plt.title(
+        f"acc. {report_dict['accuracy'] :.02f} - loss {test_loss :.02f} - {timing_callback.total_time}",
+        fontsize=10,
+    )
+    plt.xlabel("Predictions", color="red", weight="bold")
+    plt.ylabel("True labels", color="green", weight="bold")
+    plt.show()
+    # convert image for Tensorboard
+    conf_mtx_plot.canvas.draw()
+    image_array = np.array(conf_mtx_plot.canvas.renderer.buffer_rgba())
+    conf_mtx_plot_tf = tf.convert_to_tensor(image_array)
+    conf_mtx_plot_tf = tf.expand_dims(conf_mtx_plot_tf, 0)
+    plt.close()
+    # LOG IN TENSORBOARD
+    logging.info("📓 logging results")
+    file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
+    with file_writer.as_default():
+        tf.summary.text("configuration", model_config, step=0)
+        tf.summary.text("architecture", model_arch, step=0)
+        tf.summary.text("total_training_time", timing_callback.total_time, step=0)
+        for i, time_per_epoch in enumerate(timing_callback.logs):
+            tf.summary.scalar("time_per_epoch", time_per_epoch, step=i + 1)
+        tf.summary.image("confusion_matrix", conf_mtx_plot_tf, step=0)
+    return model, model_history
+def eval_pretrained_model(
+    model,
+    train_ds,
+    val_ds,
+    test_ds,
+    LOG_DIR,
+    CHKPT_DIR,
+    model_name="raw_model",
+    input_size=(224, 224),
+    batch_size=32,
+    n_epochs=10,
+    optimizer="adam",
+    loss="sparse_categorical_crossentropy",
+    metrics=["accuracy"],
+) -> tuple:
+    """Train, evaluate and log pre-trained model from architecture and configuration
+    Return model, history and plot confusion matrix
+    """
+    if not os.path.exists(CHKPT_DIR):
+        os.makedirs(CHKPT_DIR)
+    chkpt_name = model_name + ".weights.h5"
+    chkpt_uri = os.path.join(CHKPT_DIR, chkpt_name)
+    model_config = f"""
+| Config | Value |
+|:---:|:---:|
+| **model name** | {model_name} |
+| **input size** | {input_size} |
+| **batch size** | {batch_size} |
+| **n epochs** | {n_epochs} |
+| **optimizer** | {optimizer} |
+| **loss** | {loss} |
+| **metrics** | {metrics} |
+| **best weights URI** | {chkpt_uri} |
+    """
+    # set log folder
+    log_dir = os.path.join(
+        LOG_DIR, model_name, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+    )
+    # COMPLIE
+    logging.info("⚙️ compiling")
+    model.compile(
+        optimizer=optimizer,
+        loss=loss,
+        metrics=metrics,
+    )
+    # CALLBACKS
+    logging.info("🛎️ declaring callbacks")
+    class TimingCallback(Callback):
+        def __init__(self):
+            self.logs = []
+            self.start_time = None
+        def on_train_begin(self, logs={}):
+            self.start_time = time.time()
+        # log time by epoch
+        def on_epoch_end(self, epoch, logs={}):
+            self.logs.append(time.time() - self.start_time)
+        # log total time
+        def on_train_end(self, logs={}):
+            self.tot_time_sec = time.time() - self.start_time
+            self.total_time = f"Total train time: {self.tot_time_sec // 60 :.0f}'{self.tot_time_sec % 60 :.0f}s"
+    timing_callback = TimingCallback()
+    checkpoint = ModelCheckpoint(
+        chkpt_uri,
+        save_best_only=True,
+        save_weights_only=True,
+    )
+    early_stopping = EarlyStopping(
+        monitor="val_loss", patience=10, restore_best_weights=True
+    )
+    tensorboard_callback = TensorBoard(
+        log_dir=log_dir,
+        histogram_freq=0,  # do not save weights & biases (too much memory)
+        write_graph=True,
+        write_images=True,
+        update_freq="epoch",
+    )
+    # FIT
+    logging.info("💪 starting training")
+    model_history = model.fit(
+        train_ds,
+        validation_data=val_ds,
+        epochs=n_epochs,
+        callbacks=[timing_callback, checkpoint, early_stopping, tensorboard_callback],
+    )
+    # EVALUATE ON TEST DATASET
+    logging.info("🧐 evaluating model")
+    model.load_weights(chkpt_uri)
+    test_loss, *test_metrics = model.evaluate(test_ds)
+    predictions = model.predict(test_ds)
+    # CONFUSION MATRIX
+    logging.info("📈 plotting results")
+    # get true labels from test dataset
+    true_labels = np.concatenate([y for x, y in test_ds], axis=0)
+    # convert predictions to classes
+    predicted_classes = np.argmax(predictions, axis=1)
+    # compute confusion matrix
+    conf_matrix = confusion_matrix(true_labels, predicted_classes)
+    # precision & F1 score
+    report = classification_report(
+        true_labels,
+        predicted_classes,
+        target_names=test_ds.class_names,
+    )
+    report_dict = classification_report(
+        true_labels,
+        predicted_classes,
+        target_names=test_ds.class_names,
+        output_dict=True,
+    )
+    print(report)
+    # plot it
+    conf_mtx_plot = plt.figure(figsize=(6, 4))
+    sns.heatmap(
+        conf_matrix,
+        annot=True,
+        fmt="d",
+        cmap="Blues",
+        xticklabels=test_ds.class_names,
+        yticklabels=test_ds.class_names,
+    )
+    plt.suptitle(f"{model_name} model", color="blue", weight="bold")
+    plt.title(
+        f"acc. {report_dict['accuracy'] :.02f} - loss {test_loss :.02f} - {timing_callback.total_time}",
+        fontsize=10,
+    )
+    plt.xlabel("Predictions", color="red", weight="bold")
+    plt.ylabel("True labels", color="green", weight="bold")
+    plt.show()
+    # convert image for Tensorboard
+    conf_mtx_plot.canvas.draw()
+    image_array = np.array(conf_mtx_plot.canvas.renderer.buffer_rgba())
+    conf_mtx_plot_tf = tf.convert_to_tensor(image_array)
+    conf_mtx_plot_tf = tf.expand_dims(conf_mtx_plot_tf, 0)
+    plt.close()
+    # LOG IN TENSORBOARD
+    logging.info("📓 logging results")
+    file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
+    with file_writer.as_default():
+        tf.summary.text("configuration", model_config, step=0)
+        tf.summary.text("total_training_time", timing_callback.total_time, step=0)
+        for i, time_per_epoch in enumerate(timing_callback.logs):
+            tf.summary.scalar("time_per_epoch", time_per_epoch, step=i + 1)
+        tf.summary.image("confusion_matrix", conf_mtx_plot_tf, step=0)
+    return model, model_history
+if __name__ == "__main__":
+    # NOTE(review): assuming `help` is the builtin, calling it without an
+    # argument opens Python's interactive help prompt rather than printing
+    # this module's documentation - confirm this is the intended behavior.
+    help()

tasks/utils/load_data.py DELETED Viewed

@@ -1,59 +0,0 @@
-"""Load dataset and save locally in Ultralytics format"""
-from datasets import load_dataset
-import os
-def load_data(REPO_ID, OUTPUT_DIR):
-    """Load data and save to local directory"""
-    IMAGE_DIR = os.path.join(OUTPUT_DIR, "images")
-    LABEL_DIR = os.path.join(OUTPUT_DIR, "labels")
-    # 🚧 CHECK IF FOLDER EXISTS
-    # 🚧 CHECK IF FOLDER EXISTS
-    # 🚧 CHECK IF FOLDER EXISTS
-    # 🚧 CHECK IF FOLDER EXISTS
-    # Create the directory structure
-    for split in ["train", "val"]:
-        os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)
-        os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)
-    # Load the dataset from the Hugging Face Hub
-    dataset = load_dataset(REPO_ID)
-    # Save in Ultralytics format
-    def save_ultralytics_format(dataset_split, split):
-        """
-        Save a dataset split into the Ultralytics format.
-        Args:
-            dataset_split: The dataset split (e.g., dataset["train"])
-            split: "train" or "val"
-        """
-        for example in dataset_split:
-            # Save the image to the appropriate folder
-            image = example["image"]  # PIL.Image.Image
-            image_name = example["image_name"]  # Original file name
-            output_image_path = os.path.join(IMAGE_DIR, split, image_name)
-            # Save the image object to disk
-            image.save(output_image_path)
-            # Save label
-            annotations = example["annotations"]
-            label_name = image_name.replace(".jpg", ".txt").replace(".png", ".txt")
-            output_label_path = os.path.join(LABEL_DIR, split, label_name)
-            with open(output_label_path, "w") as label_file:
-                label_file.write(annotations)
-    # Save train and validation splits
-    save_ultralytics_format(dataset["train"], "train")
-    save_ultralytics_format(dataset["val"], "val")
-    print("Dataset exported to Ultralytics format.")
-if __name__ == "__main__":
-    # NOTE(review): assuming `help` is the builtin, calling it without an
-    # argument opens Python's interactive help prompt rather than printing
-    # this module's documentation - confirm this is the intended behavior.
-    help()