text-Nonnormalizable

Sleeping

App Files Files Community

Nonnormalizable committed on Jan 20

Commit

250d2de

1 Parent(s): 9f48354

Train on just the training set. Automatic model card.

Browse files

Files changed (2) hide show

Finetune BERT.ipynb +543 -301
tasks/text.py +3 -10

Finetune BERT.ipynb CHANGED Viewed

@@ -1,16 +1,24 @@
 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": 1,
    "id": "73e72549-69f2-46b5-b0f5-655777139972",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:17:50.964659Z",
-     "iopub.status.busy": "2025-01-17T18:17:50.964450Z",
-     "iopub.status.idle": "2025-01-17T18:17:53.646932Z",
-     "shell.execute_reply": "2025-01-17T18:17:53.646697Z",
-     "shell.execute_reply.started": "2025-01-17T18:17:50.964637Z"
     }
    },
    "outputs": [],
@@ -20,9 +28,15 @@
     "import torch\n",
     "from torch import nn\n",
     "from transformers import BertTokenizer, BertModel\n",
-    "from huggingface_hub import PyTorchModelHubMixin, notebook_login\n",
-    "from torch.utils.data import Dataset, DataLoader\n",
-    "from datasets import load_dataset"
    ]
   },
   {
@@ -31,11 +45,11 @@
    "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:17:53.648499Z",
-     "iopub.status.busy": "2025-01-17T18:17:53.648417Z",
-     "iopub.status.idle": "2025-01-17T18:17:53.650284Z",
-     "shell.execute_reply": "2025-01-17T18:17:53.650113Z",
-     "shell.execute_reply.started": "2025-01-17T18:17:53.648489Z"
     }
    },
    "outputs": [],
@@ -43,17 +57,25 @@
     "notebook_login(new_session=False)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
    "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:35:15.421761Z",
-     "iopub.status.busy": "2025-01-17T18:35:15.421353Z",
-     "iopub.status.idle": "2025-01-17T18:35:15.433782Z",
-     "shell.execute_reply": "2025-01-17T18:35:15.433001Z",
-     "shell.execute_reply.started": "2025-01-17T18:35:15.421734Z"
     }
    },
    "outputs": [],
@@ -63,6 +85,41 @@
     "    print(time_str, x)\n",
     "\n",
     "\n",
     "class BertClassifier(nn.Module, PyTorchModelHubMixin):\n",
     "    def __init__(self, num_labels=8, bert_variety=\"bert-base-uncased\"):\n",
     "        super().__init__()\n",
@@ -98,12 +155,12 @@
     "        return len(self.labels)\n",
     "\n",
     "\n",
-    "def train_model(model, train_dataloader, device, num_epochs):\n",
     "    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)\n",
     "    criterion = nn.CrossEntropyLoss()\n",
     "    model.train()\n",
     "\n",
-    "    my_print(\"Starting epoch 1.\")\n",
     "    for epoch in range(num_epochs):\n",
     "        total_loss = 0\n",
     "        for batch in train_dataloader:\n",
@@ -121,7 +178,7 @@
     "\n",
     "            total_loss += loss.item()\n",
     "        avg_loss = total_loss / len(train_dataloader)\n",
-    "        my_print(f\"Epoch {epoch+1}/{num_epochs} done, Average Loss: {avg_loss:0.4f}\")"
    ]
   },
   {
@@ -130,11 +187,11 @@
    "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:17:57.885732Z",
-     "iopub.status.busy": "2025-01-17T18:17:57.884455Z",
-     "iopub.status.idle": "2025-01-17T18:17:57.919509Z",
-     "shell.execute_reply": "2025-01-17T18:17:57.919081Z",
-     "shell.execute_reply.started": "2025-01-17T18:17:57.885667Z"
     }
    },
    "outputs": [],
@@ -154,11 +211,11 @@
    "id": "695bc080-bbd7-4937-af5b-50db1c936500",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:17:58.556031Z",
-     "iopub.status.busy": "2025-01-17T18:17:58.555349Z",
-     "iopub.status.idle": "2025-01-17T18:17:58.564519Z",
-     "shell.execute_reply": "2025-01-17T18:17:58.563640Z",
-     "shell.execute_reply.started": "2025-01-17T18:17:58.555979Z"
     }
    },
    "outputs": [],
@@ -171,10 +228,19 @@
     "    batch_size=32,\n",
     "):\n",
     "    hf_dataset = load_dataset(\"quotaclimat/frugalaichallenge-text-train\")\n",
     "    if not max_dataset_size == \"full\" and max_dataset_size < len(hf_dataset[\"train\"]):\n",
-    "        train_dataset = hf_dataset[\"train\"][:max_dataset_size]\n",
     "    else:\n",
-    "        train_dataset = hf_dataset[\"train\"]\n",
     "\n",
     "    tokenizer = BertTokenizer.from_pretrained(bert_variety, max_length=max_length)\n",
     "    model = BertClassifier(bert_variety=bert_variety)\n",
@@ -187,29 +253,64 @@
     "        device = torch.device(\"cpu\")\n",
     "    model.to(device)\n",
     "\n",
-    "    dataset = TextDataset(\n",
     "        train_dataset[\"quote\"],\n",
     "        train_dataset[\"label\"],\n",
     "        tokenizer=tokenizer,\n",
     "        max_length=max_length,\n",
     "    )\n",
-    "    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)\n",
     "\n",
-    "    train_model(model, dataloader, device, num_epochs=num_epochs)\n",
     "    return model, tokenizer"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
    "id": "792fd13f-e7cc-4d90-832d-c0da15e193cd",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T15:22:41.286449Z",
-     "iopub.status.busy": "2025-01-17T15:22:41.285811Z",
-     "iopub.status.idle": "2025-01-17T15:24:35.507909Z",
-     "shell.execute_reply": "2025-01-17T15:24:35.506587Z",
-     "shell.execute_reply.started": "2025-01-17T15:22:41.286404Z"
     }
    },
    "outputs": [
@@ -217,16 +318,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-17 07:22:44 Starting epoch 1.\n",
-      "2025-01-17 07:23:21 Epoch 1/3 done, Average Loss: 1.8129\n",
-      "2025-01-17 07:23:58 Epoch 2/3 done, Average Loss: 1.3089\n",
-      "2025-01-17 07:24:35 Epoch 3/3 done, Average Loss: 0.8916\n"
      ]
     }
    ],
    "source": [
     "model, tokenizer = run_training(\n",
-    "    max_dataset_size=16 * 100,\n",
     "    bert_variety=\"bert-base-uncased\",\n",
     "    max_length=128,\n",
     "    num_epochs=3,\n",
@@ -236,15 +337,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T15:24:46.754460Z",
-     "iopub.status.busy": "2025-01-17T15:24:46.753753Z",
-     "iopub.status.idle": "2025-01-17T15:24:47.249458Z",
-     "shell.execute_reply": "2025-01-17T15:24:47.249207Z",
-     "shell.execute_reply.started": "2025-01-17T15:24:46.754391Z"
     }
    },
    "outputs": [
@@ -252,7 +353,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-17 07:24:47 Predictions: tensor([0, 1, 3, 6, 2, 3, 6], device='mps:0')\n"
      ]
     }
    ],
@@ -283,38 +384,11 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "881b738e-2392-4b7e-a0de-a0bad572ddfa",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2025-01-17T04:47:17.334399Z",
-     "iopub.status.busy": "2025-01-17T04:47:17.334287Z",
-     "iopub.status.idle": "2025-01-17T04:50:59.116389Z",
-     "shell.execute_reply": "2025-01-17T04:50:59.115528Z",
-     "shell.execute_reply.started": "2025-01-17T04:47:17.334390Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2025-01-16 20:47:23 Starting epoch 1.\n",
-      "2025-01-16 20:48:35 Epoch 1/3 done, Average Loss: 1.4272\n",
-      "2025-01-16 20:49:46 Epoch 2/3 done, Average Loss: 0.8694\n",
-      "2025-01-16 20:50:59 Epoch 3/3 done, Average Loss: 0.5774\n"
-     ]
-    }
-   ],
    "source": [
-    "model, tokenizer = run_training(\n",
-    "    max_dataset_size=\"full\",\n",
-    "    bert_variety=\"bert-base-uncased\",\n",
-    "    max_length=64,\n",
-    "    num_epochs=3,\n",
-    "    batch_size=32,\n",
-    ")"
    ]
   },
   {
@@ -323,11 +397,11 @@
    "id": "1d29336e-7f88-4127-afdf-2fe043e310e1",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T04:50:59.118025Z",
-     "iopub.status.busy": "2025-01-17T04:50:59.117838Z",
-     "iopub.status.idle": "2025-01-17T04:58:02.423121Z",
-     "shell.execute_reply": "2025-01-17T04:58:02.421532Z",
-     "shell.execute_reply.started": "2025-01-17T04:50:59.118005Z"
     }
    },
    "outputs": [
@@ -335,10 +409,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-16 20:51:04 Starting epoch 1.\n",
-      "2025-01-16 20:53:20 Epoch 1/3 done, Average Loss: 1.4107\n",
-      "2025-01-16 20:55:41 Epoch 2/3 done, Average Loss: 0.8491\n",
-      "2025-01-16 20:58:02 Epoch 3/3 done, Average Loss: 0.5359\n"
      ]
     }
    ],
@@ -358,11 +432,11 @@
    "id": "461b8f57-0c52-403a-bb69-3bc192b323bf",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T04:58:02.426159Z",
-     "iopub.status.busy": "2025-01-17T04:58:02.425896Z",
-     "iopub.status.idle": "2025-01-17T05:05:36.903446Z",
-     "shell.execute_reply": "2025-01-17T05:05:36.901961Z",
-     "shell.execute_reply.started": "2025-01-17T04:58:02.426132Z"
     }
    },
    "outputs": [
@@ -370,10 +444,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-16 20:58:08 Starting epoch 1.\n",
-      "2025-01-16 21:00:38 Epoch 1/3 done, Average Loss: 1.2946\n",
-      "2025-01-16 21:03:07 Epoch 2/3 done, Average Loss: 0.7425\n",
-      "2025-01-16 21:05:36 Epoch 3/3 done, Average Loss: 0.4126\n"
      ]
     }
    ],
@@ -389,15 +463,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
    "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:35:15.434902Z",
-     "iopub.status.busy": "2025-01-17T18:35:15.434668Z",
-     "iopub.status.idle": "2025-01-17T18:50:43.167167Z",
-     "shell.execute_reply": "2025-01-17T18:50:43.166720Z",
-     "shell.execute_reply.started": "2025-01-17T18:35:15.434880Z"
     }
    },
    "outputs": [
@@ -405,10 +479,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-17 10:35:20 Starting epoch 1.\n",
-      "2025-01-17 10:40:29 Epoch 1/3 done, Average Loss: 1.2876\n",
-      "2025-01-17 10:45:37 Epoch 2/3 done, Average Loss: 0.7289\n",
-      "2025-01-17 10:50:43 Epoch 3/3 done, Average Loss: 0.3990\n"
      ]
     }
    ],
@@ -432,50 +506,300 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "ac5f412c-a745-4327-9303-acf4c5b1efcd",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:19:11.590514Z",
-     "iopub.status.busy": "2025-01-17T18:19:11.589753Z",
-     "iopub.status.idle": "2025-01-17T18:26:45.645104Z",
-     "shell.execute_reply": "2025-01-17T18:26:45.644631Z",
-     "shell.execute_reply.started": "2025-01-17T18:19:11.590428Z"
-    }
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-17 10:19:17 Starting epoch 1.\n",
-      "2025-01-17 10:21:47 Epoch 1/3 done, Average Loss: 1.2608\n",
-      "2025-01-17 10:24:16 Epoch 2/3 done, Average Loss: 0.7134\n",
-      "2025-01-17 10:26:45 Epoch 3/3 done, Average Loss: 0.3931\n"
      ]
     }
    ],
    "source": [
-    "model_final, tokenizer_final = run_training(\n",
-    "    max_dataset_size=\"full\",\n",
-    "    bert_variety=\"bert-base-uncased\",\n",
-    "    max_length=128,\n",
-    "    num_epochs=3,\n",
-    "    batch_size=16,\n",
-    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:26:45.646178Z",
-     "iopub.status.busy": "2025-01-17T18:26:45.646081Z",
-     "iopub.status.idle": "2025-01-17T18:26:45.722052Z",
-     "shell.execute_reply": "2025-01-17T18:26:45.721803Z",
-     "shell.execute_reply.started": "2025-01-17T18:26:45.646168Z"
     }
    },
    "outputs": [
@@ -483,7 +807,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-01-17 10:26:45 Predictions: tensor([0, 0, 3, 1, 2, 4, 6], device='mps:0')\n"
      ]
     }
    ],
@@ -515,22 +839,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
    "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:32:40.094019Z",
-     "iopub.status.busy": "2025-01-17T18:32:40.093429Z",
-     "iopub.status.idle": "2025-01-17T18:35:15.419578Z",
-     "shell.execute_reply": "2025-01-17T18:35:15.418848Z",
-     "shell.execute_reply.started": "2025-01-17T18:32:40.093970Z"
     }
    },
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7dd2d0eb08624920b345ca85712f0169",
        "version_major": 2,
        "version_minor": 0
       },
@@ -544,10 +868,10 @@
     {
      "data": {
       "text/plain": [
-       "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/bd94aa1344798fcf671ddd5f8a7bd4f4dc0b20c4', commit_message='Push model using huggingface_hub.', commit_description='', oid='bd94aa1344798fcf671ddd5f8a7bd4f4dc0b20c4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
       ]
      },
-     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -558,51 +882,66 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-01-17T18:31:37.682978Z",
-     "iopub.status.busy": "2025-01-17T18:31:37.682009Z",
-     "iopub.status.idle": "2025-01-17T18:31:39.578706Z",
-     "shell.execute_reply": "2025-01-17T18:31:39.577664Z",
-     "shell.execute_reply.started": "2025-01-17T18:31:37.682910Z"
     }
    },
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b62ae26d30534f8fa6057824124e9c95",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "README.md:   0%|          | 0.00/320 [00:00<?, ?B/s]"
       ]
      },
      "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/plain": [
-       "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/9814436ad5f77cd8c607aa5dba9b67e7983e8ca7', commit_message='Upload tokenizer', commit_description='', oid='9814436ad5f77cd8c607aa5dba9b67e7983e8ca7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
       ]
      },
-     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "tokenizer_final.push_to_hub(\"frugal-ai-text-bert-base\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "863d3553-89a6-4188-a8d0-eaa0b6bccb6c",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -629,71 +968,41 @@
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {
     "state": {
-     "25776d7aede3476da6f33fc15fe300c8": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "ProgressStyleModel",
-      "state": {
-       "description_width": ""
-      }
-     },
-     "3a03347251c644bd9b5f58bac49ba2b7": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
-     "3f7dd449d7f84420a836adb899c3b374": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "HTMLStyleModel",
-      "state": {
-       "description_width": "",
-       "font_size": null,
-       "text_color": null
-      }
-     },
-     "47f3b8da36704934acf81f357a9da6c3": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "FloatProgressModel",
-      "state": {
-       "bar_style": "success",
-       "layout": "IPY_MODEL_ae0e1835546645cd85915a133bd0b578",
-       "max": 437977072,
-       "style": "IPY_MODEL_25776d7aede3476da6f33fc15fe300c8",
-       "value": 437977072
-      }
-     },
-     "4eff913c8c554820b957c2192d04a8cd": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "HTMLModel",
-      "state": {
-       "layout": "IPY_MODEL_54b8a0d455794f8881e6d9ceddcac787",
-       "style": "IPY_MODEL_3f7dd449d7f84420a836adb899c3b374",
-       "value": " 438M/438M [02:32&lt;00:00, 3.02MB/s]"
-      }
-     },
-     "54b8a0d455794f8881e6d9ceddcac787": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
-     "5c96c3617819467d9fb70aa3b716106e": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
-     "62f9a837c04142b5a2fd66097be6fb6e": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
-     "68c0e93ffde14a40b3599dff15512174": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLStyleModel",
@@ -703,32 +1012,17 @@
        "text_color": null
       }
      },
-     "6f679b19e9824e1cac8545d7244ec83a": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "FloatProgressModel",
-      "state": {
-       "bar_style": "success",
-       "layout": "IPY_MODEL_9785d5bb51544986b4c51b63a39d46cf",
-       "max": 320,
-       "style": "IPY_MODEL_88bc5db626a242af8879201d263d9eef",
-       "value": 320
-      }
-     },
-     "7dd2d0eb08624920b345ca85712f0169": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
-      "model_name": "HBoxModel",
       "state": {
-       "children": [
-        "IPY_MODEL_bdca6adbcf2347729287c1d2dc44fa2e",
-        "IPY_MODEL_47f3b8da36704934acf81f357a9da6c3",
-        "IPY_MODEL_4eff913c8c554820b957c2192d04a8cd"
-       ],
-       "layout": "IPY_MODEL_3a03347251c644bd9b5f58bac49ba2b7"
       }
      },
-     "88bc5db626a242af8879201d263d9eef": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "ProgressStyleModel",
@@ -736,58 +1030,7 @@
        "description_width": ""
       }
      },
-     "9396575ac43b4832bb12e246801a2316": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "HTMLModel",
-      "state": {
-       "layout": "IPY_MODEL_c16752a4cf734193accaae9835d55aab",
-       "style": "IPY_MODEL_c1b70a1ce9d149cf87169838a18f2e58",
-       "value": "README.md: 100%"
-      }
-     },
-     "9785d5bb51544986b4c51b63a39d46cf": {
-      "model_module": "@jupyter-widgets/base",
-      "model_module_version": "2.0.0",
-      "model_name": "LayoutModel",
-      "state": {}
-     },
-     "ae0e1835546645cd85915a133bd0b578": {
-      "model_module": "@jupyter-widgets/base",
-      "model_module_version": "2.0.0",
-      "model_name": "LayoutModel",
-      "state": {}
-     },
-     "b62ae26d30534f8fa6057824124e9c95": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "HBoxModel",
-      "state": {
-       "children": [
-        "IPY_MODEL_9396575ac43b4832bb12e246801a2316",
-        "IPY_MODEL_6f679b19e9824e1cac8545d7244ec83a",
-        "IPY_MODEL_ce85ada4df3c41e9a9b35b7401cd1883"
-       ],
-       "layout": "IPY_MODEL_62f9a837c04142b5a2fd66097be6fb6e"
-      }
-     },
-     "bdca6adbcf2347729287c1d2dc44fa2e": {
-      "model_module": "@jupyter-widgets/controls",
-      "model_module_version": "2.0.0",
-      "model_name": "HTMLModel",
-      "state": {
-       "layout": "IPY_MODEL_5c96c3617819467d9fb70aa3b716106e",
-       "style": "IPY_MODEL_c18dc3ed330d4d97a0c9d7dba32a9217",
-       "value": "model.safetensors: 100%"
-      }
-     },
-     "c16752a4cf734193accaae9835d55aab": {
-      "model_module": "@jupyter-widgets/base",
-      "model_module_version": "2.0.0",
-      "model_name": "LayoutModel",
-      "state": {}
-     },
-     "c18dc3ed330d4d97a0c9d7dba32a9217": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLStyleModel",
@@ -797,31 +1040,30 @@
        "text_color": null
       }
      },
-     "c1b70a1ce9d149cf87169838a18f2e58": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
-      "model_name": "HTMLStyleModel",
       "state": {
-       "description_width": "",
-       "font_size": null,
-       "text_color": null
       }
      },
-     "ce85ada4df3c41e9a9b35b7401cd1883": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
-      "model_name": "HTMLModel",
       "state": {
-       "layout": "IPY_MODEL_dae692ab00184ab190368530f21dcad9",
-       "style": "IPY_MODEL_68c0e93ffde14a40b3599dff15512174",
-       "value": " 320/320 [00:00&lt;00:00, 21.4kB/s]"
       }
-     },
-     "dae692ab00184ab190368530f21dcad9": {
-      "model_module": "@jupyter-widgets/base",
-      "model_module_version": "2.0.0",
-      "model_name": "LayoutModel",
-      "state": {}
      }
     },
     "version_major": 2,

 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "33faae25-af36-4781-bf8f-2084ddc96a52",
+   "metadata": {},
+   "source": [
+    "# Setup"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
    "id": "73e72549-69f2-46b5-b0f5-655777139972",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:03.803583Z",
+     "iopub.status.busy": "2025-01-20T20:17:03.803051Z",
+     "iopub.status.idle": "2025-01-20T20:17:06.786959Z",
+     "shell.execute_reply": "2025-01-20T20:17:06.786718Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:03.803542Z"
     }
    },
    "outputs": [],
     "import torch\n",
     "from torch import nn\n",
     "from transformers import BertTokenizer, BertModel\n",
+    "from huggingface_hub import (\n",
+    "    PyTorchModelHubMixin,\n",
+    "    notebook_login,\n",
+    "    ModelCard,\n",
+    "    ModelCardData,\n",
+    "    EvalResult,\n",
+    ")\n",
+    "from datasets import DatasetDict, load_dataset\n",
+    "from torch.utils.data import Dataset, DataLoader"
    ]
   },
   {
    "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:06.787691Z",
+     "iopub.status.busy": "2025-01-20T20:17:06.787547Z",
+     "iopub.status.idle": "2025-01-20T20:17:06.789420Z",
+     "shell.execute_reply": "2025-01-20T20:17:06.789211Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:06.787682Z"
     }
    },
    "outputs": [],
     "notebook_login(new_session=False)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "a919d72c-8d10-4275-a2ca-4ead295f41a8",
+   "metadata": {},
+   "source": [
+    "# Functions"
+   ]
+  },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:06.789829Z",
+     "iopub.status.busy": "2025-01-20T20:17:06.789761Z",
+     "iopub.status.idle": "2025-01-20T20:17:06.794443Z",
+     "shell.execute_reply": "2025-01-20T20:17:06.794260Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:06.789822Z"
     }
    },
    "outputs": [],
     "    print(time_str, x)\n",
     "\n",
     "\n",
+    "def model_metrics(model, dataloader):\n",
+    "    criterion = nn.CrossEntropyLoss()\n",
+    "    model.eval()\n",
+    "    with torch.no_grad():\n",
+    "        total_loss = 0\n",
+    "        total_correct = 0\n",
+    "        total_length = 0\n",
+    "        for batch in dataloader:\n",
+    "            input_ids = batch[\"input_ids\"].to(device)\n",
+    "            attention_mask = batch[\"attention_mask\"].to(device)\n",
+    "            labels = batch[\"labels\"].to(device)\n",
+    "\n",
+    "            outputs = model(input_ids, attention_mask)\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            predictions_cpu = torch.argmax(outputs, dim=1).cpu().numpy()\n",
+    "            labels_cpu = labels.cpu().numpy()\n",
+    "            correct_count = (predictions_cpu == labels_cpu).sum()\n",
+    "\n",
+    "            total_loss += loss.item()\n",
+    "            total_correct += correct_count\n",
+    "            total_length += len(labels_cpu)\n",
+    "        avg_loss = total_loss / len(dataloader)\n",
+    "        avg_acc = total_correct / total_length\n",
+    "    model.train()\n",
+    "    return avg_loss, avg_acc\n",
+    "\n",
+    "\n",
+    "def print_model_status(epoch, num_epochs, model, train_dataloader, test_dataloader):\n",
+    "    train_loss, train_acc = model_metrics(model, train_dataloader)\n",
+    "    test_loss, test_acc = model_metrics(model, test_dataloader)\n",
+    "    loss_str = f\"Loss: Train {train_loss:0.3f}, Test {test_loss:0.3f}\"\n",
+    "    acc_str = f\"Acc: Train {train_acc:0.3f}, Test {test_acc:0.3f}\"\n",
+    "    my_print(f\"Epoch {epoch+1}/{num_epochs} done. {loss_str}; and {acc_str}\")\n",
+    "\n",
+    "\n",
     "class BertClassifier(nn.Module, PyTorchModelHubMixin):\n",
     "    def __init__(self, num_labels=8, bert_variety=\"bert-base-uncased\"):\n",
     "        super().__init__()\n",
     "        return len(self.labels)\n",
     "\n",
     "\n",
+    "def train_model(model, train_dataloader, test_dataloader, device, num_epochs):\n",
     "    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)\n",
     "    criterion = nn.CrossEntropyLoss()\n",
     "    model.train()\n",
     "\n",
+    "    print_model_status(-1, num_epochs, model, train_dataloader, test_dataloader)\n",
     "    for epoch in range(num_epochs):\n",
     "        total_loss = 0\n",
     "        for batch in train_dataloader:\n",
     "\n",
     "            total_loss += loss.item()\n",
     "        avg_loss = total_loss / len(train_dataloader)\n",
+    "        print_model_status(epoch, num_epochs, model, train_dataloader, test_dataloader)"
    ]
   },
   {
    "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:06.795335Z",
+     "iopub.status.busy": "2025-01-20T20:17:06.795239Z",
+     "iopub.status.idle": "2025-01-20T20:17:06.821293Z",
+     "shell.execute_reply": "2025-01-20T20:17:06.821061Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:06.795328Z"
     }
    },
    "outputs": [],
    "id": "695bc080-bbd7-4937-af5b-50db1c936500",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:06.821637Z",
+     "iopub.status.busy": "2025-01-20T20:17:06.821569Z",
+     "iopub.status.idle": "2025-01-20T20:17:06.824265Z",
+     "shell.execute_reply": "2025-01-20T20:17:06.824082Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:06.821630Z"
     }
    },
    "outputs": [],
     "    batch_size=32,\n",
     "):\n",
     "    hf_dataset = load_dataset(\"quotaclimat/frugalaichallenge-text-train\")\n",
+    "    test_size = 0.2\n",
+    "    test_seed = 42\n",
+    "    train_test = hf_dataset[\"train\"].train_test_split(\n",
+    "        test_size=test_size, seed=test_seed\n",
+    "    )\n",
+    "    train_dataset = train_test[\"train\"]\n",
+    "    test_dataset = train_test[\"test\"]\n",
     "    if not max_dataset_size == \"full\" and max_dataset_size < len(hf_dataset[\"train\"]):\n",
+    "        train_dataset = train_dataset[:max_dataset_size]\n",
+    "        test_dataset = test_dataset[:max_dataset_size]\n",
     "    else:\n",
+    "        train_dataset = train_dataset\n",
+    "        test_dataset = test_dataset\n",
     "\n",
     "    tokenizer = BertTokenizer.from_pretrained(bert_variety, max_length=max_length)\n",
     "    model = BertClassifier(bert_variety=bert_variety)\n",
     "        device = torch.device(\"cpu\")\n",
     "    model.to(device)\n",
     "\n",
+    "    text_dataset_train = TextDataset(\n",
     "        train_dataset[\"quote\"],\n",
     "        train_dataset[\"label\"],\n",
     "        tokenizer=tokenizer,\n",
     "        max_length=max_length,\n",
     "    )\n",
+    "    text_dataset_test = TextDataset(\n",
+    "        test_dataset[\"quote\"],\n",
+    "        test_dataset[\"label\"],\n",
+    "        tokenizer=tokenizer,\n",
+    "        max_length=max_length,\n",
+    "    )\n",
+    "    dataloader_train = DataLoader(\n",
+    "        text_dataset_train, batch_size=batch_size, shuffle=True\n",
+    "    )\n",
+    "    dataloader_test = DataLoader(\n",
+    "        text_dataset_test, batch_size=batch_size, shuffle=False\n",
+    "    )\n",
     "\n",
+    "    train_model(model, dataloader_train, dataloader_test, device, num_epochs=num_epochs)\n",
     "    return model, tokenizer"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5af751f3-1fc4-4540-ae25-638db9d33c67",
+   "metadata": {},
+   "source": [
+    "# Exploration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a847135f-ce86-46a1-9c61-3459a847cb29",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-01-20T19:13:05.482383Z",
+     "iopub.status.busy": "2025-01-20T19:13:05.481449Z",
+     "iopub.status.idle": "2025-01-20T19:13:05.487546Z",
+     "shell.execute_reply": "2025-01-20T19:13:05.486557Z",
+     "shell.execute_reply.started": "2025-01-20T19:13:05.482339Z"
+    }
+   },
+   "source": [
+    "## Check if runs"
+   ]
+  },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "792fd13f-e7cc-4d90-832d-c0da15e193cd",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:06.824513Z",
+     "iopub.status.busy": "2025-01-20T20:17:06.824457Z",
+     "iopub.status.idle": "2025-01-20T20:17:14.130284Z",
+     "shell.execute_reply": "2025-01-20T20:17:14.129964Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:06.824506Z"
     }
    },
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2025-01-20 12:17:10 Epoch 0/3 done. Loss: Train 2.111, Test 2.247; and Acc: Train 0.281, Test 0.156\n",
+      "2025-01-20 12:17:11 Epoch 1/3 done. Loss: Train 2.026, Test 2.222; and Acc: Train 0.344, Test 0.156\n",
+      "2025-01-20 12:17:12 Epoch 2/3 done. Loss: Train 1.943, Test 2.194; and Acc: Train 0.312, Test 0.156\n",
+      "2025-01-20 12:17:14 Epoch 3/3 done. Loss: Train 1.859, Test 2.159; and Acc: Train 0.344, Test 0.156\n"
      ]
     }
    ],
    "source": [
     "model, tokenizer = run_training(\n",
+    "    max_dataset_size=16 * 2,\n",
     "    bert_variety=\"bert-base-uncased\",\n",
     "    max_length=128,\n",
     "    num_epochs=3,\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:14.130879Z",
+     "iopub.status.busy": "2025-01-20T20:17:14.130792Z",
+     "iopub.status.idle": "2025-01-20T20:17:14.193695Z",
+     "shell.execute_reply": "2025-01-20T20:17:14.193466Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:14.130869Z"
     }
    },
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2025-01-20 12:17:14 Predictions: tensor([4, 1, 1, 1, 3, 1, 1], device='mps:0')\n"
      ]
     }
    ],
    ]
   },
   {
+   "cell_type": "markdown",
+   "id": "0c3ea938-dd87-4673-b1d6-f06c70b19455",
+   "metadata": {},
    "source": [
+    "## Hyperparameters"
    ]
   },
   {
    "id": "1d29336e-7f88-4127-afdf-2fe043e310e1",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:17:14.194160Z",
+     "iopub.status.busy": "2025-01-20T20:17:14.194076Z",
+     "iopub.status.idle": "2025-01-20T20:25:46.660251Z",
+     "shell.execute_reply": "2025-01-20T20:25:46.659652Z",
+     "shell.execute_reply.started": "2025-01-20T20:17:14.194152Z"
     }
    },
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2025-01-20 12:18:02 Epoch 0/3 done. Loss: Train 2.106, Test 2.091; and Acc: Train 0.118, Test 0.135\n",
+      "2025-01-20 12:20:37 Epoch 1/3 done. Loss: Train 0.989, Test 1.114; and Acc: Train 0.647, Test 0.603\n",
+      "2025-01-20 12:23:12 Epoch 2/3 done. Loss: Train 0.584, Test 0.928; and Acc: Train 0.825, Test 0.669\n",
+      "2025-01-20 12:25:46 Epoch 3/3 done. Loss: Train 0.313, Test 0.950; and Acc: Train 0.913, Test 0.683\n"
      ]
     }
    ],
    "id": "461b8f57-0c52-403a-bb69-3bc192b323bf",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:25:46.661264Z",
+     "iopub.status.busy": "2025-01-20T20:25:46.661132Z",
+     "iopub.status.idle": "2025-01-20T20:34:54.221239Z",
+     "shell.execute_reply": "2025-01-20T20:34:54.220590Z",
+     "shell.execute_reply.started": "2025-01-20T20:25:46.661249Z"
     }
    },
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2025-01-20 12:26:34 Epoch 0/3 done. Loss: Train 2.174, Test 2.168; and Acc: Train 0.096, Test 0.094\n",
+      "2025-01-20 12:29:21 Epoch 1/3 done. Loss: Train 0.878, Test 1.033; and Acc: Train 0.712, Test 0.653\n",
+      "2025-01-20 12:32:07 Epoch 2/3 done. Loss: Train 0.458, Test 0.906; and Acc: Train 0.869, Test 0.678\n",
+      "2025-01-20 12:34:54 Epoch 3/3 done. Loss: Train 0.218, Test 0.959; and Acc: Train 0.944, Test 0.695\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T20:34:54.224989Z",
+     "iopub.status.busy": "2025-01-20T20:34:54.224772Z",
+     "iopub.status.idle": "2025-01-20T20:54:07.531338Z",
+     "shell.execute_reply": "2025-01-20T20:54:07.530559Z",
+     "shell.execute_reply.started": "2025-01-20T20:34:54.224968Z"
     }
    },
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2025-01-20 12:36:37 Epoch 0/3 done. Loss: Train 2.122, Test 2.127; and Acc: Train 0.122, Test 0.118\n",
+      "2025-01-20 12:42:26 Epoch 1/3 done. Loss: Train 0.779, Test 0.978; and Acc: Train 0.748, Test 0.652\n",
+      "2025-01-20 12:48:16 Epoch 2/3 done. Loss: Train 0.391, Test 0.884; and Acc: Train 0.897, Test 0.696\n",
+      "2025-01-20 12:54:07 Epoch 3/3 done. Loss: Train 0.154, Test 0.978; and Acc: Train 0.959, Test 0.705\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
+   "id": "ec2516f9-79f2-4ae1-ab9a-9a51a7a50587",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T22:10:34.055595Z",
+     "iopub.status.busy": "2025-01-20T22:10:34.054690Z",
+     "iopub.status.idle": "2025-01-20T22:10:34.083784Z",
+     "shell.execute_reply": "2025-01-20T22:10:34.083448Z",
+     "shell.execute_reply.started": "2025-01-20T22:10:34.055529Z"
+    },
+    "scrolled": true
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "---\n",
+      "base_model: google-bert/bert-base-uncased\n",
+      "datasets:\n",
+      "- QuotaClimat/frugalaichallenge-text-train\n",
+      "language:\n",
+      "- en\n",
+      "license: apache-2.0\n",
+      "model_name: frugal-ai-text-bert-base\n",
+      "pipeline_tag: text-classification\n",
+      "tags:\n",
+      "- model_hub_mixin\n",
+      "- pytorch_model_hub_mixin\n",
+      "- climate\n",
+      "---\n",
+      "\n",
+      "# Model Card for Model ID\n",
+      "\n",
+      "<!-- Provide a quick summary of what the model is/does. -->\n",
+      "\n",
+      "Classify text into 8 categories of climate misinformation.\n",
+      "\n",
+      "## Model Details\n",
+      "\n",
+      "### Model Description\n",
+      "\n",
+      "<!-- Provide a longer summary of what this model is. -->\n",
+      "\n",
+      "Fine-tuned BERT for classifying climate information as part of the Frugal AI Challenge, for submission to https://huggingface.co/frugal-ai-challenge and scoring on accuracy and efficiency. Trained on only the non-evaluation 80% of the data, so its (non-cheating) score will be lower.\n",
+      "\n",
+      "- **Developed by:** Andre Bach\n",
+      "- **Funded by [optional]:** N/A\n",
+      "- **Shared by [optional]:** Andre Bach\n",
+      "- **Model type:** Text classification\n",
+      "- **Language(s) (NLP):** ['en']\n",
+      "- **License:** apache-2.0\n",
+      "- **Finetuned from model [optional]:** google-bert/bert-base-uncased\n",
+      "\n",
+      "### Model Sources [optional]\n",
+      "\n",
+      "<!-- Provide the basic links for the model. -->\n",
+      "\n",
+      "- **Repository:** frugal-ai-text-bert-base\n",
+      "- **Paper [optional]:** [More Information Needed]\n",
+      "- **Demo [optional]:** [More Information Needed]\n",
+      "\n",
+      "## Uses\n",
+      "\n",
+      "<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->\n",
+      "\n",
+      "### Direct Use\n",
+      "\n",
+      "<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "### Downstream Use [optional]\n",
+      "\n",
+      "<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "### Out-of-Scope Use\n",
+      "\n",
+      "<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Bias, Risks, and Limitations\n",
+      "\n",
+      "<!-- This section is meant to convey both technical and sociotechnical limitations. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "### Recommendations\n",
+      "\n",
+      "<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->\n",
+      "\n",
+      "Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.\n",
+      "\n",
+      "## How to Get Started with the Model\n",
+      "\n",
+      "Use the code below to get started with the model.\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Training Details\n",
+      "\n",
+      "### Training Data\n",
+      "\n",
+      "<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "### Training Procedure\n",
+      "\n",
+      "<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->\n",
+      "\n",
+      "#### Preprocessing [optional]\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "\n",
+      "#### Training Hyperparameters\n",
+      "\n",
+      "- **Training regime:** {'max_dataset_size': 'full', 'bert_variety': 'bert-base-uncased', 'max_length': 256, 'num_epochs': 3, 'batch_size': 16} <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->\n",
+      "\n",
+      "#### Speeds, Sizes, Times [optional]\n",
+      "\n",
+      "<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Evaluation\n",
+      "\n",
+      "<!-- This section describes the evaluation protocols and provides the results. -->\n",
+      "\n",
+      "### Testing Data, Factors & Metrics\n",
+      "\n",
+      "#### Testing Data\n",
+      "\n",
+      "<!-- This should link to a Dataset Card if possible. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "#### Factors\n",
+      "\n",
+      "<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "#### Metrics\n",
+      "\n",
+      "<!-- These are the evaluation metrics being used, ideally with a description of why. -->\n",
+      "\n",
+      "{'loss_train': 0.154, 'loss_test': 0.978, 'acc_train': 0.959, 'acc_test': 0.705}\n",
+      "\n",
+      "### Results\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "#### Summary\n",
+      "\n",
+      "\n",
+      "\n",
+      "## Model Examination [optional]\n",
+      "\n",
+      "<!-- Relevant interpretability work for the model goes here -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Environmental Impact\n",
+      "\n",
+      "<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->\n",
+      "\n",
+      "Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).\n",
+      "\n",
+      "- **Hardware Type:** [More Information Needed]\n",
+      "- **Hours used:** [More Information Needed]\n",
+      "- **Cloud Provider:** [More Information Needed]\n",
+      "- **Compute Region:** [More Information Needed]\n",
+      "- **Carbon Emitted:** [More Information Needed]\n",
+      "\n",
+      "## Technical Specifications [optional]\n",
+      "\n",
+      "### Model Architecture and Objective\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "### Compute Infrastructure\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "#### Hardware\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "#### Software\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Citation [optional]\n",
+      "\n",
+      "<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->\n",
+      "\n",
+      "**BibTeX:**\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "**APA:**\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Glossary [optional]\n",
+      "\n",
+      "<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## More Information [optional]\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Model Card Authors [optional]\n",
+      "\n",
+      "[More Information Needed]\n",
+      "\n",
+      "## Model Card Contact\n",
+      "\n",
+      "[More Information Needed]\n"
      ]
     }
    ],
    "source": [
+    "model_and_repo_name = \"frugal-ai-text-bert-base\"\n",
+    "card_data = ModelCardData(\n",
+    "    model_name=model_and_repo_name,\n",
+    "    base_model=\"google-bert/bert-base-uncased\",\n",
+    "    license=\"apache-2.0\",\n",
+    "    language=[\"en\"],\n",
+    "    datasets=[\"QuotaClimat/frugalaichallenge-text-train\"],\n",
+    "    tags=[\"model_hub_mixin\", \"pytorch_model_hub_mixin\", \"climate\"],\n",
+    "    pipeline_tag=\"text-classification\",\n",
+    ")\n",
+    "card = ModelCard.from_template(\n",
+    "    card_data,\n",
+    "    model_summary=\"Classify text into 8 categories of climate misinformation.\",\n",
+    "    model_description=\"Fine-tuned BERT for classifying climate information as part of the Frugal AI Challenge, for submission to https://huggingface.co/frugal-ai-challenge and scoring on accuracy and efficiency. Trained on only the non-evaluation 80% of the data, so its (non-cheating) score will be lower.\",\n",
+    "    developers=\"Andre Bach\",\n",
+    "    funded_by=\"N/A\",\n",
+    "    shared_by=\"Andre Bach\",\n",
+    "    model_type=\"Text classification\",\n",
+    "    repo=model_and_repo_name,\n",
+    "    training_regime=dict(\n",
+    "        max_dataset_size=\"full\",\n",
+    "        bert_variety=\"bert-base-uncased\",\n",
+    "        max_length=256,\n",
+    "        num_epochs=3,\n",
+    "        batch_size=16,\n",
+    "    ),\n",
+    "    testing_metrics=dict(\n",
+    "        loss_train=0.154, loss_test=0.978, acc_train=0.959, acc_test=0.705\n",
+    "    ),\n",
+    ")\n",
+    "# print(card_data.to_yaml())\n",
+    "print(card)"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
+   "id": "29d3bbf9-ab2a-48e2-a550-e16da5025720",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-01-20T22:11:59.827681Z",
+     "iopub.status.busy": "2025-01-20T22:11:59.827001Z",
+     "iopub.status.idle": "2025-01-20T22:11:59.831852Z",
+     "shell.execute_reply": "2025-01-20T22:11:59.831047Z",
+     "shell.execute_reply.started": "2025-01-20T22:11:59.827635Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "model_final = model\n",
+    "tokenizer_final = tokenizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
    "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T22:12:00.576369Z",
+     "iopub.status.busy": "2025-01-20T22:12:00.575421Z",
+     "iopub.status.idle": "2025-01-20T22:12:01.065512Z",
+     "shell.execute_reply": "2025-01-20T22:12:01.065237Z",
+     "shell.execute_reply.started": "2025-01-20T22:12:00.576294Z"
     }
    },
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "2025-01-20 14:12:01 Predictions: tensor([0, 0, 3, 6, 2, 4, 6], device='mps:0')\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T22:12:15.099356Z",
+     "iopub.status.busy": "2025-01-20T22:12:15.098818Z",
+     "iopub.status.idle": "2025-01-20T22:12:33.175760Z",
+     "shell.execute_reply": "2025-01-20T22:12:33.174719Z",
+     "shell.execute_reply.started": "2025-01-20T22:12:15.099315Z"
     }
    },
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fbc09ae2c5614831a2fb02fa48a44fd1",
        "version_major": 2,
        "version_minor": 0
       },
     {
      "data": {
       "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/bdc2daf80d9647566ef56297f2cdc32f898170df', commit_message='Push model using huggingface_hub.', commit_description='', oid='bdc2daf80d9647566ef56297f2cdc32f898170df', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
       ]
      },
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
    "metadata": {
     "execution": {
+     "iopub.execute_input": "2025-01-20T22:12:33.178424Z",
+     "iopub.status.busy": "2025-01-20T22:12:33.178028Z",
+     "iopub.status.idle": "2025-01-20T22:12:34.321979Z",
+     "shell.execute_reply": "2025-01-20T22:12:34.320974Z",
+     "shell.execute_reply.started": "2025-01-20T22:12:33.178397Z"
     }
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/9081285a20fa0d62c5c1580aa17884de2b3bc236', commit_message='Upload tokenizer', commit_description='', oid='9081285a20fa0d62c5c1580aa17884de2b3bc236', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
       ]
      },
+     "execution_count": 20,
      "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer_final.push_to_hub(\"frugal-ai-text-bert-base\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "863d3553-89a6-4188-a8d0-eaa0b6bccb6c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2025-01-20T22:12:34.324003Z",
+     "iopub.status.busy": "2025-01-20T22:12:34.323725Z",
+     "iopub.status.idle": "2025-01-20T22:12:35.350962Z",
+     "shell.execute_reply": "2025-01-20T22:12:35.350482Z",
+     "shell.execute_reply.started": "2025-01-20T22:12:34.323976Z"
+    }
+   },
+   "outputs": [
     {
      "data": {
       "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/b3078a95ea36d71c1d1bf0d153e069b83f74bddf', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='b3078a95ea36d71c1d1bf0d153e069b83f74bddf', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
       ]
      },
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "card.push_to_hub(\"Nonnormalizable/frugal-ai-text-bert-base\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "2c22cc30-7578-4aad-b7db-1ffe4954c46c",
    "metadata": {},
    "outputs": [],
    "source": []
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {
     "state": {
+     "47fba054bcbc4563934b6d25ea787e43": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
+     "5cdf8fe39a634d048f2140b3af85165f": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
+     "6a6b93c568744ed48ba6c58f84c3d59a": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
+     "802b81b278a34a1a9ed480ca2ae299a0": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "2.0.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "layout": "IPY_MODEL_47fba054bcbc4563934b6d25ea787e43",
+       "style": "IPY_MODEL_cab10a06b0064a4f876d47bbd5dda288",
+       "value": "model.safetensors: 100%"
+      }
+     },
+     "80984aaf16ce41ce839cc4bd5c0ea202": {
       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
       "model_name": "LayoutModel",
       "state": {}
      },
+     "87a62c5c11cc43649d6ce177ab39f244": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLStyleModel",
        "text_color": null
       }
      },
+     "8b033d0c246145a082c43e73d1377035": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
+      "model_name": "HTMLModel",
       "state": {
+       "layout": "IPY_MODEL_5cdf8fe39a634d048f2140b3af85165f",
+       "style": "IPY_MODEL_87a62c5c11cc43649d6ce177ab39f244",
+       "value": " 438M/438M [00:15&lt;00:00, 22.9MB/s]"
       }
      },
+     "c5eebb3e916e4c59864d29582ab336bf": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "ProgressStyleModel",
        "description_width": ""
       }
      },
+     "cab10a06b0064a4f876d47bbd5dda288": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLStyleModel",
        "text_color": null
       }
      },
+     "d83e79effc3542f49c38928463bb41ec": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
+      "model_name": "FloatProgressModel",
       "state": {
+       "bar_style": "success",
+       "layout": "IPY_MODEL_6a6b93c568744ed48ba6c58f84c3d59a",
+       "max": 437977072,
+       "style": "IPY_MODEL_c5eebb3e916e4c59864d29582ab336bf",
+       "value": 437977072
       }
      },
+     "fbc09ae2c5614831a2fb02fa48a44fd1": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
+      "model_name": "HBoxModel",
       "state": {
+       "children": [
+        "IPY_MODEL_802b81b278a34a1a9ed480ca2ae299a0",
+        "IPY_MODEL_d83e79effc3542f49c38928463bb41ec",
+        "IPY_MODEL_8b033d0c246145a082c43e73d1377035"
+       ],
+       "layout": "IPY_MODEL_80984aaf16ce41ce839cc4bd5c0ea202"
       }
      }
     },
     "version_major": 2,

tasks/text.py CHANGED Viewed

@@ -70,20 +70,13 @@ def bert_model(test_dataset: dict, model_type: str):
     return predictions
-@router.post("/text-bert-base", tags=["Text Task"])
-async def evauate_text_model_1(request: TextEvaluationRequest):
-    return evaluate_text(request, model_type="bert-base")
-@router.post("/text-baseline", tags=["Text Task"])
-async def evauate_text_model_2(request: TextEvaluationRequest):
-    return evaluate_text(request, model_type="baseline")
 @router.post(ROUTE, tags=["Text Task"])
 async def evaluate_text(
     request: TextEvaluationRequest,
     model_type: str = "bert-base",
 ):
     """
     Evaluate text classification for climate disinformation detection.

     return predictions
 @router.post(ROUTE, tags=["Text Task"])
 async def evaluate_text(
     request: TextEvaluationRequest,
     model_type: str = "bert-base",
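+    # NOTE: `model_type` is meant to be selectable per request as an API query
+    # parameter, but the submission portal
+    # https://huggingface.co/spaces/frugal-ai-challenge/submission-portal
+    # appears to be built so that it does not accept any other endpoints or
+    # parameters, so in practice the default value above is what gets used.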
 ):
     """
     Evaluate text classification for climate disinformation detection.