Commit 80df7c4 · Parent(s): ec64986

Trained all 5 sizes of bert.

Finetune BERT.ipynb  +189 -164

Finetune BERT.ipynb  CHANGED
@@ -14,11 +14,11 @@
   "id": "73e72549-69f2-46b5-b0f5-655777139972",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:16:12.117877Z",
+    "iopub.status.busy": "2025-01-22T18:16:12.117575Z",
+    "iopub.status.idle": "2025-01-22T18:16:15.083870Z",
+    "shell.execute_reply": "2025-01-22T18:16:15.083640Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:12.117851Z"
    }
   },
   "outputs": [],
@@ -45,11 +45,11 @@
   "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:16:15.084435Z",
+    "iopub.status.busy": "2025-01-22T18:16:15.084268Z",
+    "iopub.status.idle": "2025-01-22T18:16:15.086255Z",
+    "shell.execute_reply": "2025-01-22T18:16:15.086031Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:15.084427Z"
    }
   },
   "outputs": [],
@@ -71,11 +71,11 @@
   "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:16:15.086764Z",
+    "iopub.status.busy": "2025-01-22T18:16:15.086669Z",
+    "iopub.status.idle": "2025-01-22T18:16:15.091701Z",
+    "shell.execute_reply": "2025-01-22T18:16:15.091514Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:15.086757Z"
    }
   },
   "outputs": [],
@@ -199,11 +199,11 @@
   "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:16:15.092028Z",
+    "iopub.status.busy": "2025-01-22T18:16:15.091969Z",
+    "iopub.status.idle": "2025-01-22T18:16:15.108312Z",
+    "shell.execute_reply": "2025-01-22T18:16:15.108075Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:15.092021Z"
    }
   },
   "outputs": [],
@@ -223,11 +223,11 @@
   "id": "695bc080-bbd7-4937-af5b-50db1c936500",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:16:15.108777Z",
+    "iopub.status.busy": "2025-01-22T18:16:15.108669Z",
+    "iopub.status.idle": "2025-01-22T18:16:15.111839Z",
+    "shell.execute_reply": "2025-01-22T18:16:15.111545Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:15.108767Z"
    }
   },
   "outputs": [],
@@ -305,6 +305,25 @@
    "# Exploration"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": 6,
+  "id": "11890d3b-8bcb-4a9b-b421-5431081cca39",
+  "metadata": {
+   "execution": {
+    "iopub.execute_input": "2025-01-22T18:16:15.113676Z",
+    "iopub.status.busy": "2025-01-22T18:16:15.113576Z",
+    "iopub.status.idle": "2025-01-22T18:16:15.115080Z",
+    "shell.execute_reply": "2025-01-22T18:16:15.114867Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:15.113668Z"
+   }
+  },
+  "outputs": [],
+  "source": [
+   "base_model_repo = \"google/bert_uncased_L-12_H-768_A-12\"\n",
+   "model_and_repo_name = \"frugal-ai-text-bert-base\""
+  ]
+ },
  {
   "cell_type": "markdown",
   "id": "a847135f-ce86-46a1-9c61-3459a847cb29",
@@ -323,15 +342,15 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 7,
   "id": "34a7c310-c486-4db1-b94d-4363c3d3df5b",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:16:15.115472Z",
+    "iopub.status.busy": "2025-01-22T18:16:15.115400Z",
+    "iopub.status.idle": "2025-01-22T18:19:33.994125Z",
+    "shell.execute_reply": "2025-01-22T18:19:33.993854Z",
+    "shell.execute_reply.started": "2025-01-22T18:16:15.115464Z"
    }
   },
   "outputs": [
@@ -339,35 +358,35 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "2025-01-22
-     "2025-01-22
-     "2025-01-22
-     "2025-01-22
+     "2025-01-22 13:16:38 Epoch 0/3 done. Loss: Train 2.066, Test 2.091; and Acc: Train 0.185, Test 0.157\n",
+     "2025-01-22 13:17:36 Epoch 1/3 done. Loss: Train 1.089, Test 1.279; and Acc: Train 0.627, Test 0.555\n",
+     "2025-01-22 13:18:35 Epoch 2/3 done. Loss: Train 0.624, Test 1.044; and Acc: Train 0.839, Test 0.642\n",
+     "2025-01-22 13:19:33 Epoch 3/3 done. Loss: Train 0.294, Test 1.047; and Acc: Train 0.928, Test 0.648\n"
     ]
    }
   ],
   "source": [
    "model, tokenizer, regime, metrics = run_training(\n",
    " max_dataset_size=16 * 100,\n",
-   " bert_variety
+   " bert_variety=base_model_repo,\n",
    " max_length=128,\n",
    " num_epochs=3,\n",
-   " batch_size=
+   " batch_size=16,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 8,
   "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
   "metadata": {
    "editable": true,
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:19:33.994637Z",
+    "iopub.status.busy": "2025-01-22T18:19:33.994547Z",
+    "iopub.status.idle": "2025-01-22T18:19:34.064925Z",
+    "shell.execute_reply": "2025-01-22T18:19:34.064678Z",
+    "shell.execute_reply.started": "2025-01-22T18:19:33.994628Z"
    },
    "slideshow": {
     "slide_type": ""
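For readers without the rest of the notebook: `run_training` is defined outside this diff. Below is a minimal, hypothetical sketch of a helper with this signature, assuming a standard `transformers` sequence-classification fine-tune with 8 labels (inferred from the model card text); the placeholder data, learning rate, and return values are assumptions, not the notebook's actual implementation.

```python
# Hypothetical sketch only: the real run_training() is not part of this commit.
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def run_training(max_dataset_size, bert_variety, max_length, num_epochs, batch_size):
    # Placeholder data; the notebook trains on the Frugal AI Challenge text data instead.
    texts = ["placeholder climate claim"] * 64
    labels = [0] * 64
    if max_dataset_size != "full":
        texts, labels = texts[:max_dataset_size], labels[:max_dataset_size]

    tokenizer = AutoTokenizer.from_pretrained(bert_variety)
    model = AutoModelForSequenceClassification.from_pretrained(bert_variety, num_labels=8)
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    model.to(device)

    enc = tokenizer(texts, truncation=True, padding="max_length",
                    max_length=max_length, return_tensors="pt")
    loader = DataLoader(
        TensorDataset(enc["input_ids"], enc["attention_mask"], torch.tensor(labels)),
        batch_size=batch_size, shuffle=True,
    )
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

    model.train()
    for _ in range(num_epochs):
        for input_ids, attention_mask, batch_labels in loader:
            optimizer.zero_grad()
            out = model(input_ids=input_ids.to(device),
                        attention_mask=attention_mask.to(device),
                        labels=batch_labels.to(device))
            out.loss.backward()
            optimizer.step()

    # The real helper also evaluates a held-out split and prints the per-epoch lines seen above.
    return model, tokenizer, None, None
```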
@@ -379,7 +398,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "2025-01-22
+     "2025-01-22 13:19:34 Predictions: tensor([0, 0, 3, 6, 2, 4, 6], device='mps:0')\n"
     ]
    }
   ],
@@ -426,10 +445,10 @@
    "Overall top performance per model. Machine: bert-base is using an Nvidia 1xL40S, no inference time cleaverness attempted.\n",
    "\n",
    "[accidentally cheating bert-base by trainging on full dataset](https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text/blob/main/submissions/Nonnormalizable_20250117_220350.json):\\\n",
-   "acc 0.954, energy 0.736 Wh
+   "acc 0.954, energy 0.736 Wh\n",
    "\n",
    "[bert-base some hp tuning](https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text/blob/main/submissions/Nonnormalizable_20250120_231350.json):\\\n",
-   "acc 0.707, energy 0.803 Wh
+   "acc 0.707, energy 0.803 Wh\n",
    "\n",
    "bert-tiny, Nvidia 1xL40S:\n",
    "\n",
@@ -445,8 +464,20 @@
    "Scanning max_length and batch_size with num_epochs set to 3, looks like we want 256 and 16. That gets us\\\n",
    "`2025-01-21 10:18:56 Epoch 3/3 done. Loss: Train 1.368, Test 1.432; and Acc: Train 0.499, Test 0.477`.\n",
    "\n",
-   "Then looking at num_epochs, we saturate test set performance at 15 (~3
-   "`2025-01-21 10:38:30 Epoch 15/20 done. Loss: Train 0.553, Test 1.157; and Acc: Train 0.833, Test 0.595
+   "Then looking at num_epochs, we saturate test set performance at 15 (~3 minutes), giving e.g.\\\n",
+   "`2025-01-21 10:38:30 Epoch 15/20 done. Loss: Train 0.553, Test 1.157; and Acc: Train 0.833, Test 0.595`\n",
+   "\n",
+   "For bert-mini, just looking at num_epochs, we choose 8\\\n",
+   "`2025-01-22 10:56:12 Epoch 8/20 done. Loss: Train 0.305, Test 1.090; and Acc: Train 0.920, Test 0.646`\n",
+   "\n",
+   "For bert-small, 4\\\n",
+   "`2025-01-22 11:39:41 Epoch 4/15 done. Loss: Train 0.301, Test 0.978; and Acc: Train 0.920, Test 0.664`\n",
+   "\n",
+   "For bert-medium, 4\\\n",
+   "`2025-01-22 12:09:51 Epoch 4/10 done. Loss: Train 0.294, Test 1.020; and Acc: Train 0.922, Test 0.660`\n",
+   "\n",
+   "For bert-base, 3 does happen to be correct, just checking for completeness\\\n",
+   "`2025-01-22 12:59:10 Epoch 3/7 done. Loss: Train 0.156, Test 0.930; and Acc: Train 0.964, Test 0.703`"
   ]
  },
  {
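The five sizes referenced in the commit message correspond to Google's uncased BERT miniatures. Only the bert-base repo ID appears in this commit, so the smaller repo IDs in the sketch below are assumed from the standard `google/bert_uncased_*` naming and should be double-checked; the loop simply mirrors the num_epochs scan described in the markdown above, reusing this notebook's own `run_training` call shape.

```python
# Assumed repo IDs for the five sizes (only bert-base is defined in this commit).
bert_sizes = {
    "bert-tiny": "google/bert_uncased_L-2_H-128_A-2",
    "bert-mini": "google/bert_uncased_L-4_H-256_A-4",
    "bert-small": "google/bert_uncased_L-4_H-512_A-8",
    "bert-medium": "google/bert_uncased_L-8_H-512_A-8",
    "bert-base": "google/bert_uncased_L-12_H-768_A-12",
}

# Train each size with a generous epoch budget, then read off from the per-epoch
# logs where test accuracy saturates (15, 8, 4, 4, and 3 epochs in the notes above).
for name, repo in bert_sizes.items():
    run_training(
        max_dataset_size="full",
        bert_variety=repo,
        max_length=256,
        batch_size=16,
        num_epochs=20,
    )
```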
@@ -455,18 +486,18 @@
   "id": "37794952-703c-466c-9d26-ee6cb2834246",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:19:34.065427Z",
+    "iopub.status.busy": "2025-01-22T18:19:34.065327Z",
+    "iopub.status.idle": "2025-01-22T18:19:34.066925Z",
+    "shell.execute_reply": "2025-01-22T18:19:34.066714Z",
+    "shell.execute_reply.started": "2025-01-22T18:19:34.065418Z"
    }
   },
   "outputs": [],
   "source": [
    "static_hyperparams = dict(\n",
    " max_dataset_size=\"full\",\n",
-   " bert_variety
+   " bert_variety=base_model_repo,\n",
    " max_length=256,\n",
    " batch_size=16,\n",
    ")"
@@ -478,11 +509,11 @@
   "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:19:34.067286Z",
+    "iopub.status.busy": "2025-01-22T18:19:34.067206Z",
+    "iopub.status.idle": "2025-01-22T18:38:14.108104Z",
+    "shell.execute_reply": "2025-01-22T18:38:14.107193Z",
+    "shell.execute_reply.started": "2025-01-22T18:19:34.067278Z"
    }
   },
   "outputs": [
@@ -490,29 +521,17 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "2025-01-22
-     "2025-01-22
-     "2025-01-22
-     "2025-01-22
-     "2025-01-22 09:30:00 Epoch 4/15 done. Loss: Train 1.274, Test 1.362; and Acc: Train 0.555, Test 0.523\n",
-     "2025-01-22 09:30:07 Epoch 5/15 done. Loss: Train 1.179, Test 1.300; and Acc: Train 0.588, Test 0.540\n",
-     "2025-01-22 09:30:15 Epoch 6/15 done. Loss: Train 1.097, Test 1.259; and Acc: Train 0.632, Test 0.550\n",
-     "2025-01-22 09:30:22 Epoch 7/15 done. Loss: Train 1.026, Test 1.225; and Acc: Train 0.659, Test 0.567\n",
-     "2025-01-22 09:30:30 Epoch 8/15 done. Loss: Train 0.947, Test 1.196; and Acc: Train 0.683, Test 0.580\n",
-     "2025-01-22 09:30:37 Epoch 9/15 done. Loss: Train 0.879, Test 1.176; and Acc: Train 0.717, Test 0.586\n",
-     "2025-01-22 09:30:44 Epoch 10/15 done. Loss: Train 0.817, Test 1.155; and Acc: Train 0.735, Test 0.600\n",
-     "2025-01-22 09:30:52 Epoch 11/15 done. Loss: Train 0.757, Test 1.148; and Acc: Train 0.763, Test 0.599\n",
-     "2025-01-22 09:30:59 Epoch 12/15 done. Loss: Train 0.700, Test 1.139; and Acc: Train 0.786, Test 0.603\n",
-     "2025-01-22 09:31:07 Epoch 13/15 done. Loss: Train 0.636, Test 1.137; and Acc: Train 0.806, Test 0.599\n",
-     "2025-01-22 09:31:14 Epoch 14/15 done. Loss: Train 0.582, Test 1.128; and Acc: Train 0.823, Test 0.604\n",
-     "2025-01-22 09:31:22 Epoch 15/15 done. Loss: Train 0.535, Test 1.134; and Acc: Train 0.837, Test 0.618\n"
+     "2025-01-22 13:21:10 Epoch 0/3 done. Loss: Train 2.088, Test 2.085; and Acc: Train 0.137, Test 0.135\n",
+     "2025-01-22 13:26:50 Epoch 1/3 done. Loss: Train 0.780, Test 1.012; and Acc: Train 0.747, Test 0.648\n",
+     "2025-01-22 13:32:30 Epoch 2/3 done. Loss: Train 0.346, Test 0.890; and Acc: Train 0.904, Test 0.689\n",
+     "2025-01-22 13:38:14 Epoch 3/3 done. Loss: Train 0.167, Test 0.968; and Acc: Train 0.959, Test 0.691\n"
     ]
    }
   ],
   "source": [
    "model, tokenizer, training_regime, testing_metrics = run_training(\n",
    " **static_hyperparams,\n",
-   " num_epochs=
+   " num_epochs=3,\n",
    ")"
   ]
  },
@@ -526,21 +545,20 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 11,
   "id": "ec2516f9-79f2-4ae1-ab9a-9a51a7a50587",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:38:14.109094Z",
+    "iopub.status.busy": "2025-01-22T18:38:14.108996Z",
+    "iopub.status.idle": "2025-01-22T18:38:14.124982Z",
+    "shell.execute_reply": "2025-01-22T18:38:14.124768Z",
+    "shell.execute_reply.started": "2025-01-22T18:38:14.109081Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
-   "model_and_repo_name = \"frugal-ai-text-bert-tiny\"\n",
    "card_data = ModelCardData(\n",
    " model_name=model_and_repo_name,\n",
    " base_model=static_hyperparams[\"bert_variety\"],\n",
@@ -552,7 +570,7 @@
    ")\n",
    "card = ModelCard.from_template(\n",
    " card_data,\n",
-   " model_summary
+   " model_summary=f\"Classify text into 8 categories of climate misinformation using {base_model_repo}.\",\n",
    " model_description=\"Fine trained BERT for classifying climate information as part of the Frugal AI Challenge, for submission to https://huggingface.co/frugal-ai-challenge and scoring on accuracy and efficiency. Trainied on only the non-evaluation 80% of the data, so it's (non-cheating) score will be lower.\",\n",
    " developers=\"Andre Bach\",\n",
    " funded_by=\"N/A\",\n",
|
|
568 |
},
|
569 |
{
|
570 |
"cell_type": "code",
|
571 |
-
"execution_count":
|
572 |
"id": "29d3bbf9-ab2a-48e2-a550-e16da5025720",
|
573 |
"metadata": {
|
574 |
"execution": {
|
575 |
-
"iopub.execute_input": "2025-01-
|
576 |
-
"iopub.status.busy": "2025-01-
|
577 |
-
"iopub.status.idle": "2025-01-
|
578 |
-
"shell.execute_reply": "2025-01-
|
579 |
-
"shell.execute_reply.started": "2025-01-
|
580 |
}
|
581 |
},
|
582 |
"outputs": [],
|
@@ -587,15 +605,15 @@
|
|
587 |
},
|
588 |
{
|
589 |
"cell_type": "code",
|
590 |
-
"execution_count":
|
591 |
"id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
|
592 |
"metadata": {
|
593 |
"execution": {
|
594 |
-
"iopub.execute_input": "2025-01-
|
595 |
-
"iopub.status.busy": "2025-01-
|
596 |
-
"iopub.status.idle": "2025-01-
|
597 |
-
"shell.execute_reply": "2025-01-
|
598 |
-
"shell.execute_reply.started": "2025-01-
|
599 |
}
|
600 |
},
|
601 |
"outputs": [
|
@@ -603,7 +621,7 @@
|
|
603 |
"name": "stdout",
|
604 |
"output_type": "stream",
|
605 |
"text": [
|
606 |
-
"2025-01-22
|
607 |
]
|
608 |
}
|
609 |
],
|
@@ -636,32 +654,39 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 14,
   "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
   "metadata": {
    "execution": {
-    "iopub.execute_input": "2025-01-
-    "iopub.status.busy": "2025-01-
-    "iopub.status.idle": "2025-01-
-    "shell.execute_reply": "2025-01-
-    "shell.execute_reply.started": "2025-01-
+    "iopub.execute_input": "2025-01-22T18:38:14.157429Z",
+    "iopub.status.busy": "2025-01-22T18:38:14.157356Z",
+    "iopub.status.idle": "2025-01-22T18:38:53.948196Z",
+    "shell.execute_reply": "2025-01-22T18:38:53.947738Z",
+    "shell.execute_reply.started": "2025-01-22T18:38:14.157421Z"
    }
   },
   "outputs": [
    {
-    "
-
-
-
-
+    "data": {
+     "application/vnd.jupyter.widget-view+json": {
+      "model_id": "54e4f39d398f45ceb760107e5b57744a",
+      "version_major": 2,
+      "version_minor": 0
+     },
+     "text/plain": [
+      "model.safetensors: 0%| | 0.00/438M [00:00<?, ?B/s]"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
-      "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-
+      "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/46ba6471d612d348636c07c47f57d90dd14c9f74', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='46ba6471d612d348636c07c47f57d90dd14c9f74', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
      ]
     },
-    "execution_count":
+    "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
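The upload cell itself sits outside the hunks shown here; its outputs above (a model.safetensors progress bar followed by a CommitInfo for README.md) are consistent with the usual transformers/huggingface_hub push calls. A minimal sketch, assuming the repo lives under the Nonnormalizable namespace as in the CommitInfo URL and that `model`, `tokenizer`, and `card` are the objects built in the cells above:

```python
# Sketch of the push step implied by the cell outputs above; the actual cell is not in this diff.
repo_id = f"Nonnormalizable/{model_and_repo_name}"  # "Nonnormalizable/frugal-ai-text-bert-base"

model.push_to_hub(repo_id)      # uploads model.safetensors (~438M for bert-base)
tokenizer.push_to_hub(repo_id)  # uploads the tokenizer files
card.push_to_hub(repo_id)       # uploads README.md; the CommitInfo shown above is the result of such an upload
```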
@@ -703,7 +728,17 @@
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {
     "state": {
-      "
+      "2d2b267cd60649cdb6fcce93640ba8d6": {
+       "model_module": "@jupyter-widgets/controls",
+       "model_module_version": "2.0.0",
+       "model_name": "HTMLModel",
+       "state": {
+        "layout": "IPY_MODEL_b3c2c88f904a424c96704cc4b9514f98",
+        "style": "IPY_MODEL_337bc700fce14480a640a1ae545db5f5",
+        "value": "model.safetensors: 100%"
+       }
+      },
+      "337bc700fce14480a640a1ae545db5f5": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLStyleModel",
@@ -713,29 +748,15 @@
       "text_color": null
      }
     },
-      "
-       "model_module": "@jupyter-widgets/base",
-       "model_module_version": "2.0.0",
-       "model_name": "LayoutModel",
-       "state": {}
-      },
-      "3058e249f3a24b89a0946db9d46692cd": {
+      "40666b0d750d4caf8fbaeeef11eb58c1": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
-      "model_name": "
+      "model_name": "ProgressStyleModel",
       "state": {
-       "
-       "style": "IPY_MODEL_04362bf5ea1540e69a8ed37243e960fe",
-       "value": " 17.6M/17.6M [00:00<00:00, 30.6MB/s]"
+       "description_width": ""
      }
     },
-      "
-       "model_module": "@jupyter-widgets/base",
-       "model_module_version": "2.0.0",
-       "model_name": "LayoutModel",
-       "state": {}
-      },
-      "572a4d1b74044da7a90c58c311a87eff": {
+      "4d9ae3c7a72a4f4aa5974fb0649cb42c": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLStyleModel",
@@ -745,60 +766,64 @@
       "text_color": null
      }
     },
-      "
-       "model_module": "@jupyter-widgets/base",
-       "model_module_version": "2.0.0",
-       "model_name": "LayoutModel",
-       "state": {}
-      },
-      "8f403fa494c246c9af5ee00397ac6cf5": {
-       "model_module": "@jupyter-widgets/base",
-       "model_module_version": "2.0.0",
-       "model_name": "LayoutModel",
-       "state": {}
-      },
-      "916778013b8d48d9acddd42e8b874c22": {
+      "54e4f39d398f45ceb760107e5b57744a": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
-      "model_name": "
+      "model_name": "HBoxModel",
       "state": {
-       "
+       "children": [
+        "IPY_MODEL_2d2b267cd60649cdb6fcce93640ba8d6",
+        "IPY_MODEL_575f3681680a4cbeb1f95547a40bdc93",
+        "IPY_MODEL_91cbef62c3b84632949a24dbad475b10"
+       ],
+       "layout": "IPY_MODEL_f2feb8c3b4cc4ee29091b9aab78ff4aa"
      }
     },
-      "
+      "575f3681680a4cbeb1f95547a40bdc93": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "FloatProgressModel",
       "state": {
        "bar_style": "success",
-       "layout": "
-       "max":
-       "style": "
-       "value":
+       "layout": "IPY_MODEL_dcc805dd65774cd2b863c2c4bb8f3f1c",
+       "max": 437977072,
+       "style": "IPY_MODEL_40666b0d750d4caf8fbaeeef11eb58c1",
+       "value": 437977072
      }
     },
-      "
+      "91cbef62c3b84632949a24dbad475b10": {
       "model_module": "@jupyter-widgets/controls",
       "model_module_version": "2.0.0",
       "model_name": "HTMLModel",
       "state": {
-       "layout": "
-       "style": "
-       "value": "
+       "layout": "IPY_MODEL_fe68949bcf9b42508368dd03f6506d57",
+       "style": "IPY_MODEL_4d9ae3c7a72a4f4aa5974fb0649cb42c",
+       "value": " 438M/438M [00:36<00:00, 12.1MB/s]"
      }
     },
-      "
-       "model_module": "@jupyter-widgets/
+      "b3c2c88f904a424c96704cc4b9514f98": {
+       "model_module": "@jupyter-widgets/base",
       "model_module_version": "2.0.0",
-      "model_name": "
-      "state": {
-
-
-
-
-
-
-
+      "model_name": "LayoutModel",
+      "state": {}
+      },
+      "dcc805dd65774cd2b863c2c4bb8f3f1c": {
+       "model_module": "@jupyter-widgets/base",
+       "model_module_version": "2.0.0",
+       "model_name": "LayoutModel",
+       "state": {}
+      },
+      "f2feb8c3b4cc4ee29091b9aab78ff4aa": {
+       "model_module": "@jupyter-widgets/base",
+       "model_module_version": "2.0.0",
+       "model_name": "LayoutModel",
+       "state": {}
+      },
+      "fe68949bcf9b42508368dd03f6506d57": {
+       "model_module": "@jupyter-widgets/base",
+       "model_module_version": "2.0.0",
+       "model_name": "LayoutModel",
+       "state": {}
      }
     },
    "version_major": 2,