Nonnormalizable committed
Commit ab18efc · 1 parent: 3ec6adb

Point the submission at my first BERT model on HF.

Files changed (2)
  1. Finetune BERT.ipynb +402 -83
  2. tasks/text.py +41 -3
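
Together, the two files move the /text task off the baseline: the notebook's `BertClassifier` now mixes in `PyTorchModelHubMixin` so the fine-tuned weights can be pushed to the Hub, and `tasks/text.py` points inference at that repo. A minimal sketch of the push side, assuming the class and hyperparameters shown in the hunks below (the forward body is a plausible reconstruction; only fragments of it appear in the diff):

```python
# Sketch of the change, not a verbatim copy of the notebook.
from torch import nn
from transformers import BertModel
from huggingface_hub import PyTorchModelHubMixin

class BertClassifier(nn.Module, PyTorchModelHubMixin):
    def __init__(self, num_labels=8, bert_variety='bert-base-uncased'):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_variety)
        self.dropout = nn.Dropout(0.05)
        self.classifier = nn.Linear(self.bert.pooler.dense.out_features, num_labels)

    def forward(self, input_ids, attention_mask):
        # Pool the [CLS] representation, regularize, project to the 8 labels.
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.dropout(outputs.pooler_output)
        return self.classifier(pooled)

model = BertClassifier()
# ... fine-tune as in run_training() ...
model.push_to_hub('frugal-ai-text-bert-base')  # repo name used later in the notebook
```

`push_to_hub` needs an authenticated session, which is why the notebook now calls `notebook_login(new_session=False)` near the top.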
Finetune BERT.ipynb CHANGED
@@ -6,11 +6,11 @@
6
  "id": "73e72549-69f2-46b5-b0f5-655777139972",
7
  "metadata": {
8
  "execution": {
9
- "iopub.execute_input": "2025-01-17T04:45:37.715126Z",
10
- "iopub.status.busy": "2025-01-17T04:45:37.714808Z",
11
- "iopub.status.idle": "2025-01-17T04:45:41.232154Z",
12
- "shell.execute_reply": "2025-01-17T04:45:41.231851Z",
13
- "shell.execute_reply.started": "2025-01-17T04:45:37.715090Z"
14
  }
15
  },
16
  "outputs": [],
@@ -20,6 +20,7 @@
20
  "import torch\n",
21
  "from torch import nn\n",
22
  "from transformers import BertTokenizer, BertModel\n",
 
23
  "from torch.utils.data import Dataset, DataLoader\n",
24
  "from datasets import load_dataset"
25
  ]
@@ -27,14 +28,32 @@
27
  {
28
  "cell_type": "code",
29
  "execution_count": 2,
30
  "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
31
  "metadata": {
32
  "execution": {
33
- "iopub.execute_input": "2025-01-17T04:45:41.232694Z",
34
- "iopub.status.busy": "2025-01-17T04:45:41.232554Z",
35
- "iopub.status.idle": "2025-01-17T04:45:41.236434Z",
36
- "shell.execute_reply": "2025-01-17T04:45:41.236218Z",
37
- "shell.execute_reply.started": "2025-01-17T04:45:41.232685Z"
38
  }
39
  },
40
  "outputs": [],
@@ -43,12 +62,12 @@
43
  " time_str = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
44
  " print(time_str, x)\n",
45
  "\n",
46
- "class BertClassifier(nn.Module):\n",
47
- " def __init__(self, num_classes: int = 8, bert_variety='bert-base-uncased'):\n",
48
  " super().__init__()\n",
49
  " self.bert = BertModel.from_pretrained(bert_variety)\n",
50
  " self.dropout = nn.Dropout(0.05)\n",
51
- " self.classifier = nn.Linear(self.bert.pooler.dense.out_features, num_classes)\n",
52
  "\n",
53
  " def forward(self, input_ids, attention_mask):\n",
54
  " outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)\n",
@@ -58,7 +77,7 @@
58
  " return logits\n",
59
  "\n",
60
  "class TextDataset(Dataset):\n",
61
- " def __init__(self, texts, labels, tokenizer, max_length=200):\n",
62
  " self.encodings = tokenizer(\n",
63
  " texts,\n",
64
  " truncation=True,\n",
@@ -104,15 +123,15 @@
104
  },
105
  {
106
  "cell_type": "code",
107
- "execution_count": 3,
108
  "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
109
  "metadata": {
110
  "execution": {
111
- "iopub.execute_input": "2025-01-17T04:45:41.237451Z",
112
- "iopub.status.busy": "2025-01-17T04:45:41.237358Z",
113
- "iopub.status.idle": "2025-01-17T04:45:41.252075Z",
114
- "shell.execute_reply": "2025-01-17T04:45:41.251851Z",
115
- "shell.execute_reply.started": "2025-01-17T04:45:41.237443Z"
116
  }
117
  },
118
  "outputs": [],
@@ -128,15 +147,15 @@
128
  },
129
  {
130
  "cell_type": "code",
131
- "execution_count": 4,
132
  "id": "695bc080-bbd7-4937-af5b-50db1c936500",
133
  "metadata": {
134
  "execution": {
135
- "iopub.execute_input": "2025-01-17T04:45:41.252581Z",
136
- "iopub.status.busy": "2025-01-17T04:45:41.252476Z",
137
- "iopub.status.idle": "2025-01-17T04:45:41.255279Z",
138
- "shell.execute_reply": "2025-01-17T04:45:41.255045Z",
139
- "shell.execute_reply.started": "2025-01-17T04:45:41.252572Z"
140
  }
141
  },
142
  "outputs": [],
@@ -179,15 +198,15 @@
179
  },
180
  {
181
  "cell_type": "code",
182
- "execution_count": 5,
183
  "id": "792fd13f-e7cc-4d90-832d-c0da15e193cd",
184
  "metadata": {
185
  "execution": {
186
- "iopub.execute_input": "2025-01-17T04:45:41.255750Z",
187
- "iopub.status.busy": "2025-01-17T04:45:41.255661Z",
188
- "iopub.status.idle": "2025-01-17T04:47:17.151654Z",
189
- "shell.execute_reply": "2025-01-17T04:47:17.149076Z",
190
- "shell.execute_reply.started": "2025-01-17T04:45:41.255742Z"
191
  }
192
  },
193
  "outputs": [
@@ -195,18 +214,18 @@
195
  "name": "stdout",
196
  "output_type": "stream",
197
  "text": [
198
- "2025-01-16 20:45:45 Starting epoch 1.\n",
199
- "2025-01-16 20:46:15 Epoch 1/3 done, Average Loss: 1.9223\n",
200
- "2025-01-16 20:46:46 Epoch 2/3 done, Average Loss: 1.6052\n",
201
- "2025-01-16 20:47:17 Epoch 3/3 done, Average Loss: 1.2876\n"
202
  ]
203
  }
204
  ],
205
  "source": [
206
  "model, tokenizer = run_training(\n",
207
- " max_dataset_size=16 * 50,\n",
208
  " bert_variety='bert-base-uncased',\n",
209
- " max_length=200,\n",
210
  " num_epochs=3,\n",
211
  " batch_size=32,\n",
212
  ")"
@@ -214,15 +233,15 @@
214
  },
215
  {
216
  "cell_type": "code",
217
- "execution_count": 6,
218
  "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
219
  "metadata": {
220
  "execution": {
221
- "iopub.execute_input": "2025-01-17T04:47:17.158101Z",
222
- "iopub.status.busy": "2025-01-17T04:47:17.157305Z",
223
- "iopub.status.idle": "2025-01-17T04:47:17.333568Z",
224
- "shell.execute_reply": "2025-01-17T04:47:17.333317Z",
225
- "shell.execute_reply.started": "2025-01-17T04:47:17.157437Z"
226
  }
227
  },
228
  "outputs": [
@@ -230,7 +249,7 @@
230
  "name": "stdout",
231
  "output_type": "stream",
232
  "text": [
233
- "2025-01-16 20:47:17 Predictions: tensor([6, 1, 1, 6, 1, 6, 6], device='mps:0')\n"
234
  ]
235
  }
236
  ],
@@ -367,15 +386,15 @@
367
  },
368
  {
369
  "cell_type": "code",
370
- "execution_count": 10,
371
  "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
372
  "metadata": {
373
  "execution": {
374
- "iopub.execute_input": "2025-01-17T05:05:36.905668Z",
375
- "iopub.status.busy": "2025-01-17T05:05:36.905353Z",
376
- "iopub.status.idle": "2025-01-17T05:21:10.045463Z",
377
- "shell.execute_reply": "2025-01-17T05:21:10.044788Z",
378
- "shell.execute_reply.started": "2025-01-17T05:05:36.905630Z"
379
  }
380
  },
381
  "outputs": [
@@ -383,10 +402,10 @@
383
  "name": "stdout",
384
  "output_type": "stream",
385
  "text": [
386
- "2025-01-16 21:05:43 Starting epoch 1.\n",
387
- "2025-01-16 21:10:53 Epoch 1/3 done, Average Loss: 1.3415\n",
388
- "2025-01-16 21:16:02 Epoch 2/3 done, Average Loss: 0.7216\n",
389
- "2025-01-16 21:21:10 Epoch 3/3 done, Average Loss: 0.3978\n"
390
  ]
391
  }
392
  ],
@@ -400,17 +419,60 @@
400
  ")"
401
  ]
402
  },
403
  {
404
  "cell_type": "code",
405
- "execution_count": 11,
406
  "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
407
  "metadata": {
408
  "execution": {
409
- "iopub.execute_input": "2025-01-17T05:21:10.059844Z",
410
- "iopub.status.busy": "2025-01-17T05:21:10.058980Z",
411
- "iopub.status.idle": "2025-01-17T05:21:10.164116Z",
412
- "shell.execute_reply": "2025-01-17T05:21:10.163826Z",
413
- "shell.execute_reply.started": "2025-01-17T05:21:10.059552Z"
414
  }
415
  },
416
  "outputs": [
@@ -418,12 +480,12 @@
418
  "name": "stdout",
419
  "output_type": "stream",
420
  "text": [
421
- "2025-01-16 21:21:10 Predictions: tensor([0, 0, 3, 6, 2, 4, 6], device='mps:0')\n"
422
  ]
423
  }
424
  ],
425
  "source": [
426
- "model.eval()\n",
427
  "test_text = [\n",
428
  " 'This was a great experience!', # 0_not_relevant\n",
429
  " 'My favorite hike is Laguna de los Tres.', # 0_not_relevant\n",
@@ -433,7 +495,7 @@
433
  " 'Solar panels emit bad vibes.', # 4_solutions_harmful_unnecessary\n",
434
  " 'All those so-called scientists are Democrats.', # 6_proponents_biased\n",
435
  "]\n",
436
- "test_encoding = tokenizer(\n",
437
  " test_text,\n",
438
  " truncation=True,\n",
439
  " padding=True,\n",
@@ -443,46 +505,101 @@
443
  "with torch.no_grad():\n",
444
  " test_input_ids = test_encoding['input_ids'].to(device)\n",
445
  " test_attention_mask = test_encoding['attention_mask'].to(device)\n",
446
- " outputs = model(test_input_ids, test_attention_mask)\n",
447
  " predictions = torch.argmax(outputs, dim=1)\n",
448
  " my_print(f'Predictions: {predictions}')"
449
  ]
450
  },
451
  {
452
  "cell_type": "code",
453
- "execution_count": 12,
454
  "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
455
  "metadata": {
456
  "execution": {
457
- "iopub.execute_input": "2025-01-17T05:27:58.042752Z",
458
- "iopub.status.busy": "2025-01-17T05:27:58.042151Z",
459
- "iopub.status.idle": "2025-01-17T05:27:58.454054Z",
460
- "shell.execute_reply": "2025-01-17T05:27:58.453644Z",
461
- "shell.execute_reply.started": "2025-01-17T05:27:58.042662Z"
462
  }
463
  },
464
  "outputs": [
465
  {
466
- "ename": "AttributeError",
467
- "evalue": "'BertClassifier' object has no attribute 'push_to_hub'",
468
- "output_type": "error",
469
- "traceback": [
470
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
471
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
472
- "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m()\n",
473
- "File \u001b[0;32m~/miniconda3/envs/py313/lib/python3.13/site-packages/torch/nn/modules/module.py:1931\u001b[0m, in \u001b[0;36mModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m modules:\n\u001b[1;32m 1930\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m modules[name]\n\u001b[0;32m-> 1931\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[1;32m 1932\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1933\u001b[0m )\n",
474
- "\u001b[0;31mAttributeError\u001b[0m: 'BertClassifier' object has no attribute 'push_to_hub'"
475
- ]
476
  }
477
  ],
478
  "source": [
479
- "model.push_to_hub()"
480
  ]
481
  },
482
  {
483
  "cell_type": "code",
484
- "execution_count": null,
485
  "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
486
  "metadata": {},
487
  "outputs": [],
488
  "source": []
@@ -505,6 +622,208 @@
505
  "nbconvert_exporter": "python",
506
  "pygments_lexer": "ipython3",
507
  "version": "3.13.1"
508
  }
509
  },
510
  "nbformat": 4,
 
6
  "id": "73e72549-69f2-46b5-b0f5-655777139972",
7
  "metadata": {
8
  "execution": {
9
+ "iopub.execute_input": "2025-01-17T18:17:50.964659Z",
10
+ "iopub.status.busy": "2025-01-17T18:17:50.964450Z",
11
+ "iopub.status.idle": "2025-01-17T18:17:53.646932Z",
12
+ "shell.execute_reply": "2025-01-17T18:17:53.646697Z",
13
+ "shell.execute_reply.started": "2025-01-17T18:17:50.964637Z"
14
  }
15
  },
16
  "outputs": [],
 
20
  "import torch\n",
21
  "from torch import nn\n",
22
  "from transformers import BertTokenizer, BertModel\n",
23
+ "from huggingface_hub import PyTorchModelHubMixin, notebook_login\n",
24
  "from torch.utils.data import Dataset, DataLoader\n",
25
  "from datasets import load_dataset"
26
  ]
 
28
  {
29
  "cell_type": "code",
30
  "execution_count": 2,
31
+ "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
32
+ "metadata": {
33
+ "execution": {
34
+ "iopub.execute_input": "2025-01-17T18:17:53.648499Z",
35
+ "iopub.status.busy": "2025-01-17T18:17:53.648417Z",
36
+ "iopub.status.idle": "2025-01-17T18:17:53.650284Z",
37
+ "shell.execute_reply": "2025-01-17T18:17:53.650113Z",
38
+ "shell.execute_reply.started": "2025-01-17T18:17:53.648489Z"
39
+ }
40
+ },
41
+ "outputs": [],
42
+ "source": [
43
+ "notebook_login(new_session=False)"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 11,
49
  "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
50
  "metadata": {
51
  "execution": {
52
+ "iopub.execute_input": "2025-01-17T18:35:15.421761Z",
53
+ "iopub.status.busy": "2025-01-17T18:35:15.421353Z",
54
+ "iopub.status.idle": "2025-01-17T18:35:15.433782Z",
55
+ "shell.execute_reply": "2025-01-17T18:35:15.433001Z",
56
+ "shell.execute_reply.started": "2025-01-17T18:35:15.421734Z"
57
  }
58
  },
59
  "outputs": [],
 
62
  " time_str = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
63
  " print(time_str, x)\n",
64
  "\n",
65
+ "class BertClassifier(nn.Module, PyTorchModelHubMixin):\n",
66
+ " def __init__(self, num_labels=8, bert_variety='bert-base-uncased'):\n",
67
  " super().__init__()\n",
68
  " self.bert = BertModel.from_pretrained(bert_variety)\n",
69
  " self.dropout = nn.Dropout(0.05)\n",
70
+ " self.classifier = nn.Linear(self.bert.pooler.dense.out_features, num_labels)\n",
71
  "\n",
72
  " def forward(self, input_ids, attention_mask):\n",
73
  " outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)\n",
 
77
  " return logits\n",
78
  "\n",
79
  "class TextDataset(Dataset):\n",
80
+ " def __init__(self, texts, labels, tokenizer, max_length=512):\n",
81
  " self.encodings = tokenizer(\n",
82
  " texts,\n",
83
  " truncation=True,\n",
 
123
  },
124
  {
125
  "cell_type": "code",
126
+ "execution_count": 4,
127
  "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
128
  "metadata": {
129
  "execution": {
130
+ "iopub.execute_input": "2025-01-17T18:17:57.885732Z",
131
+ "iopub.status.busy": "2025-01-17T18:17:57.884455Z",
132
+ "iopub.status.idle": "2025-01-17T18:17:57.919509Z",
133
+ "shell.execute_reply": "2025-01-17T18:17:57.919081Z",
134
+ "shell.execute_reply.started": "2025-01-17T18:17:57.885667Z"
135
  }
136
  },
137
  "outputs": [],
 
147
  },
148
  {
149
  "cell_type": "code",
150
+ "execution_count": 5,
151
  "id": "695bc080-bbd7-4937-af5b-50db1c936500",
152
  "metadata": {
153
  "execution": {
154
+ "iopub.execute_input": "2025-01-17T18:17:58.556031Z",
155
+ "iopub.status.busy": "2025-01-17T18:17:58.555349Z",
156
+ "iopub.status.idle": "2025-01-17T18:17:58.564519Z",
157
+ "shell.execute_reply": "2025-01-17T18:17:58.563640Z",
158
+ "shell.execute_reply.started": "2025-01-17T18:17:58.555979Z"
159
  }
160
  },
161
  "outputs": [],
 
198
  },
199
  {
200
  "cell_type": "code",
201
+ "execution_count": 19,
202
  "id": "792fd13f-e7cc-4d90-832d-c0da15e193cd",
203
  "metadata": {
204
  "execution": {
205
+ "iopub.execute_input": "2025-01-17T15:22:41.286449Z",
206
+ "iopub.status.busy": "2025-01-17T15:22:41.285811Z",
207
+ "iopub.status.idle": "2025-01-17T15:24:35.507909Z",
208
+ "shell.execute_reply": "2025-01-17T15:24:35.506587Z",
209
+ "shell.execute_reply.started": "2025-01-17T15:22:41.286404Z"
210
  }
211
  },
212
  "outputs": [
 
214
  "name": "stdout",
215
  "output_type": "stream",
216
  "text": [
217
+ "2025-01-17 07:22:44 Starting epoch 1.\n",
218
+ "2025-01-17 07:23:21 Epoch 1/3 done, Average Loss: 1.8129\n",
219
+ "2025-01-17 07:23:58 Epoch 2/3 done, Average Loss: 1.3089\n",
220
+ "2025-01-17 07:24:35 Epoch 3/3 done, Average Loss: 0.8916\n"
221
  ]
222
  }
223
  ],
224
  "source": [
225
  "model, tokenizer = run_training(\n",
226
+ " max_dataset_size=16 * 100,\n",
227
  " bert_variety='bert-base-uncased',\n",
228
+ " max_length=128,\n",
229
  " num_epochs=3,\n",
230
  " batch_size=32,\n",
231
  ")"
 
233
  },
234
  {
235
  "cell_type": "code",
236
+ "execution_count": 21,
237
  "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
238
  "metadata": {
239
  "execution": {
240
+ "iopub.execute_input": "2025-01-17T15:24:46.754460Z",
241
+ "iopub.status.busy": "2025-01-17T15:24:46.753753Z",
242
+ "iopub.status.idle": "2025-01-17T15:24:47.249458Z",
243
+ "shell.execute_reply": "2025-01-17T15:24:47.249207Z",
244
+ "shell.execute_reply.started": "2025-01-17T15:24:46.754391Z"
245
  }
246
  },
247
  "outputs": [
 
249
  "name": "stdout",
250
  "output_type": "stream",
251
  "text": [
252
+ "2025-01-17 07:24:47 Predictions: tensor([0, 1, 3, 6, 2, 3, 6], device='mps:0')\n"
253
  ]
254
  }
255
  ],
 
386
  },
387
  {
388
  "cell_type": "code",
389
+ "execution_count": 12,
390
  "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
391
  "metadata": {
392
  "execution": {
393
+ "iopub.execute_input": "2025-01-17T18:35:15.434902Z",
394
+ "iopub.status.busy": "2025-01-17T18:35:15.434668Z",
395
+ "iopub.status.idle": "2025-01-17T18:50:43.167167Z",
396
+ "shell.execute_reply": "2025-01-17T18:50:43.166720Z",
397
+ "shell.execute_reply.started": "2025-01-17T18:35:15.434880Z"
398
  }
399
  },
400
  "outputs": [
 
402
  "name": "stdout",
403
  "output_type": "stream",
404
  "text": [
405
+ "2025-01-17 10:35:20 Starting epoch 1.\n",
406
+ "2025-01-17 10:40:29 Epoch 1/3 done, Average Loss: 1.2876\n",
407
+ "2025-01-17 10:45:37 Epoch 2/3 done, Average Loss: 0.7289\n",
408
+ "2025-01-17 10:50:43 Epoch 3/3 done, Average Loss: 0.3990\n"
409
  ]
410
  }
411
  ],
 
419
  ")"
420
  ]
421
  },
422
+ {
423
+ "cell_type": "markdown",
424
+ "id": "982ba556-c589-4cbb-b392-614942a64ab3",
425
+ "metadata": {},
426
+ "source": [
427
+ "# Model to upload"
428
+ ]
429
+ },
430
  {
431
  "cell_type": "code",
432
+ "execution_count": 6,
433
+ "id": "ac5f412c-a745-4327-9303-acf4c5b1efcd",
434
+ "metadata": {
435
+ "execution": {
436
+ "iopub.execute_input": "2025-01-17T18:19:11.590514Z",
437
+ "iopub.status.busy": "2025-01-17T18:19:11.589753Z",
438
+ "iopub.status.idle": "2025-01-17T18:26:45.645104Z",
439
+ "shell.execute_reply": "2025-01-17T18:26:45.644631Z",
440
+ "shell.execute_reply.started": "2025-01-17T18:19:11.590428Z"
441
+ }
442
+ },
443
+ "outputs": [
444
+ {
445
+ "name": "stdout",
446
+ "output_type": "stream",
447
+ "text": [
448
+ "2025-01-17 10:19:17 Starting epoch 1.\n",
449
+ "2025-01-17 10:21:47 Epoch 1/3 done, Average Loss: 1.2608\n",
450
+ "2025-01-17 10:24:16 Epoch 2/3 done, Average Loss: 0.7134\n",
451
+ "2025-01-17 10:26:45 Epoch 3/3 done, Average Loss: 0.3931\n"
452
+ ]
453
+ }
454
+ ],
455
+ "source": [
456
+ "model_final, tokenizer_final = run_training(\n",
457
+ " max_dataset_size='full',\n",
458
+ " bert_variety='bert-base-uncased',\n",
459
+ " max_length=128,\n",
460
+ " num_epochs=3,\n",
461
+ " batch_size=16,\n",
462
+ ")"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 7,
468
  "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
469
  "metadata": {
470
  "execution": {
471
+ "iopub.execute_input": "2025-01-17T18:26:45.646178Z",
472
+ "iopub.status.busy": "2025-01-17T18:26:45.646081Z",
473
+ "iopub.status.idle": "2025-01-17T18:26:45.722052Z",
474
+ "shell.execute_reply": "2025-01-17T18:26:45.721803Z",
475
+ "shell.execute_reply.started": "2025-01-17T18:26:45.646168Z"
476
  }
477
  },
478
  "outputs": [
 
480
  "name": "stdout",
481
  "output_type": "stream",
482
  "text": [
483
+ "2025-01-17 10:26:45 Predictions: tensor([0, 0, 3, 1, 2, 4, 6], device='mps:0')\n"
484
  ]
485
  }
486
  ],
487
  "source": [
488
+ "model_final.eval()\n",
489
  "test_text = [\n",
490
  " 'This was a great experience!', # 0_not_relevant\n",
491
  " 'My favorite hike is Laguna de los Tres.', # 0_not_relevant\n",
 
495
  " 'Solar panels emit bad vibes.', # 4_solutions_harmful_unnecessary\n",
496
  " 'All those so-called scientists are Democrats.', # 6_proponents_biased\n",
497
  "]\n",
498
+ "test_encoding = tokenizer_final(\n",
499
  " test_text,\n",
500
  " truncation=True,\n",
501
  " padding=True,\n",
 
505
  "with torch.no_grad():\n",
506
  " test_input_ids = test_encoding['input_ids'].to(device)\n",
507
  " test_attention_mask = test_encoding['attention_mask'].to(device)\n",
508
+ " outputs = model_final(test_input_ids, test_attention_mask)\n",
509
  " predictions = torch.argmax(outputs, dim=1)\n",
510
  " my_print(f'Predictions: {predictions}')"
511
  ]
512
  },
513
  {
514
  "cell_type": "code",
515
+ "execution_count": 10,
516
  "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
517
  "metadata": {
518
  "execution": {
519
+ "iopub.execute_input": "2025-01-17T18:32:40.094019Z",
520
+ "iopub.status.busy": "2025-01-17T18:32:40.093429Z",
521
+ "iopub.status.idle": "2025-01-17T18:35:15.419578Z",
522
+ "shell.execute_reply": "2025-01-17T18:35:15.418848Z",
523
+ "shell.execute_reply.started": "2025-01-17T18:32:40.093970Z"
524
  }
525
  },
526
  "outputs": [
527
  {
528
+ "data": {
529
+ "application/vnd.jupyter.widget-view+json": {
530
+ "model_id": "7dd2d0eb08624920b345ca85712f0169",
531
+ "version_major": 2,
532
+ "version_minor": 0
533
+ },
534
+ "text/plain": [
535
+ "model.safetensors: 0%| | 0.00/438M [00:00<?, ?B/s]"
536
+ ]
537
+ },
538
+ "metadata": {},
539
+ "output_type": "display_data"
540
+ },
541
+ {
542
+ "data": {
543
+ "text/plain": [
544
+ "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/bd94aa1344798fcf671ddd5f8a7bd4f4dc0b20c4', commit_message='Push model using huggingface_hub.', commit_description='', oid='bd94aa1344798fcf671ddd5f8a7bd4f4dc0b20c4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
545
+ ]
546
+ },
547
+ "execution_count": 10,
548
+ "metadata": {},
549
+ "output_type": "execute_result"
550
  }
551
  ],
552
  "source": [
553
+ "model_final.push_to_hub('frugal-ai-text-bert-base')"
554
  ]
555
  },
556
  {
557
  "cell_type": "code",
558
+ "execution_count": 9,
559
  "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
560
+ "metadata": {
561
+ "execution": {
562
+ "iopub.execute_input": "2025-01-17T18:31:37.682978Z",
563
+ "iopub.status.busy": "2025-01-17T18:31:37.682009Z",
564
+ "iopub.status.idle": "2025-01-17T18:31:39.578706Z",
565
+ "shell.execute_reply": "2025-01-17T18:31:39.577664Z",
566
+ "shell.execute_reply.started": "2025-01-17T18:31:37.682910Z"
567
+ }
568
+ },
569
+ "outputs": [
570
+ {
571
+ "data": {
572
+ "application/vnd.jupyter.widget-view+json": {
573
+ "model_id": "b62ae26d30534f8fa6057824124e9c95",
574
+ "version_major": 2,
575
+ "version_minor": 0
576
+ },
577
+ "text/plain": [
578
+ "README.md: 0%| | 0.00/320 [00:00<?, ?B/s]"
579
+ ]
580
+ },
581
+ "metadata": {},
582
+ "output_type": "display_data"
583
+ },
584
+ {
585
+ "data": {
586
+ "text/plain": [
587
+ "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/9814436ad5f77cd8c607aa5dba9b67e7983e8ca7', commit_message='Upload tokenizer', commit_description='', oid='9814436ad5f77cd8c607aa5dba9b67e7983e8ca7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
588
+ ]
589
+ },
590
+ "execution_count": 9,
591
+ "metadata": {},
592
+ "output_type": "execute_result"
593
+ }
594
+ ],
595
+ "source": [
596
+ "tokenizer_final.push_to_hub('frugal-ai-text-bert-base')"
597
+ ]
598
+ },
599
+ {
600
+ "cell_type": "code",
601
+ "execution_count": null,
602
+ "id": "863d3553-89a6-4188-a8d0-eaa0b6bccb6c",
603
  "metadata": {},
604
  "outputs": [],
605
  "source": []
 
622
  "nbconvert_exporter": "python",
623
  "pygments_lexer": "ipython3",
624
  "version": "3.13.1"
625
+ },
626
+ "widgets": {
627
+ "application/vnd.jupyter.widget-state+json": {
628
+ "state": {
629
+ "25776d7aede3476da6f33fc15fe300c8": {
630
+ "model_module": "@jupyter-widgets/controls",
631
+ "model_module_version": "2.0.0",
632
+ "model_name": "ProgressStyleModel",
633
+ "state": {
634
+ "description_width": ""
635
+ }
636
+ },
637
+ "3a03347251c644bd9b5f58bac49ba2b7": {
638
+ "model_module": "@jupyter-widgets/base",
639
+ "model_module_version": "2.0.0",
640
+ "model_name": "LayoutModel",
641
+ "state": {}
642
+ },
643
+ "3f7dd449d7f84420a836adb899c3b374": {
644
+ "model_module": "@jupyter-widgets/controls",
645
+ "model_module_version": "2.0.0",
646
+ "model_name": "HTMLStyleModel",
647
+ "state": {
648
+ "description_width": "",
649
+ "font_size": null,
650
+ "text_color": null
651
+ }
652
+ },
653
+ "47f3b8da36704934acf81f357a9da6c3": {
654
+ "model_module": "@jupyter-widgets/controls",
655
+ "model_module_version": "2.0.0",
656
+ "model_name": "FloatProgressModel",
657
+ "state": {
658
+ "bar_style": "success",
659
+ "layout": "IPY_MODEL_ae0e1835546645cd85915a133bd0b578",
660
+ "max": 437977072,
661
+ "style": "IPY_MODEL_25776d7aede3476da6f33fc15fe300c8",
662
+ "value": 437977072
663
+ }
664
+ },
665
+ "4eff913c8c554820b957c2192d04a8cd": {
666
+ "model_module": "@jupyter-widgets/controls",
667
+ "model_module_version": "2.0.0",
668
+ "model_name": "HTMLModel",
669
+ "state": {
670
+ "layout": "IPY_MODEL_54b8a0d455794f8881e6d9ceddcac787",
671
+ "style": "IPY_MODEL_3f7dd449d7f84420a836adb899c3b374",
672
+ "value": " 438M/438M [02:32&lt;00:00, 3.02MB/s]"
673
+ }
674
+ },
675
+ "54b8a0d455794f8881e6d9ceddcac787": {
676
+ "model_module": "@jupyter-widgets/base",
677
+ "model_module_version": "2.0.0",
678
+ "model_name": "LayoutModel",
679
+ "state": {}
680
+ },
681
+ "5c96c3617819467d9fb70aa3b716106e": {
682
+ "model_module": "@jupyter-widgets/base",
683
+ "model_module_version": "2.0.0",
684
+ "model_name": "LayoutModel",
685
+ "state": {}
686
+ },
687
+ "62f9a837c04142b5a2fd66097be6fb6e": {
688
+ "model_module": "@jupyter-widgets/base",
689
+ "model_module_version": "2.0.0",
690
+ "model_name": "LayoutModel",
691
+ "state": {}
692
+ },
693
+ "68c0e93ffde14a40b3599dff15512174": {
694
+ "model_module": "@jupyter-widgets/controls",
695
+ "model_module_version": "2.0.0",
696
+ "model_name": "HTMLStyleModel",
697
+ "state": {
698
+ "description_width": "",
699
+ "font_size": null,
700
+ "text_color": null
701
+ }
702
+ },
703
+ "6f679b19e9824e1cac8545d7244ec83a": {
704
+ "model_module": "@jupyter-widgets/controls",
705
+ "model_module_version": "2.0.0",
706
+ "model_name": "FloatProgressModel",
707
+ "state": {
708
+ "bar_style": "success",
709
+ "layout": "IPY_MODEL_9785d5bb51544986b4c51b63a39d46cf",
710
+ "max": 320,
711
+ "style": "IPY_MODEL_88bc5db626a242af8879201d263d9eef",
712
+ "value": 320
713
+ }
714
+ },
715
+ "7dd2d0eb08624920b345ca85712f0169": {
716
+ "model_module": "@jupyter-widgets/controls",
717
+ "model_module_version": "2.0.0",
718
+ "model_name": "HBoxModel",
719
+ "state": {
720
+ "children": [
721
+ "IPY_MODEL_bdca6adbcf2347729287c1d2dc44fa2e",
722
+ "IPY_MODEL_47f3b8da36704934acf81f357a9da6c3",
723
+ "IPY_MODEL_4eff913c8c554820b957c2192d04a8cd"
724
+ ],
725
+ "layout": "IPY_MODEL_3a03347251c644bd9b5f58bac49ba2b7"
726
+ }
727
+ },
728
+ "88bc5db626a242af8879201d263d9eef": {
729
+ "model_module": "@jupyter-widgets/controls",
730
+ "model_module_version": "2.0.0",
731
+ "model_name": "ProgressStyleModel",
732
+ "state": {
733
+ "description_width": ""
734
+ }
735
+ },
736
+ "9396575ac43b4832bb12e246801a2316": {
737
+ "model_module": "@jupyter-widgets/controls",
738
+ "model_module_version": "2.0.0",
739
+ "model_name": "HTMLModel",
740
+ "state": {
741
+ "layout": "IPY_MODEL_c16752a4cf734193accaae9835d55aab",
742
+ "style": "IPY_MODEL_c1b70a1ce9d149cf87169838a18f2e58",
743
+ "value": "README.md: 100%"
744
+ }
745
+ },
746
+ "9785d5bb51544986b4c51b63a39d46cf": {
747
+ "model_module": "@jupyter-widgets/base",
748
+ "model_module_version": "2.0.0",
749
+ "model_name": "LayoutModel",
750
+ "state": {}
751
+ },
752
+ "ae0e1835546645cd85915a133bd0b578": {
753
+ "model_module": "@jupyter-widgets/base",
754
+ "model_module_version": "2.0.0",
755
+ "model_name": "LayoutModel",
756
+ "state": {}
757
+ },
758
+ "b62ae26d30534f8fa6057824124e9c95": {
759
+ "model_module": "@jupyter-widgets/controls",
760
+ "model_module_version": "2.0.0",
761
+ "model_name": "HBoxModel",
762
+ "state": {
763
+ "children": [
764
+ "IPY_MODEL_9396575ac43b4832bb12e246801a2316",
765
+ "IPY_MODEL_6f679b19e9824e1cac8545d7244ec83a",
766
+ "IPY_MODEL_ce85ada4df3c41e9a9b35b7401cd1883"
767
+ ],
768
+ "layout": "IPY_MODEL_62f9a837c04142b5a2fd66097be6fb6e"
769
+ }
770
+ },
771
+ "bdca6adbcf2347729287c1d2dc44fa2e": {
772
+ "model_module": "@jupyter-widgets/controls",
773
+ "model_module_version": "2.0.0",
774
+ "model_name": "HTMLModel",
775
+ "state": {
776
+ "layout": "IPY_MODEL_5c96c3617819467d9fb70aa3b716106e",
777
+ "style": "IPY_MODEL_c18dc3ed330d4d97a0c9d7dba32a9217",
778
+ "value": "model.safetensors: 100%"
779
+ }
780
+ },
781
+ "c16752a4cf734193accaae9835d55aab": {
782
+ "model_module": "@jupyter-widgets/base",
783
+ "model_module_version": "2.0.0",
784
+ "model_name": "LayoutModel",
785
+ "state": {}
786
+ },
787
+ "c18dc3ed330d4d97a0c9d7dba32a9217": {
788
+ "model_module": "@jupyter-widgets/controls",
789
+ "model_module_version": "2.0.0",
790
+ "model_name": "HTMLStyleModel",
791
+ "state": {
792
+ "description_width": "",
793
+ "font_size": null,
794
+ "text_color": null
795
+ }
796
+ },
797
+ "c1b70a1ce9d149cf87169838a18f2e58": {
798
+ "model_module": "@jupyter-widgets/controls",
799
+ "model_module_version": "2.0.0",
800
+ "model_name": "HTMLStyleModel",
801
+ "state": {
802
+ "description_width": "",
803
+ "font_size": null,
804
+ "text_color": null
805
+ }
806
+ },
807
+ "ce85ada4df3c41e9a9b35b7401cd1883": {
808
+ "model_module": "@jupyter-widgets/controls",
809
+ "model_module_version": "2.0.0",
810
+ "model_name": "HTMLModel",
811
+ "state": {
812
+ "layout": "IPY_MODEL_dae692ab00184ab190368530f21dcad9",
813
+ "style": "IPY_MODEL_68c0e93ffde14a40b3599dff15512174",
814
+ "value": " 320/320 [00:00&lt;00:00, 21.4kB/s]"
815
+ }
816
+ },
817
+ "dae692ab00184ab190368530f21dcad9": {
818
+ "model_module": "@jupyter-widgets/base",
819
+ "model_module_version": "2.0.0",
820
+ "model_name": "LayoutModel",
821
+ "state": {}
822
+ }
823
+ },
824
+ "version_major": 2,
825
+ "version_minor": 0
826
+ }
827
  }
828
  },
829
  "nbformat": 4,
tasks/text.py CHANGED
@@ -3,15 +3,18 @@ from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
  import random
6
 
7
  from .utils.evaluation import TextEvaluationRequest
8
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
9
 
10
  router = APIRouter()
11
 
12
- DESCRIPTION = "Most common class baseline"
13
  ROUTE = "/text"
14
 
 
15
  def baseline_model(dataset_length: int):
16
  # Make random predictions (placeholder for actual model inference)
17
  #predictions = [random.randint(0, 7) for _ in range(dataset_length)]
@@ -22,6 +25,40 @@ def baseline_model(dataset_length: int):
22
  return predictions
23
 
24
 
25
  @router.post(ROUTE, tags=["Text Task"],
26
  description=DESCRIPTION)
27
  async def evaluate_text(request: TextEvaluationRequest):
@@ -67,8 +104,9 @@ async def evaluate_text(request: TextEvaluationRequest):
67
  #--------------------------------------------------------------------------------------------
68
 
69
  true_labels = test_dataset["label"]
70
- predictions = baseline_model(len(true_labels))
71
-
 
72
  #--------------------------------------------------------------------------------------------
73
  # YOUR MODEL INFERENCE STOPS HERE
74
  #--------------------------------------------------------------------------------------------
 
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
  import random
6
+ import torch
7
+ from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
8
 
9
  from .utils.evaluation import TextEvaluationRequest
10
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
11
 
12
  router = APIRouter()
13
 
14
+ DESCRIPTION = "bert base finetuned"
15
  ROUTE = "/text"
16
 
17
+
18
  def baseline_model(dataset_length: int):
19
  # Make random predictions (placeholder for actual model inference)
20
  #predictions = [random.randint(0, 7) for _ in range(dataset_length)]
 
25
  return predictions
26
 
27
 
28
+ def bert_model(test_dataset):
29
+ print('Starting my code block.')
30
+ texts = test_dataset["quote"]
31
+
32
+ model_repo = 'Nonnormalizable/frugal-ai-text-bert-base'
33
+ config = AutoConfig.from_pretrained(model_repo)
34
+ model = AutoModelForSequenceClassification.from_pretrained(model_repo)
35
+ tokenizer = AutoTokenizer.from_pretrained(model_repo)
36
+
37
+ if torch.cuda.is_available():
38
+ device = torch.device('cuda')
39
+ else:
40
+ device = torch.device('cpu')
41
+ print('device:', device)
42
+ test_encoding = tokenizer(
43
+ texts,
44
+ truncation=True,
45
+ padding=True,
46
+ return_tensors='pt',
47
+ )
48
+
49
+ model.eval()
50
+ with torch.no_grad():
51
+ test_input_ids = test_encoding['input_ids'].to(device)
52
+ test_attention_mask = test_encoding['attention_mask'].to(device)
53
+ print('Starting model run.')
54
+ outputs = model(test_input_ids, test_attention_mask)
55
+ print('End of model run.')
56
+ predictions = torch.argmax(outputs.logits, dim=1)
57
+
58
+ print('End of my code block.')
59
+ return predictions
60
+
61
+
62
  @router.post(ROUTE, tags=["Text Task"],
63
  description=DESCRIPTION)
64
  async def evaluate_text(request: TextEvaluationRequest):
 
104
  #--------------------------------------------------------------------------------------------
105
 
106
  true_labels = test_dataset["label"]
107
+ #predictions = baseline_model(len(true_labels))
108
+ predictions = bert_model(test_dataset)
109
+
110
  #--------------------------------------------------------------------------------------------
111
  # YOUR MODEL INFERENCE STOPS HERE
112
  #--------------------------------------------------------------------------------------------
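
One way to exercise the new code path before submitting is to run `bert_model` on a tiny hand-made split. The snippet below is a hypothetical smoke test, not part of this commit; `bert_model` is the function added above, while the two quotes and their labels are made up for illustration:

```python
# Hypothetical smoke test for bert_model() in tasks/text.py; not part of this commit.
from datasets import Dataset
from sklearn.metrics import accuracy_score

from tasks.text import bert_model  # assumes the package and its dependencies are importable

tiny = Dataset.from_dict({
    "quote": [
        "My favorite hike is Laguna de los Tres.",  # 0_not_relevant
        "Solar panels emit bad vibes.",             # 4_solutions_harmful_unnecessary
    ],
    "label": [0, 4],
})
preds = bert_model(tiny)  # downloads Nonnormalizable/frugal-ai-text-bert-base, runs one forward pass
print("accuracy:", accuracy_score(tiny["label"], preds.cpu().numpy()))
```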