Nonnormalizable committed on
Commit
c329742
·
1 Parent(s): 7bc734f

Point submission at bert tiny.

Browse files
Files changed (2) hide show
  1. Finetune BERT.ipynb +566 -149
  2. tasks/text.py +2 -2
Finetune BERT.ipynb CHANGED
@@ -14,11 +14,11 @@
14
  "id": "73e72549-69f2-46b5-b0f5-655777139972",
15
  "metadata": {
16
  "execution": {
17
- "iopub.execute_input": "2025-01-21T19:25:48.302003Z",
18
- "iopub.status.busy": "2025-01-21T19:25:48.301808Z",
19
- "iopub.status.idle": "2025-01-21T19:25:50.698806Z",
20
- "shell.execute_reply": "2025-01-21T19:25:50.698535Z",
21
- "shell.execute_reply.started": "2025-01-21T19:25:48.301982Z"
22
  }
23
  },
24
  "outputs": [],
@@ -45,11 +45,11 @@
45
  "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
46
  "metadata": {
47
  "execution": {
48
- "iopub.execute_input": "2025-01-21T19:25:50.699344Z",
49
- "iopub.status.busy": "2025-01-21T19:25:50.699200Z",
50
- "iopub.status.idle": "2025-01-21T19:25:50.701241Z",
51
- "shell.execute_reply": "2025-01-21T19:25:50.700993Z",
52
- "shell.execute_reply.started": "2025-01-21T19:25:50.699335Z"
53
  }
54
  },
55
  "outputs": [],
@@ -67,15 +67,15 @@
67
  },
68
  {
69
  "cell_type": "code",
70
- "execution_count": 3,
71
  "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
72
  "metadata": {
73
  "execution": {
74
- "iopub.execute_input": "2025-01-21T19:25:50.701789Z",
75
- "iopub.status.busy": "2025-01-21T19:25:50.701708Z",
76
- "iopub.status.idle": "2025-01-21T19:25:50.707095Z",
77
- "shell.execute_reply": "2025-01-21T19:25:50.706788Z",
78
- "shell.execute_reply.started": "2025-01-21T19:25:50.701781Z"
79
  }
80
  },
81
  "outputs": [],
@@ -188,20 +188,20 @@
188
  " metrics = print_model_status(\n",
189
  " epoch, num_epochs, model, train_dataloader, test_dataloader\n",
190
  " )\n",
191
- " return metrics"
192
  ]
193
  },
194
  {
195
  "cell_type": "code",
196
- "execution_count": 4,
197
  "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
198
  "metadata": {
199
  "execution": {
200
- "iopub.execute_input": "2025-01-21T19:25:50.707655Z",
201
- "iopub.status.busy": "2025-01-21T19:25:50.707519Z",
202
- "iopub.status.idle": "2025-01-21T19:25:50.718311Z",
203
- "shell.execute_reply": "2025-01-21T19:25:50.718037Z",
204
- "shell.execute_reply.started": "2025-01-21T19:25:50.707646Z"
205
  }
206
  },
207
  "outputs": [],
@@ -217,15 +217,15 @@
217
  },
218
  {
219
  "cell_type": "code",
220
- "execution_count": 5,
221
  "id": "695bc080-bbd7-4937-af5b-50db1c936500",
222
  "metadata": {
223
  "execution": {
224
- "iopub.execute_input": "2025-01-21T19:25:50.718754Z",
225
- "iopub.status.busy": "2025-01-21T19:25:50.718677Z",
226
- "iopub.status.idle": "2025-01-21T19:25:50.721834Z",
227
- "shell.execute_reply": "2025-01-21T19:25:50.721583Z",
228
- "shell.execute_reply.started": "2025-01-21T19:25:50.718746Z"
229
  }
230
  },
231
  "outputs": [],
@@ -321,57 +321,66 @@
321
  },
322
  {
323
  "cell_type": "code",
324
- "execution_count": null,
325
  "id": "34a7c310-c486-4db1-b94d-4363c3d3df5b",
326
  "metadata": {
327
  "execution": {
328
- "iopub.execute_input": "2025-01-21T19:25:50.724036Z",
329
- "iopub.status.busy": "2025-01-21T19:25:50.723968Z"
 
 
 
330
  }
331
  },
332
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
333
  "source": [
334
  "model, tokenizer, regime, metrics = run_training(\n",
335
- " max_dataset_size=16 * 10,\n",
336
  " bert_variety=\"google/bert_uncased_L-2_H-128_A-2\",\n",
337
  " max_length=128,\n",
338
- " num_epochs=4,\n",
339
  " batch_size=32,\n",
340
  ")"
341
  ]
342
  },
343
  {
344
  "cell_type": "code",
345
- "execution_count": null,
346
- "id": "32abaa1b-11f4-4793-97b8-36bb2dc29d56",
347
- "metadata": {},
348
- "outputs": [],
349
- "source": [
350
- "regime"
351
- ]
352
- },
353
- {
354
- "cell_type": "code",
355
- "execution_count": null,
356
- "id": "fe108690-bcc1-4667-9f8e-907a1a8ac2ec",
357
- "metadata": {},
358
- "outputs": [],
359
- "source": [
360
- "metrics"
361
- ]
362
- },
363
- {
364
- "cell_type": "code",
365
- "execution_count": null,
366
  "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
367
  "metadata": {
368
  "editable": true,
 
 
 
 
 
 
 
369
  "slideshow": {
370
  "slide_type": ""
371
  },
372
  "tags": []
373
  },
374
- "outputs": [],
 
 
 
 
 
 
 
 
375
  "source": [
376
  "model.eval()\n",
377
  "test_text = [\n",
@@ -436,15 +445,15 @@
436
  },
437
  {
438
  "cell_type": "code",
439
- "execution_count": 32,
440
  "id": "37794952-703c-466c-9d26-ee6cb2834246",
441
  "metadata": {
442
  "execution": {
443
- "iopub.execute_input": "2025-01-21T18:35:29.897653Z",
444
- "iopub.status.busy": "2025-01-21T18:35:29.897020Z",
445
- "iopub.status.idle": "2025-01-21T18:35:29.901748Z",
446
- "shell.execute_reply": "2025-01-21T18:35:29.901032Z",
447
- "shell.execute_reply.started": "2025-01-21T18:35:29.897609Z"
448
  }
449
  },
450
  "outputs": [],
@@ -459,15 +468,15 @@
459
  },
460
  {
461
  "cell_type": "code",
462
- "execution_count": 34,
463
  "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
464
  "metadata": {
465
  "execution": {
466
- "iopub.execute_input": "2025-01-21T18:42:35.614137Z",
467
- "iopub.status.busy": "2025-01-21T18:42:35.613694Z",
468
- "iopub.status.idle": "2025-01-21T18:45:35.341816Z",
469
- "shell.execute_reply": "2025-01-21T18:45:35.341535Z",
470
- "shell.execute_reply.started": "2025-01-21T18:42:35.614111Z"
471
  }
472
  },
473
  "outputs": [
@@ -475,22 +484,22 @@
475
  "name": "stdout",
476
  "output_type": "stream",
477
  "text": [
478
- "2025-01-21 10:43:44 Epoch 0/15 done. Loss: Train 2.177, Test 2.172; and Acc: Train 0.063, Test 0.071\n",
479
- "2025-01-21 10:43:52 Epoch 1/15 done. Loss: Train 1.786, Test 1.823; and Acc: Train 0.383, Test 0.354\n",
480
- "2025-01-21 10:44:00 Epoch 2/15 done. Loss: Train 1.579, Test 1.628; and Acc: Train 0.465, Test 0.436\n",
481
- "2025-01-21 10:44:07 Epoch 3/15 done. Loss: Train 1.431, Test 1.498; and Acc: Train 0.510, Test 0.484\n",
482
- "2025-01-21 10:44:14 Epoch 4/15 done. Loss: Train 1.304, Test 1.402; and Acc: Train 0.555, Test 0.515\n",
483
- "2025-01-21 10:44:22 Epoch 5/15 done. Loss: Train 1.212, Test 1.339; and Acc: Train 0.585, Test 0.535\n",
484
- "2025-01-21 10:44:29 Epoch 6/15 done. Loss: Train 1.128, Test 1.288; and Acc: Train 0.611, Test 0.546\n",
485
- "2025-01-21 10:44:36 Epoch 7/15 done. Loss: Train 1.039, Test 1.241; and Acc: Train 0.643, Test 0.559\n",
486
- "2025-01-21 10:44:44 Epoch 8/15 done. Loss: Train 1.003, Test 1.236; and Acc: Train 0.665, Test 0.555\n",
487
- "2025-01-21 10:44:51 Epoch 9/15 done. Loss: Train 0.897, Test 1.183; and Acc: Train 0.708, Test 0.568\n",
488
- "2025-01-21 10:44:58 Epoch 10/15 done. Loss: Train 0.852, Test 1.187; and Acc: Train 0.724, Test 0.572\n",
489
- "2025-01-21 10:45:06 Epoch 11/15 done. Loss: Train 0.769, Test 1.154; and Acc: Train 0.755, Test 0.581\n",
490
- "2025-01-21 10:45:13 Epoch 12/15 done. Loss: Train 0.764, Test 1.197; and Acc: Train 0.752, Test 0.573\n",
491
- "2025-01-21 10:45:20 Epoch 13/15 done. Loss: Train 0.660, Test 1.153; and Acc: Train 0.797, Test 0.590\n",
492
- "2025-01-21 10:45:28 Epoch 14/15 done. Loss: Train 0.588, Test 1.143; and Acc: Train 0.820, Test 0.594\n",
493
- "2025-01-21 10:45:35 Epoch 15/15 done. Loss: Train 0.579, Test 1.200; and Acc: Train 0.822, Test 0.575\n"
494
  ]
495
  }
496
  ],
@@ -511,25 +520,232 @@
511
  },
512
  {
513
  "cell_type": "code",
514
- "execution_count": 35,
515
  "id": "ec2516f9-79f2-4ae1-ab9a-9a51a7a50587",
516
  "metadata": {
517
  "execution": {
518
- "iopub.execute_input": "2025-01-21T18:57:29.278360Z",
519
- "iopub.status.busy": "2025-01-21T18:57:29.276985Z",
520
- "iopub.status.idle": "2025-01-21T18:57:29.289810Z",
521
- "shell.execute_reply": "2025-01-21T18:57:29.288574Z",
522
- "shell.execute_reply.started": "2025-01-21T18:57:29.278315Z"
523
  },
524
  "scrolled": true
525
  },
526
  "outputs": [
527
  {
528
- "ename": "SyntaxError",
529
- "evalue": "invalid syntax. Perhaps you forgot a comma? (3495586751.py, line 4)",
530
- "output_type": "error",
531
- "traceback": [
532
- "\u001b[0;36m Cell \u001b[0;32mIn[35], line 4\u001b[0;36m\u001b[0m\n\u001b[0;31m base_model=static_hyperparams[],\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax. Perhaps you forgot a comma?\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  ]
534
  }
535
  ],
@@ -553,16 +769,8 @@
553
  " shared_by=\"Andre Bach\",\n",
554
  " model_type=\"Text classification\",\n",
555
  " repo=model_and_repo_name,\n",
556
- " training_regime=dict(\n",
557
- " max_dataset_size=\"full\",\n",
558
- " bert_variety=\"bert-base-uncased\",\n",
559
- " max_length=256,\n",
560
- " num_epochs=3,\n",
561
- " batch_size=16,\n",
562
- " ),\n",
563
- " testing_metrics=dict(\n",
564
- " loss_train=0.154, loss_test=0.978, acc_train=0.959, acc_test=0.705\n",
565
- " ),\n",
566
  ")\n",
567
  "# print(card_data.to_yaml())\n",
568
  "print(card)"
@@ -570,15 +778,15 @@
570
  },
571
  {
572
  "cell_type": "code",
573
- "execution_count": 17,
574
  "id": "29d3bbf9-ab2a-48e2-a550-e16da5025720",
575
  "metadata": {
576
  "execution": {
577
- "iopub.execute_input": "2025-01-20T22:11:59.827681Z",
578
- "iopub.status.busy": "2025-01-20T22:11:59.827001Z",
579
- "iopub.status.idle": "2025-01-20T22:11:59.831852Z",
580
- "shell.execute_reply": "2025-01-20T22:11:59.831047Z",
581
- "shell.execute_reply.started": "2025-01-20T22:11:59.827635Z"
582
  }
583
  },
584
  "outputs": [],
@@ -589,15 +797,15 @@
589
  },
590
  {
591
  "cell_type": "code",
592
- "execution_count": 18,
593
  "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
594
  "metadata": {
595
  "execution": {
596
- "iopub.execute_input": "2025-01-20T22:12:00.576369Z",
597
- "iopub.status.busy": "2025-01-20T22:12:00.575421Z",
598
- "iopub.status.idle": "2025-01-20T22:12:01.065512Z",
599
- "shell.execute_reply": "2025-01-20T22:12:01.065237Z",
600
- "shell.execute_reply.started": "2025-01-20T22:12:00.576294Z"
601
  }
602
  },
603
  "outputs": [
@@ -605,7 +813,7 @@
605
  "name": "stdout",
606
  "output_type": "stream",
607
  "text": [
608
- "2025-01-20 14:12:01 Predictions: tensor([0, 0, 3, 6, 2, 4, 6], device='mps:0')\n"
609
  ]
610
  }
611
  ],
@@ -638,27 +846,27 @@
638
  },
639
  {
640
  "cell_type": "code",
641
- "execution_count": 19,
642
  "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
643
  "metadata": {
644
  "execution": {
645
- "iopub.execute_input": "2025-01-20T22:12:15.099356Z",
646
- "iopub.status.busy": "2025-01-20T22:12:15.098818Z",
647
- "iopub.status.idle": "2025-01-20T22:12:33.175760Z",
648
- "shell.execute_reply": "2025-01-20T22:12:33.174719Z",
649
- "shell.execute_reply.started": "2025-01-20T22:12:15.099315Z"
650
  }
651
  },
652
  "outputs": [
653
  {
654
  "data": {
655
  "application/vnd.jupyter.widget-view+json": {
656
- "model_id": "fbc09ae2c5614831a2fb02fa48a44fd1",
657
  "version_major": 2,
658
  "version_minor": 0
659
  },
660
  "text/plain": [
661
- "model.safetensors: 0%| | 0.00/438M [00:00<?, ?B/s]"
662
  ]
663
  },
664
  "metadata": {},
@@ -667,74 +875,88 @@
667
  {
668
  "data": {
669
  "text/plain": [
670
- "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/bdc2daf80d9647566ef56297f2cdc32f898170df', commit_message='Push model using huggingface_hub.', commit_description='', oid='bdc2daf80d9647566ef56297f2cdc32f898170df', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
671
  ]
672
  },
673
- "execution_count": 19,
674
  "metadata": {},
675
  "output_type": "execute_result"
676
  }
677
  ],
678
  "source": [
679
- "model_final.push_to_hub(\"frugal-ai-text-bert-base\")"
680
  ]
681
  },
682
  {
683
  "cell_type": "code",
684
- "execution_count": 20,
685
  "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
686
  "metadata": {
687
  "execution": {
688
- "iopub.execute_input": "2025-01-20T22:12:33.178424Z",
689
- "iopub.status.busy": "2025-01-20T22:12:33.178028Z",
690
- "iopub.status.idle": "2025-01-20T22:12:34.321979Z",
691
- "shell.execute_reply": "2025-01-20T22:12:34.320974Z",
692
- "shell.execute_reply.started": "2025-01-20T22:12:33.178397Z"
693
  }
694
  },
695
  "outputs": [
696
  {
697
  "data": {
 
 
 
 
 
698
  "text/plain": [
699
- "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/9081285a20fa0d62c5c1580aa17884de2b3bc236', commit_message='Upload tokenizer', commit_description='', oid='9081285a20fa0d62c5c1580aa17884de2b3bc236', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
700
  ]
701
  },
702
- "execution_count": 20,
 
 
 
 
 
 
 
 
 
703
  "metadata": {},
704
  "output_type": "execute_result"
705
  }
706
  ],
707
  "source": [
708
- "tokenizer_final.push_to_hub(\"frugal-ai-text-bert-base\")"
709
  ]
710
  },
711
  {
712
  "cell_type": "code",
713
- "execution_count": 21,
714
  "id": "863d3553-89a6-4188-a8d0-eaa0b6bccb6c",
715
  "metadata": {
716
  "execution": {
717
- "iopub.execute_input": "2025-01-20T22:12:34.324003Z",
718
- "iopub.status.busy": "2025-01-20T22:12:34.323725Z",
719
- "iopub.status.idle": "2025-01-20T22:12:35.350962Z",
720
- "shell.execute_reply": "2025-01-20T22:12:35.350482Z",
721
- "shell.execute_reply.started": "2025-01-20T22:12:34.323976Z"
722
  }
723
  },
724
  "outputs": [
725
  {
726
  "data": {
727
  "text/plain": [
728
- "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base/commit/b3078a95ea36d71c1d1bf0d153e069b83f74bddf', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='b3078a95ea36d71c1d1bf0d153e069b83f74bddf', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)"
729
  ]
730
  },
731
- "execution_count": 21,
732
  "metadata": {},
733
  "output_type": "execute_result"
734
  }
735
  ],
736
  "source": [
737
- "card.push_to_hub(\"Nonnormalizable/frugal-ai-text-bert-base\")"
738
  ]
739
  },
740
  {
@@ -766,7 +988,202 @@
766
  },
767
  "widgets": {
768
  "application/vnd.jupyter.widget-state+json": {
769
- "state": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
770
  "version_major": 2,
771
  "version_minor": 0
772
  }
 
14
  "id": "73e72549-69f2-46b5-b0f5-655777139972",
15
  "metadata": {
16
  "execution": {
17
+ "iopub.execute_input": "2025-01-22T00:15:28.938894Z",
18
+ "iopub.status.busy": "2025-01-22T00:15:28.938077Z",
19
+ "iopub.status.idle": "2025-01-22T00:15:34.317293Z",
20
+ "shell.execute_reply": "2025-01-22T00:15:34.316942Z",
21
+ "shell.execute_reply.started": "2025-01-22T00:15:28.938839Z"
22
  }
23
  },
24
  "outputs": [],
 
45
  "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
46
  "metadata": {
47
  "execution": {
48
+ "iopub.execute_input": "2025-01-22T00:15:34.318082Z",
49
+ "iopub.status.busy": "2025-01-22T00:15:34.317923Z",
50
+ "iopub.status.idle": "2025-01-22T00:15:34.320079Z",
51
+ "shell.execute_reply": "2025-01-22T00:15:34.319851Z",
52
+ "shell.execute_reply.started": "2025-01-22T00:15:34.318073Z"
53
  }
54
  },
55
  "outputs": [],
 
67
  },
68
  {
69
  "cell_type": "code",
70
+ "execution_count": 15,
71
  "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
72
  "metadata": {
73
  "execution": {
74
+ "iopub.execute_input": "2025-01-22T00:18:10.466025Z",
75
+ "iopub.status.busy": "2025-01-22T00:18:10.465289Z",
76
+ "iopub.status.idle": "2025-01-22T00:18:10.482505Z",
77
+ "shell.execute_reply": "2025-01-22T00:18:10.481605Z",
78
+ "shell.execute_reply.started": "2025-01-22T00:18:10.465973Z"
79
  }
80
  },
81
  "outputs": [],
 
188
  " metrics = print_model_status(\n",
189
  " epoch, num_epochs, model, train_dataloader, test_dataloader\n",
190
  " )\n",
191
+ " return metrics"
192
  ]
193
  },
194
  {
195
  "cell_type": "code",
196
+ "execution_count": 16,
197
  "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
198
  "metadata": {
199
  "execution": {
200
+ "iopub.execute_input": "2025-01-22T00:18:10.964716Z",
201
+ "iopub.status.busy": "2025-01-22T00:18:10.963608Z",
202
+ "iopub.status.idle": "2025-01-22T00:18:10.971834Z",
203
+ "shell.execute_reply": "2025-01-22T00:18:10.970949Z",
204
+ "shell.execute_reply.started": "2025-01-22T00:18:10.964671Z"
205
  }
206
  },
207
  "outputs": [],
 
217
  },
218
  {
219
  "cell_type": "code",
220
+ "execution_count": 17,
221
  "id": "695bc080-bbd7-4937-af5b-50db1c936500",
222
  "metadata": {
223
  "execution": {
224
+ "iopub.execute_input": "2025-01-22T00:18:11.117610Z",
225
+ "iopub.status.busy": "2025-01-22T00:18:11.117201Z",
226
+ "iopub.status.idle": "2025-01-22T00:18:11.128421Z",
227
+ "shell.execute_reply": "2025-01-22T00:18:11.127145Z",
228
+ "shell.execute_reply.started": "2025-01-22T00:18:11.117580Z"
229
  }
230
  },
231
  "outputs": [],
 
321
  },
322
  {
323
  "cell_type": "code",
324
+ "execution_count": 21,
325
  "id": "34a7c310-c486-4db1-b94d-4363c3d3df5b",
326
  "metadata": {
327
  "execution": {
328
+ "iopub.execute_input": "2025-01-22T00:18:31.584691Z",
329
+ "iopub.status.busy": "2025-01-22T00:18:31.584113Z",
330
+ "iopub.status.idle": "2025-01-22T00:18:38.462642Z",
331
+ "shell.execute_reply": "2025-01-22T00:18:38.462384Z",
332
+ "shell.execute_reply.started": "2025-01-22T00:18:31.584650Z"
333
  }
334
  },
335
+ "outputs": [
336
+ {
337
+ "name": "stdout",
338
+ "output_type": "stream",
339
+ "text": [
340
+ "2025-01-21 19:18:35 Epoch 0/3 done. Loss: Train 2.184, Test 2.190; and Acc: Train 0.131, Test 0.129\n",
341
+ "2025-01-21 19:18:36 Epoch 1/3 done. Loss: Train 1.979, Test 2.002; and Acc: Train 0.244, Test 0.222\n",
342
+ "2025-01-21 19:18:37 Epoch 2/3 done. Loss: Train 1.915, Test 1.949; and Acc: Train 0.277, Test 0.258\n",
343
+ "2025-01-21 19:18:38 Epoch 3/3 done. Loss: Train 1.873, Test 1.917; and Acc: Train 0.276, Test 0.259\n"
344
+ ]
345
+ }
346
+ ],
347
  "source": [
348
  "model, tokenizer, regime, metrics = run_training(\n",
349
+ " max_dataset_size=16 * 100,\n",
350
  " bert_variety=\"google/bert_uncased_L-2_H-128_A-2\",\n",
351
  " max_length=128,\n",
352
+ " num_epochs=3,\n",
353
  " batch_size=32,\n",
354
  ")"
355
  ]
356
  },
357
  {
358
  "cell_type": "code",
359
+ "execution_count": 23,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
361
  "metadata": {
362
  "editable": true,
363
+ "execution": {
364
+ "iopub.execute_input": "2025-01-22T00:18:46.417009Z",
365
+ "iopub.status.busy": "2025-01-22T00:18:46.416419Z",
366
+ "iopub.status.idle": "2025-01-22T00:18:46.529320Z",
367
+ "shell.execute_reply": "2025-01-22T00:18:46.529078Z",
368
+ "shell.execute_reply.started": "2025-01-22T00:18:46.416962Z"
369
+ },
370
  "slideshow": {
371
  "slide_type": ""
372
  },
373
  "tags": []
374
  },
375
+ "outputs": [
376
+ {
377
+ "name": "stdout",
378
+ "output_type": "stream",
379
+ "text": [
380
+ "2025-01-21 19:18:46 Predictions: tensor([0, 0, 0, 0, 0, 0, 0], device='mps:0')\n"
381
+ ]
382
+ }
383
+ ],
384
  "source": [
385
  "model.eval()\n",
386
  "test_text = [\n",
 
445
  },
446
  {
447
  "cell_type": "code",
448
+ "execution_count": 24,
449
  "id": "37794952-703c-466c-9d26-ee6cb2834246",
450
  "metadata": {
451
  "execution": {
452
+ "iopub.execute_input": "2025-01-22T00:19:05.789872Z",
453
+ "iopub.status.busy": "2025-01-22T00:19:05.789108Z",
454
+ "iopub.status.idle": "2025-01-22T00:19:05.796074Z",
455
+ "shell.execute_reply": "2025-01-22T00:19:05.794974Z",
456
+ "shell.execute_reply.started": "2025-01-22T00:19:05.789815Z"
457
  }
458
  },
459
  "outputs": [],
 
468
  },
469
  {
470
  "cell_type": "code",
471
+ "execution_count": 25,
472
  "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
473
  "metadata": {
474
  "execution": {
475
+ "iopub.execute_input": "2025-01-22T00:19:06.183379Z",
476
+ "iopub.status.busy": "2025-01-22T00:19:06.182544Z",
477
+ "iopub.status.idle": "2025-01-22T00:21:02.201321Z",
478
+ "shell.execute_reply": "2025-01-22T00:21:02.201016Z",
479
+ "shell.execute_reply.started": "2025-01-22T00:19:06.183320Z"
480
  }
481
  },
482
  "outputs": [
 
484
  "name": "stdout",
485
  "output_type": "stream",
486
  "text": [
487
+ "2025-01-21 19:19:11 Epoch 0/15 done. Loss: Train 2.055, Test 2.058; and Acc: Train 0.189, Test 0.191\n",
488
+ "2025-01-21 19:19:19 Epoch 1/15 done. Loss: Train 1.772, Test 1.805; and Acc: Train 0.354, Test 0.321\n",
489
+ "2025-01-21 19:19:26 Epoch 2/15 done. Loss: Train 1.530, Test 1.578; and Acc: Train 0.468, Test 0.446\n",
490
+ "2025-01-21 19:19:33 Epoch 3/15 done. Loss: Train 1.373, Test 1.437; and Acc: Train 0.518, Test 0.500\n",
491
+ "2025-01-21 19:19:41 Epoch 4/15 done. Loss: Train 1.254, Test 1.353; and Acc: Train 0.572, Test 0.541\n",
492
+ "2025-01-21 19:19:48 Epoch 5/15 done. Loss: Train 1.159, Test 1.289; and Acc: Train 0.597, Test 0.568\n",
493
+ "2025-01-21 19:19:55 Epoch 6/15 done. Loss: Train 1.068, Test 1.241; and Acc: Train 0.634, Test 0.567\n",
494
+ "2025-01-21 19:20:03 Epoch 7/15 done. Loss: Train 0.988, Test 1.199; and Acc: Train 0.668, Test 0.589\n",
495
+ "2025-01-21 19:20:10 Epoch 8/15 done. Loss: Train 0.911, Test 1.176; and Acc: Train 0.700, Test 0.587\n",
496
+ "2025-01-21 19:20:18 Epoch 9/15 done. Loss: Train 0.858, Test 1.169; and Acc: Train 0.721, Test 0.587\n",
497
+ "2025-01-21 19:20:25 Epoch 10/15 done. Loss: Train 0.782, Test 1.151; and Acc: Train 0.747, Test 0.599\n",
498
+ "2025-01-21 19:20:32 Epoch 11/15 done. Loss: Train 0.717, Test 1.143; and Acc: Train 0.771, Test 0.604\n",
499
+ "2025-01-21 19:20:40 Epoch 12/15 done. Loss: Train 0.657, Test 1.135; and Acc: Train 0.794, Test 0.610\n",
500
+ "2025-01-21 19:20:47 Epoch 13/15 done. Loss: Train 0.612, Test 1.147; and Acc: Train 0.819, Test 0.597\n",
501
+ "2025-01-21 19:20:54 Epoch 14/15 done. Loss: Train 0.553, Test 1.152; and Acc: Train 0.835, Test 0.599\n",
502
+ "2025-01-21 19:21:02 Epoch 15/15 done. Loss: Train 0.509, Test 1.166; and Acc: Train 0.857, Test 0.597\n"
503
  ]
504
  }
505
  ],
 
520
  },
521
  {
522
  "cell_type": "code",
523
+ "execution_count": 28,
524
  "id": "ec2516f9-79f2-4ae1-ab9a-9a51a7a50587",
525
  "metadata": {
526
  "execution": {
527
+ "iopub.execute_input": "2025-01-22T00:23:23.018234Z",
528
+ "iopub.status.busy": "2025-01-22T00:23:23.017592Z",
529
+ "iopub.status.idle": "2025-01-22T00:23:23.049365Z",
530
+ "shell.execute_reply": "2025-01-22T00:23:23.048870Z",
531
+ "shell.execute_reply.started": "2025-01-22T00:23:23.018186Z"
532
  },
533
  "scrolled": true
534
  },
535
  "outputs": [
536
  {
537
+ "name": "stdout",
538
+ "output_type": "stream",
539
+ "text": [
540
+ "---\n",
541
+ "base_model: google/bert_uncased_L-2_H-128_A-2\n",
542
+ "datasets:\n",
543
+ "- QuotaClimat/frugalaichallenge-text-train\n",
544
+ "language:\n",
545
+ "- en\n",
546
+ "license: apache-2.0\n",
547
+ "model_name: frugal-ai-text-bert-tiny\n",
548
+ "pipeline_tag: text-classification\n",
549
+ "tags:\n",
550
+ "- model_hub_mixin\n",
551
+ "- pytorch_model_hub_mixin\n",
552
+ "- climate\n",
553
+ "---\n",
554
+ "\n",
555
+ "# Model Card for Model ID\n",
556
+ "\n",
557
+ "<!-- Provide a quick summary of what the model is/does. -->\n",
558
+ "\n",
559
+ "Classify text into 8 categories of climate misinformation.\n",
560
+ "\n",
561
+ "## Model Details\n",
562
+ "\n",
563
+ "### Model Description\n",
564
+ "\n",
565
+ "<!-- Provide a longer summary of what this model is. -->\n",
566
+ "\n",
567
+ "Fine-tuned BERT for classifying climate information as part of the Frugal AI Challenge, for submission to https://huggingface.co/frugal-ai-challenge and scoring on accuracy and efficiency. Trained on only the non-evaluation 80% of the data, so its (non-cheating) score will be lower.\n",
568
+ "\n",
569
+ "- **Developed by:** Andre Bach\n",
570
+ "- **Funded by [optional]:** N/A\n",
571
+ "- **Shared by [optional]:** Andre Bach\n",
572
+ "- **Model type:** Text classification\n",
573
+ "- **Language(s) (NLP):** ['en']\n",
574
+ "- **License:** apache-2.0\n",
575
+ "- **Finetuned from model [optional]:** google/bert_uncased_L-2_H-128_A-2\n",
576
+ "\n",
577
+ "### Model Sources [optional]\n",
578
+ "\n",
579
+ "<!-- Provide the basic links for the model. -->\n",
580
+ "\n",
581
+ "- **Repository:** frugal-ai-text-bert-tiny\n",
582
+ "- **Paper [optional]:** [More Information Needed]\n",
583
+ "- **Demo [optional]:** [More Information Needed]\n",
584
+ "\n",
585
+ "## Uses\n",
586
+ "\n",
587
+ "<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->\n",
588
+ "\n",
589
+ "### Direct Use\n",
590
+ "\n",
591
+ "<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->\n",
592
+ "\n",
593
+ "[More Information Needed]\n",
594
+ "\n",
595
+ "### Downstream Use [optional]\n",
596
+ "\n",
597
+ "<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->\n",
598
+ "\n",
599
+ "[More Information Needed]\n",
600
+ "\n",
601
+ "### Out-of-Scope Use\n",
602
+ "\n",
603
+ "<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->\n",
604
+ "\n",
605
+ "[More Information Needed]\n",
606
+ "\n",
607
+ "## Bias, Risks, and Limitations\n",
608
+ "\n",
609
+ "<!-- This section is meant to convey both technical and sociotechnical limitations. -->\n",
610
+ "\n",
611
+ "[More Information Needed]\n",
612
+ "\n",
613
+ "### Recommendations\n",
614
+ "\n",
615
+ "<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->\n",
616
+ "\n",
617
+ "Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.\n",
618
+ "\n",
619
+ "## How to Get Started with the Model\n",
620
+ "\n",
621
+ "Use the code below to get started with the model.\n",
622
+ "\n",
623
+ "[More Information Needed]\n",
624
+ "\n",
625
+ "## Training Details\n",
626
+ "\n",
627
+ "### Training Data\n",
628
+ "\n",
629
+ "<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->\n",
630
+ "\n",
631
+ "[More Information Needed]\n",
632
+ "\n",
633
+ "### Training Procedure\n",
634
+ "\n",
635
+ "<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->\n",
636
+ "\n",
637
+ "#### Preprocessing [optional]\n",
638
+ "\n",
639
+ "[More Information Needed]\n",
640
+ "\n",
641
+ "\n",
642
+ "#### Training Hyperparameters\n",
643
+ "\n",
644
+ "- **Training regime:** {'max_dataset_size': 'full', 'bert_variety': 'google/bert_uncased_L-2_H-128_A-2', 'max_length': 256, 'num_epochs': 15, 'batch_size': 16} <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->\n",
645
+ "\n",
646
+ "#### Speeds, Sizes, Times [optional]\n",
647
+ "\n",
648
+ "<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->\n",
649
+ "\n",
650
+ "[More Information Needed]\n",
651
+ "\n",
652
+ "## Evaluation\n",
653
+ "\n",
654
+ "<!-- This section describes the evaluation protocols and provides the results. -->\n",
655
+ "\n",
656
+ "### Testing Data, Factors & Metrics\n",
657
+ "\n",
658
+ "#### Testing Data\n",
659
+ "\n",
660
+ "<!-- This should link to a Dataset Card if possible. -->\n",
661
+ "\n",
662
+ "[More Information Needed]\n",
663
+ "\n",
664
+ "#### Factors\n",
665
+ "\n",
666
+ "<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->\n",
667
+ "\n",
668
+ "[More Information Needed]\n",
669
+ "\n",
670
+ "#### Metrics\n",
671
+ "\n",
672
+ "<!-- These are the evaluation metrics being used, ideally with a description of why. -->\n",
673
+ "\n",
674
+ "{'train_loss': 0.5085738757594687, 'train_acc': 0.8565270935960592, 'test_loss': 1.1659069603139705, 'test_acc': 0.5972108285479901}\n",
675
+ "\n",
676
+ "### Results\n",
677
+ "\n",
678
+ "[More Information Needed]\n",
679
+ "\n",
680
+ "#### Summary\n",
681
+ "\n",
682
+ "\n",
683
+ "\n",
684
+ "## Model Examination [optional]\n",
685
+ "\n",
686
+ "<!-- Relevant interpretability work for the model goes here -->\n",
687
+ "\n",
688
+ "[More Information Needed]\n",
689
+ "\n",
690
+ "## Environmental Impact\n",
691
+ "\n",
692
+ "<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->\n",
693
+ "\n",
694
+ "Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).\n",
695
+ "\n",
696
+ "- **Hardware Type:** [More Information Needed]\n",
697
+ "- **Hours used:** [More Information Needed]\n",
698
+ "- **Cloud Provider:** [More Information Needed]\n",
699
+ "- **Compute Region:** [More Information Needed]\n",
700
+ "- **Carbon Emitted:** [More Information Needed]\n",
701
+ "\n",
702
+ "## Technical Specifications [optional]\n",
703
+ "\n",
704
+ "### Model Architecture and Objective\n",
705
+ "\n",
706
+ "[More Information Needed]\n",
707
+ "\n",
708
+ "### Compute Infrastructure\n",
709
+ "\n",
710
+ "[More Information Needed]\n",
711
+ "\n",
712
+ "#### Hardware\n",
713
+ "\n",
714
+ "[More Information Needed]\n",
715
+ "\n",
716
+ "#### Software\n",
717
+ "\n",
718
+ "[More Information Needed]\n",
719
+ "\n",
720
+ "## Citation [optional]\n",
721
+ "\n",
722
+ "<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->\n",
723
+ "\n",
724
+ "**BibTeX:**\n",
725
+ "\n",
726
+ "[More Information Needed]\n",
727
+ "\n",
728
+ "**APA:**\n",
729
+ "\n",
730
+ "[More Information Needed]\n",
731
+ "\n",
732
+ "## Glossary [optional]\n",
733
+ "\n",
734
+ "<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->\n",
735
+ "\n",
736
+ "[More Information Needed]\n",
737
+ "\n",
738
+ "## More Information [optional]\n",
739
+ "\n",
740
+ "[More Information Needed]\n",
741
+ "\n",
742
+ "## Model Card Authors [optional]\n",
743
+ "\n",
744
+ "[More Information Needed]\n",
745
+ "\n",
746
+ "## Model Card Contact\n",
747
+ "\n",
748
+ "[More Information Needed]\n"
749
  ]
750
  }
751
  ],
 
769
  " shared_by=\"Andre Bach\",\n",
770
  " model_type=\"Text classification\",\n",
771
  " repo=model_and_repo_name,\n",
772
+ " training_regime=training_regime,\n",
773
+ " testing_metrics=testing_metrics,\n",
 
 
 
 
 
 
 
 
774
  ")\n",
775
  "# print(card_data.to_yaml())\n",
776
  "print(card)"
 
778
  },
779
  {
780
  "cell_type": "code",
781
+ "execution_count": 30,
782
  "id": "29d3bbf9-ab2a-48e2-a550-e16da5025720",
783
  "metadata": {
784
  "execution": {
785
+ "iopub.execute_input": "2025-01-22T00:23:51.131078Z",
786
+ "iopub.status.busy": "2025-01-22T00:23:51.130578Z",
787
+ "iopub.status.idle": "2025-01-22T00:23:51.135440Z",
788
+ "shell.execute_reply": "2025-01-22T00:23:51.134263Z",
789
+ "shell.execute_reply.started": "2025-01-22T00:23:51.131042Z"
790
  }
791
  },
792
  "outputs": [],
 
797
  },
798
  {
799
  "cell_type": "code",
800
+ "execution_count": 34,
801
  "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
802
  "metadata": {
803
  "execution": {
804
+ "iopub.execute_input": "2025-01-22T00:24:18.616547Z",
805
+ "iopub.status.busy": "2025-01-22T00:24:18.615990Z",
806
+ "iopub.status.idle": "2025-01-22T00:24:18.669435Z",
807
+ "shell.execute_reply": "2025-01-22T00:24:18.669063Z",
808
+ "shell.execute_reply.started": "2025-01-22T00:24:18.616509Z"
809
  }
810
  },
811
  "outputs": [
 
813
  "name": "stdout",
814
  "output_type": "stream",
815
  "text": [
816
+ "2025-01-21 19:24:18 Predictions: tensor([0, 0, 3, 1, 2, 6, 6], device='mps:0')\n"
817
  ]
818
  }
819
  ],
 
846
  },
847
  {
848
  "cell_type": "code",
849
+ "execution_count": 35,
850
  "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
851
  "metadata": {
852
  "execution": {
853
+ "iopub.execute_input": "2025-01-22T00:24:41.153062Z",
854
+ "iopub.status.busy": "2025-01-22T00:24:41.152049Z",
855
+ "iopub.status.idle": "2025-01-22T00:24:43.436376Z",
856
+ "shell.execute_reply": "2025-01-22T00:24:43.435250Z",
857
+ "shell.execute_reply.started": "2025-01-22T00:24:41.153018Z"
858
  }
859
  },
860
  "outputs": [
861
  {
862
  "data": {
863
  "application/vnd.jupyter.widget-view+json": {
864
+ "model_id": "ef4fd0b071034f7d9cba6aa0ab69d148",
865
  "version_major": 2,
866
  "version_minor": 0
867
  },
868
  "text/plain": [
869
+ "model.safetensors: 0%| | 0.00/17.6M [00:00<?, ?B/s]"
870
  ]
871
  },
872
  "metadata": {},
 
875
  {
876
  "data": {
877
  "text/plain": [
878
+ "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/512fbc46e1cfc7456f4e9f2331a30d66a9052d88', commit_message='Push model using huggingface_hub.', commit_description='', oid='512fbc46e1cfc7456f4e9f2331a30d66a9052d88', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
879
  ]
880
  },
881
+ "execution_count": 35,
882
  "metadata": {},
883
  "output_type": "execute_result"
884
  }
885
  ],
886
  "source": [
887
+ "model_final.push_to_hub(model_and_repo_name)"
888
  ]
889
  },
890
  {
891
  "cell_type": "code",
892
+ "execution_count": 36,
893
  "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
894
  "metadata": {
895
  "execution": {
896
+ "iopub.execute_input": "2025-01-22T00:24:48.887758Z",
897
+ "iopub.status.busy": "2025-01-22T00:24:48.887178Z",
898
+ "iopub.status.idle": "2025-01-22T00:24:49.581460Z",
899
+ "shell.execute_reply": "2025-01-22T00:24:49.580127Z",
900
+ "shell.execute_reply.started": "2025-01-22T00:24:48.887716Z"
901
  }
902
  },
903
  "outputs": [
904
  {
905
  "data": {
906
+ "application/vnd.jupyter.widget-view+json": {
907
+ "model_id": "f3f6ee50ec314983a492ab7f4f5ef1bc",
908
+ "version_major": 2,
909
+ "version_minor": 0
910
+ },
911
  "text/plain": [
912
+ "README.md: 0%| | 0.00/320 [00:00<?, ?B/s]"
913
  ]
914
  },
915
+ "metadata": {},
916
+ "output_type": "display_data"
917
+ },
918
+ {
919
+ "data": {
920
+ "text/plain": [
921
+ "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/1870eebdb54a6a636d7dd32a5d923abc8d1baaec', commit_message='Upload tokenizer', commit_description='', oid='1870eebdb54a6a636d7dd32a5d923abc8d1baaec', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
922
+ ]
923
+ },
924
+ "execution_count": 36,
925
  "metadata": {},
926
  "output_type": "execute_result"
927
  }
928
  ],
929
  "source": [
930
+ "tokenizer_final.push_to_hub(model_and_repo_name)"
931
  ]
932
  },
933
  {
934
  "cell_type": "code",
935
+ "execution_count": 38,
936
  "id": "863d3553-89a6-4188-a8d0-eaa0b6bccb6c",
937
  "metadata": {
938
  "execution": {
939
+ "iopub.execute_input": "2025-01-22T00:25:24.402856Z",
940
+ "iopub.status.busy": "2025-01-22T00:25:24.402275Z",
941
+ "iopub.status.idle": "2025-01-22T00:25:25.011133Z",
942
+ "shell.execute_reply": "2025-01-22T00:25:25.009553Z",
943
+ "shell.execute_reply.started": "2025-01-22T00:25:24.402817Z"
944
  }
945
  },
946
  "outputs": [
947
  {
948
  "data": {
949
  "text/plain": [
950
+ "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/62d658978d97614f10c3d69b6595a0fb6b8a2d4c', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='62d658978d97614f10c3d69b6595a0fb6b8a2d4c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
951
  ]
952
  },
953
+ "execution_count": 38,
954
  "metadata": {},
955
  "output_type": "execute_result"
956
  }
957
  ],
958
  "source": [
959
+ "card.push_to_hub(f\"Nonnormalizable/{model_and_repo_name}\")"
960
  ]
961
  },
962
  {
 
988
  },
989
  "widgets": {
990
  "application/vnd.jupyter.widget-state+json": {
991
+ "state": {
992
+ "05dc5be1c7754da082079d8c8fdd3a2a": {
993
+ "model_module": "@jupyter-widgets/controls",
994
+ "model_module_version": "2.0.0",
995
+ "model_name": "HTMLStyleModel",
996
+ "state": {
997
+ "description_width": "",
998
+ "font_size": null,
999
+ "text_color": null
1000
+ }
1001
+ },
1002
+ "07fc7cde058940a29aba80b9b1f18247": {
1003
+ "model_module": "@jupyter-widgets/base",
1004
+ "model_module_version": "2.0.0",
1005
+ "model_name": "LayoutModel",
1006
+ "state": {}
1007
+ },
1008
+ "14a18f1cb1f244b28fa35c504023c27d": {
1009
+ "model_module": "@jupyter-widgets/controls",
1010
+ "model_module_version": "2.0.0",
1011
+ "model_name": "HTMLModel",
1012
+ "state": {
1013
+ "layout": "IPY_MODEL_07fc7cde058940a29aba80b9b1f18247",
1014
+ "style": "IPY_MODEL_05dc5be1c7754da082079d8c8fdd3a2a",
1015
+ "value": "model.safetensors: 100%"
1016
+ }
1017
+ },
1018
+ "1aedcea94f5a43919f4b0dd14eb54ab0": {
1019
+ "model_module": "@jupyter-widgets/controls",
1020
+ "model_module_version": "2.0.0",
1021
+ "model_name": "FloatProgressModel",
1022
+ "state": {
1023
+ "bar_style": "success",
1024
+ "layout": "IPY_MODEL_3a0167722aae49eb87c0cc1a0b381a7c",
1025
+ "max": 320,
1026
+ "style": "IPY_MODEL_75738a2d58574271b291cf982f86074a",
1027
+ "value": 320
1028
+ }
1029
+ },
1030
+ "350a27df5da248b3b06dbad6b28bc789": {
1031
+ "model_module": "@jupyter-widgets/controls",
1032
+ "model_module_version": "2.0.0",
1033
+ "model_name": "FloatProgressModel",
1034
+ "state": {
1035
+ "bar_style": "success",
1036
+ "layout": "IPY_MODEL_eaf57ba3273f42d4900ce2ebae398892",
1037
+ "max": 17552376,
1038
+ "style": "IPY_MODEL_aa86a038e70b43368dd5ccb65b653f86",
1039
+ "value": 17552376
1040
+ }
1041
+ },
1042
+ "3a0167722aae49eb87c0cc1a0b381a7c": {
1043
+ "model_module": "@jupyter-widgets/base",
1044
+ "model_module_version": "2.0.0",
1045
+ "model_name": "LayoutModel",
1046
+ "state": {}
1047
+ },
1048
+ "40d60079070e4bd2b81fbcfd695cd759": {
1049
+ "model_module": "@jupyter-widgets/base",
1050
+ "model_module_version": "2.0.0",
1051
+ "model_name": "LayoutModel",
1052
+ "state": {}
1053
+ },
1054
+ "605845a520ee426fa2330ded19d8a617": {
1055
+ "model_module": "@jupyter-widgets/controls",
1056
+ "model_module_version": "2.0.0",
1057
+ "model_name": "HTMLStyleModel",
1058
+ "state": {
1059
+ "description_width": "",
1060
+ "font_size": null,
1061
+ "text_color": null
1062
+ }
1063
+ },
1064
+ "7402f6bafb5f485e9f172cdf017572c7": {
1065
+ "model_module": "@jupyter-widgets/base",
1066
+ "model_module_version": "2.0.0",
1067
+ "model_name": "LayoutModel",
1068
+ "state": {}
1069
+ },
1070
+ "75738a2d58574271b291cf982f86074a": {
1071
+ "model_module": "@jupyter-widgets/controls",
1072
+ "model_module_version": "2.0.0",
1073
+ "model_name": "ProgressStyleModel",
1074
+ "state": {
1075
+ "description_width": ""
1076
+ }
1077
+ },
1078
+ "7a56b64874594d50bb09ee06bb656e54": {
1079
+ "model_module": "@jupyter-widgets/controls",
1080
+ "model_module_version": "2.0.0",
1081
+ "model_name": "HTMLStyleModel",
1082
+ "state": {
1083
+ "description_width": "",
1084
+ "font_size": null,
1085
+ "text_color": null
1086
+ }
1087
+ },
1088
+ "7bc93085c8f54642b6005f0f701772fa": {
1089
+ "model_module": "@jupyter-widgets/controls",
1090
+ "model_module_version": "2.0.0",
1091
+ "model_name": "HTMLStyleModel",
1092
+ "state": {
1093
+ "description_width": "",
1094
+ "font_size": null,
1095
+ "text_color": null
1096
+ }
1097
+ },
1098
+ "8bdfbdb11197418d894a54ec6d487906": {
1099
+ "model_module": "@jupyter-widgets/controls",
1100
+ "model_module_version": "2.0.0",
1101
+ "model_name": "HTMLModel",
1102
+ "state": {
1103
+ "layout": "IPY_MODEL_ae882ab226794053a4f906ac3b5e49b9",
1104
+ "style": "IPY_MODEL_605845a520ee426fa2330ded19d8a617",
1105
+ "value": " 17.6M/17.6M [00:00&lt;00:00, 27.8MB/s]"
1106
+ }
1107
+ },
1108
+ "9a5aac888e72447fa2f379cd3e25a11c": {
1109
+ "model_module": "@jupyter-widgets/base",
1110
+ "model_module_version": "2.0.0",
1111
+ "model_name": "LayoutModel",
1112
+ "state": {}
1113
+ },
1114
+ "aa86a038e70b43368dd5ccb65b653f86": {
1115
+ "model_module": "@jupyter-widgets/controls",
1116
+ "model_module_version": "2.0.0",
1117
+ "model_name": "ProgressStyleModel",
1118
+ "state": {
1119
+ "description_width": ""
1120
+ }
1121
+ },
1122
+ "acfe7063ca0f4ea6acd6f308071f4418": {
1123
+ "model_module": "@jupyter-widgets/controls",
1124
+ "model_module_version": "2.0.0",
1125
+ "model_name": "HTMLModel",
1126
+ "state": {
1127
+ "layout": "IPY_MODEL_9a5aac888e72447fa2f379cd3e25a11c",
1128
+ "style": "IPY_MODEL_7bc93085c8f54642b6005f0f701772fa",
1129
+ "value": "README.md: 100%"
1130
+ }
1131
+ },
1132
+ "ae882ab226794053a4f906ac3b5e49b9": {
1133
+ "model_module": "@jupyter-widgets/base",
1134
+ "model_module_version": "2.0.0",
1135
+ "model_name": "LayoutModel",
1136
+ "state": {}
1137
+ },
1138
+ "d84082e3eb2e4ababf340e8262c5489c": {
1139
+ "model_module": "@jupyter-widgets/base",
1140
+ "model_module_version": "2.0.0",
1141
+ "model_name": "LayoutModel",
1142
+ "state": {}
1143
+ },
1144
+ "eaf57ba3273f42d4900ce2ebae398892": {
1145
+ "model_module": "@jupyter-widgets/base",
1146
+ "model_module_version": "2.0.0",
1147
+ "model_name": "LayoutModel",
1148
+ "state": {}
1149
+ },
1150
+ "ef4fd0b071034f7d9cba6aa0ab69d148": {
1151
+ "model_module": "@jupyter-widgets/controls",
1152
+ "model_module_version": "2.0.0",
1153
+ "model_name": "HBoxModel",
1154
+ "state": {
1155
+ "children": [
1156
+ "IPY_MODEL_14a18f1cb1f244b28fa35c504023c27d",
1157
+ "IPY_MODEL_350a27df5da248b3b06dbad6b28bc789",
1158
+ "IPY_MODEL_8bdfbdb11197418d894a54ec6d487906"
1159
+ ],
1160
+ "layout": "IPY_MODEL_7402f6bafb5f485e9f172cdf017572c7"
1161
+ }
1162
+ },
1163
+ "f3f6ee50ec314983a492ab7f4f5ef1bc": {
1164
+ "model_module": "@jupyter-widgets/controls",
1165
+ "model_module_version": "2.0.0",
1166
+ "model_name": "HBoxModel",
1167
+ "state": {
1168
+ "children": [
1169
+ "IPY_MODEL_acfe7063ca0f4ea6acd6f308071f4418",
1170
+ "IPY_MODEL_1aedcea94f5a43919f4b0dd14eb54ab0",
1171
+ "IPY_MODEL_f8f939982d7542969718ad06c692c488"
1172
+ ],
1173
+ "layout": "IPY_MODEL_d84082e3eb2e4ababf340e8262c5489c"
1174
+ }
1175
+ },
1176
+ "f8f939982d7542969718ad06c692c488": {
1177
+ "model_module": "@jupyter-widgets/controls",
1178
+ "model_module_version": "2.0.0",
1179
+ "model_name": "HTMLModel",
1180
+ "state": {
1181
+ "layout": "IPY_MODEL_40d60079070e4bd2b81fbcfd695cd759",
1182
+ "style": "IPY_MODEL_7a56b64874594d50bb09ee06bb656e54",
1183
+ "value": " 320/320 [00:00&lt;00:00, 25.4kB/s]"
1184
+ }
1185
+ }
1186
+ },
1187
  "version_major": 2,
1188
  "version_minor": 0
1189
  }
tasks/text.py CHANGED
@@ -18,7 +18,7 @@ DESCRIPTIONS = {
18
  "bert-medium": "to be implemented",
19
  "bert-small": "to be implemented",
20
  "bert-mini": "to be implemented",
21
- "bert-tiny": "to be implemented",
22
  }
23
 
24
  ROUTE = "/text"
@@ -73,7 +73,7 @@ def bert_model(test_dataset: dict, model_type: str):
73
  @router.post(ROUTE, tags=["Text Task"])
74
  async def evaluate_text(
75
  request: TextEvaluationRequest,
76
- model_type: str = "bert-base",
77
  # This should be an API query parameter, but it looks like the submission repo
78
  # https://huggingface.co/spaces/frugal-ai-challenge/submission-portal
79
  # is built in a way to not accept any other endpoints or parameters.
 
18
  "bert-medium": "to be implemented",
19
  "bert-small": "to be implemented",
20
  "bert-mini": "to be implemented",
21
+ "bert-tiny": "bert tiny finetuned",
22
  }
23
 
24
  ROUTE = "/text"
 
73
  @router.post(ROUTE, tags=["Text Task"])
74
  async def evaluate_text(
75
  request: TextEvaluationRequest,
76
+ model_type: str = "bert-tiny",
77
  # This should be an API query parameter, but it looks like the submission repo
78
  # https://huggingface.co/spaces/frugal-ai-challenge/submission-portal
79
  # is built in a way to not accept any other endpoints or parameters.