Nonnormalizable commited on
Commit
ec64986
·
1 Parent(s): c329742

Store model config.

Browse files
Files changed (1) hide show
  1. Finetune BERT.ipynb +146 -529
Finetune BERT.ipynb CHANGED
@@ -14,11 +14,11 @@
14
  "id": "73e72549-69f2-46b5-b0f5-655777139972",
15
  "metadata": {
16
  "execution": {
17
- "iopub.execute_input": "2025-01-22T00:15:28.938894Z",
18
- "iopub.status.busy": "2025-01-22T00:15:28.938077Z",
19
- "iopub.status.idle": "2025-01-22T00:15:34.317293Z",
20
- "shell.execute_reply": "2025-01-22T00:15:34.316942Z",
21
- "shell.execute_reply.started": "2025-01-22T00:15:28.938839Z"
22
  }
23
  },
24
  "outputs": [],
@@ -45,11 +45,11 @@
45
  "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
46
  "metadata": {
47
  "execution": {
48
- "iopub.execute_input": "2025-01-22T00:15:34.318082Z",
49
- "iopub.status.busy": "2025-01-22T00:15:34.317923Z",
50
- "iopub.status.idle": "2025-01-22T00:15:34.320079Z",
51
- "shell.execute_reply": "2025-01-22T00:15:34.319851Z",
52
- "shell.execute_reply.started": "2025-01-22T00:15:34.318073Z"
53
  }
54
  },
55
  "outputs": [],
@@ -67,15 +67,15 @@
67
  },
68
  {
69
  "cell_type": "code",
70
- "execution_count": 15,
71
  "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
72
  "metadata": {
73
  "execution": {
74
- "iopub.execute_input": "2025-01-22T00:18:10.466025Z",
75
- "iopub.status.busy": "2025-01-22T00:18:10.465289Z",
76
- "iopub.status.idle": "2025-01-22T00:18:10.482505Z",
77
- "shell.execute_reply": "2025-01-22T00:18:10.481605Z",
78
- "shell.execute_reply.started": "2025-01-22T00:18:10.465973Z"
79
  }
80
  },
81
  "outputs": [],
@@ -131,6 +131,8 @@
131
  " def __init__(self, num_labels=8, bert_variety=\"bert-base-uncased\"):\n",
132
  " super().__init__()\n",
133
  " self.bert = BertModel.from_pretrained(bert_variety)\n",
 
 
134
  " self.dropout = nn.Dropout(0.05)\n",
135
  " self.classifier = nn.Linear(self.bert.pooler.dense.out_features, num_labels)\n",
136
  "\n",
@@ -193,15 +195,15 @@
193
  },
194
  {
195
  "cell_type": "code",
196
- "execution_count": 16,
197
  "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
198
  "metadata": {
199
  "execution": {
200
- "iopub.execute_input": "2025-01-22T00:18:10.964716Z",
201
- "iopub.status.busy": "2025-01-22T00:18:10.963608Z",
202
- "iopub.status.idle": "2025-01-22T00:18:10.971834Z",
203
- "shell.execute_reply": "2025-01-22T00:18:10.970949Z",
204
- "shell.execute_reply.started": "2025-01-22T00:18:10.964671Z"
205
  }
206
  },
207
  "outputs": [],
@@ -217,15 +219,15 @@
217
  },
218
  {
219
  "cell_type": "code",
220
- "execution_count": 17,
221
  "id": "695bc080-bbd7-4937-af5b-50db1c936500",
222
  "metadata": {
223
  "execution": {
224
- "iopub.execute_input": "2025-01-22T00:18:11.117610Z",
225
- "iopub.status.busy": "2025-01-22T00:18:11.117201Z",
226
- "iopub.status.idle": "2025-01-22T00:18:11.128421Z",
227
- "shell.execute_reply": "2025-01-22T00:18:11.127145Z",
228
- "shell.execute_reply.started": "2025-01-22T00:18:11.117580Z"
229
  }
230
  },
231
  "outputs": [],
@@ -321,15 +323,15 @@
321
  },
322
  {
323
  "cell_type": "code",
324
- "execution_count": 21,
325
  "id": "34a7c310-c486-4db1-b94d-4363c3d3df5b",
326
  "metadata": {
327
  "execution": {
328
- "iopub.execute_input": "2025-01-22T00:18:31.584691Z",
329
- "iopub.status.busy": "2025-01-22T00:18:31.584113Z",
330
- "iopub.status.idle": "2025-01-22T00:18:38.462642Z",
331
- "shell.execute_reply": "2025-01-22T00:18:38.462384Z",
332
- "shell.execute_reply.started": "2025-01-22T00:18:31.584650Z"
333
  }
334
  },
335
  "outputs": [
@@ -337,10 +339,10 @@
337
  "name": "stdout",
338
  "output_type": "stream",
339
  "text": [
340
- "2025-01-21 19:18:35 Epoch 0/3 done. Loss: Train 2.184, Test 2.190; and Acc: Train 0.131, Test 0.129\n",
341
- "2025-01-21 19:18:36 Epoch 1/3 done. Loss: Train 1.979, Test 2.002; and Acc: Train 0.244, Test 0.222\n",
342
- "2025-01-21 19:18:37 Epoch 2/3 done. Loss: Train 1.915, Test 1.949; and Acc: Train 0.277, Test 0.258\n",
343
- "2025-01-21 19:18:38 Epoch 3/3 done. Loss: Train 1.873, Test 1.917; and Acc: Train 0.276, Test 0.259\n"
344
  ]
345
  }
346
  ],
@@ -356,16 +358,16 @@
356
  },
357
  {
358
  "cell_type": "code",
359
- "execution_count": 23,
360
  "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
361
  "metadata": {
362
  "editable": true,
363
  "execution": {
364
- "iopub.execute_input": "2025-01-22T00:18:46.417009Z",
365
- "iopub.status.busy": "2025-01-22T00:18:46.416419Z",
366
- "iopub.status.idle": "2025-01-22T00:18:46.529320Z",
367
- "shell.execute_reply": "2025-01-22T00:18:46.529078Z",
368
- "shell.execute_reply.started": "2025-01-22T00:18:46.416962Z"
369
  },
370
  "slideshow": {
371
  "slide_type": ""
@@ -377,7 +379,7 @@
377
  "name": "stdout",
378
  "output_type": "stream",
379
  "text": [
380
- "2025-01-21 19:18:46 Predictions: tensor([0, 0, 0, 0, 0, 0, 0], device='mps:0')\n"
381
  ]
382
  }
383
  ],
@@ -427,7 +429,11 @@
427
  "acc 0.954, energy 0.736 Wh, emissions 0.272 gco2eq\n",
428
  "\n",
429
  "[bert-base some hp tuning](https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text/blob/main/submissions/Nonnormalizable_20250120_231350.json):\\\n",
430
- "acc 0.707, energy 0.803 Wh, emissions 0.296 gco2eq\n"
 
 
 
 
431
  ]
432
  },
433
  {
@@ -445,15 +451,15 @@
445
  },
446
  {
447
  "cell_type": "code",
448
- "execution_count": 24,
449
  "id": "37794952-703c-466c-9d26-ee6cb2834246",
450
  "metadata": {
451
  "execution": {
452
- "iopub.execute_input": "2025-01-22T00:19:05.789872Z",
453
- "iopub.status.busy": "2025-01-22T00:19:05.789108Z",
454
- "iopub.status.idle": "2025-01-22T00:19:05.796074Z",
455
- "shell.execute_reply": "2025-01-22T00:19:05.794974Z",
456
- "shell.execute_reply.started": "2025-01-22T00:19:05.789815Z"
457
  }
458
  },
459
  "outputs": [],
@@ -468,15 +474,15 @@
468
  },
469
  {
470
  "cell_type": "code",
471
- "execution_count": 25,
472
  "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
473
  "metadata": {
474
  "execution": {
475
- "iopub.execute_input": "2025-01-22T00:19:06.183379Z",
476
- "iopub.status.busy": "2025-01-22T00:19:06.182544Z",
477
- "iopub.status.idle": "2025-01-22T00:21:02.201321Z",
478
- "shell.execute_reply": "2025-01-22T00:21:02.201016Z",
479
- "shell.execute_reply.started": "2025-01-22T00:19:06.183320Z"
480
  }
481
  },
482
  "outputs": [
@@ -484,22 +490,22 @@
484
  "name": "stdout",
485
  "output_type": "stream",
486
  "text": [
487
- "2025-01-21 19:19:11 Epoch 0/15 done. Loss: Train 2.055, Test 2.058; and Acc: Train 0.189, Test 0.191\n",
488
- "2025-01-21 19:19:19 Epoch 1/15 done. Loss: Train 1.772, Test 1.805; and Acc: Train 0.354, Test 0.321\n",
489
- "2025-01-21 19:19:26 Epoch 2/15 done. Loss: Train 1.530, Test 1.578; and Acc: Train 0.468, Test 0.446\n",
490
- "2025-01-21 19:19:33 Epoch 3/15 done. Loss: Train 1.373, Test 1.437; and Acc: Train 0.518, Test 0.500\n",
491
- "2025-01-21 19:19:41 Epoch 4/15 done. Loss: Train 1.254, Test 1.353; and Acc: Train 0.572, Test 0.541\n",
492
- "2025-01-21 19:19:48 Epoch 5/15 done. Loss: Train 1.159, Test 1.289; and Acc: Train 0.597, Test 0.568\n",
493
- "2025-01-21 19:19:55 Epoch 6/15 done. Loss: Train 1.068, Test 1.241; and Acc: Train 0.634, Test 0.567\n",
494
- "2025-01-21 19:20:03 Epoch 7/15 done. Loss: Train 0.988, Test 1.199; and Acc: Train 0.668, Test 0.589\n",
495
- "2025-01-21 19:20:10 Epoch 8/15 done. Loss: Train 0.911, Test 1.176; and Acc: Train 0.700, Test 0.587\n",
496
- "2025-01-21 19:20:18 Epoch 9/15 done. Loss: Train 0.858, Test 1.169; and Acc: Train 0.721, Test 0.587\n",
497
- "2025-01-21 19:20:25 Epoch 10/15 done. Loss: Train 0.782, Test 1.151; and Acc: Train 0.747, Test 0.599\n",
498
- "2025-01-21 19:20:32 Epoch 11/15 done. Loss: Train 0.717, Test 1.143; and Acc: Train 0.771, Test 0.604\n",
499
- "2025-01-21 19:20:40 Epoch 12/15 done. Loss: Train 0.657, Test 1.135; and Acc: Train 0.794, Test 0.610\n",
500
- "2025-01-21 19:20:47 Epoch 13/15 done. Loss: Train 0.612, Test 1.147; and Acc: Train 0.819, Test 0.597\n",
501
- "2025-01-21 19:20:54 Epoch 14/15 done. Loss: Train 0.553, Test 1.152; and Acc: Train 0.835, Test 0.599\n",
502
- "2025-01-21 19:21:02 Epoch 15/15 done. Loss: Train 0.509, Test 1.166; and Acc: Train 0.857, Test 0.597\n"
503
  ]
504
  }
505
  ],
@@ -520,235 +526,19 @@
520
  },
521
  {
522
  "cell_type": "code",
523
- "execution_count": 28,
524
  "id": "ec2516f9-79f2-4ae1-ab9a-9a51a7a50587",
525
  "metadata": {
526
  "execution": {
527
- "iopub.execute_input": "2025-01-22T00:23:23.018234Z",
528
- "iopub.status.busy": "2025-01-22T00:23:23.017592Z",
529
- "iopub.status.idle": "2025-01-22T00:23:23.049365Z",
530
- "shell.execute_reply": "2025-01-22T00:23:23.048870Z",
531
- "shell.execute_reply.started": "2025-01-22T00:23:23.018186Z"
532
  },
533
  "scrolled": true
534
  },
535
- "outputs": [
536
- {
537
- "name": "stdout",
538
- "output_type": "stream",
539
- "text": [
540
- "---\n",
541
- "base_model: google/bert_uncased_L-2_H-128_A-2\n",
542
- "datasets:\n",
543
- "- QuotaClimat/frugalaichallenge-text-train\n",
544
- "language:\n",
545
- "- en\n",
546
- "license: apache-2.0\n",
547
- "model_name: frugal-ai-text-bert-tiny\n",
548
- "pipeline_tag: text-classification\n",
549
- "tags:\n",
550
- "- model_hub_mixin\n",
551
- "- pytorch_model_hub_mixin\n",
552
- "- climate\n",
553
- "---\n",
554
- "\n",
555
- "# Model Card for Model ID\n",
556
- "\n",
557
- "<!-- Provide a quick summary of what the model is/does. -->\n",
558
- "\n",
559
- "Classify text into 8 categories of climate misinformation.\n",
560
- "\n",
561
- "## Model Details\n",
562
- "\n",
563
- "### Model Description\n",
564
- "\n",
565
- "<!-- Provide a longer summary of what this model is. -->\n",
566
- "\n",
567
- "Fine trained BERT for classifying climate information as part of the Frugal AI Challenge, for submission to https://huggingface.co/frugal-ai-challenge and scoring on accuracy and efficiency. Trainied on only the non-evaluation 80% of the data, so it's (non-cheating) score will be lower.\n",
568
- "\n",
569
- "- **Developed by:** Andre Bach\n",
570
- "- **Funded by [optional]:** N/A\n",
571
- "- **Shared by [optional]:** Andre Bach\n",
572
- "- **Model type:** Text classification\n",
573
- "- **Language(s) (NLP):** ['en']\n",
574
- "- **License:** apache-2.0\n",
575
- "- **Finetuned from model [optional]:** google/bert_uncased_L-2_H-128_A-2\n",
576
- "\n",
577
- "### Model Sources [optional]\n",
578
- "\n",
579
- "<!-- Provide the basic links for the model. -->\n",
580
- "\n",
581
- "- **Repository:** frugal-ai-text-bert-tiny\n",
582
- "- **Paper [optional]:** [More Information Needed]\n",
583
- "- **Demo [optional]:** [More Information Needed]\n",
584
- "\n",
585
- "## Uses\n",
586
- "\n",
587
- "<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->\n",
588
- "\n",
589
- "### Direct Use\n",
590
- "\n",
591
- "<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->\n",
592
- "\n",
593
- "[More Information Needed]\n",
594
- "\n",
595
- "### Downstream Use [optional]\n",
596
- "\n",
597
- "<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->\n",
598
- "\n",
599
- "[More Information Needed]\n",
600
- "\n",
601
- "### Out-of-Scope Use\n",
602
- "\n",
603
- "<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->\n",
604
- "\n",
605
- "[More Information Needed]\n",
606
- "\n",
607
- "## Bias, Risks, and Limitations\n",
608
- "\n",
609
- "<!-- This section is meant to convey both technical and sociotechnical limitations. -->\n",
610
- "\n",
611
- "[More Information Needed]\n",
612
- "\n",
613
- "### Recommendations\n",
614
- "\n",
615
- "<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->\n",
616
- "\n",
617
- "Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.\n",
618
- "\n",
619
- "## How to Get Started with the Model\n",
620
- "\n",
621
- "Use the code below to get started with the model.\n",
622
- "\n",
623
- "[More Information Needed]\n",
624
- "\n",
625
- "## Training Details\n",
626
- "\n",
627
- "### Training Data\n",
628
- "\n",
629
- "<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->\n",
630
- "\n",
631
- "[More Information Needed]\n",
632
- "\n",
633
- "### Training Procedure\n",
634
- "\n",
635
- "<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->\n",
636
- "\n",
637
- "#### Preprocessing [optional]\n",
638
- "\n",
639
- "[More Information Needed]\n",
640
- "\n",
641
- "\n",
642
- "#### Training Hyperparameters\n",
643
- "\n",
644
- "- **Training regime:** {'max_dataset_size': 'full', 'bert_variety': 'google/bert_uncased_L-2_H-128_A-2', 'max_length': 256, 'num_epochs': 15, 'batch_size': 16} <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->\n",
645
- "\n",
646
- "#### Speeds, Sizes, Times [optional]\n",
647
- "\n",
648
- "<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->\n",
649
- "\n",
650
- "[More Information Needed]\n",
651
- "\n",
652
- "## Evaluation\n",
653
- "\n",
654
- "<!-- This section describes the evaluation protocols and provides the results. -->\n",
655
- "\n",
656
- "### Testing Data, Factors & Metrics\n",
657
- "\n",
658
- "#### Testing Data\n",
659
- "\n",
660
- "<!-- This should link to a Dataset Card if possible. -->\n",
661
- "\n",
662
- "[More Information Needed]\n",
663
- "\n",
664
- "#### Factors\n",
665
- "\n",
666
- "<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->\n",
667
- "\n",
668
- "[More Information Needed]\n",
669
- "\n",
670
- "#### Metrics\n",
671
- "\n",
672
- "<!-- These are the evaluation metrics being used, ideally with a description of why. -->\n",
673
- "\n",
674
- "{'train_loss': 0.5085738757594687, 'train_acc': 0.8565270935960592, 'test_loss': 1.1659069603139705, 'test_acc': 0.5972108285479901}\n",
675
- "\n",
676
- "### Results\n",
677
- "\n",
678
- "[More Information Needed]\n",
679
- "\n",
680
- "#### Summary\n",
681
- "\n",
682
- "\n",
683
- "\n",
684
- "## Model Examination [optional]\n",
685
- "\n",
686
- "<!-- Relevant interpretability work for the model goes here -->\n",
687
- "\n",
688
- "[More Information Needed]\n",
689
- "\n",
690
- "## Environmental Impact\n",
691
- "\n",
692
- "<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->\n",
693
- "\n",
694
- "Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).\n",
695
- "\n",
696
- "- **Hardware Type:** [More Information Needed]\n",
697
- "- **Hours used:** [More Information Needed]\n",
698
- "- **Cloud Provider:** [More Information Needed]\n",
699
- "- **Compute Region:** [More Information Needed]\n",
700
- "- **Carbon Emitted:** [More Information Needed]\n",
701
- "\n",
702
- "## Technical Specifications [optional]\n",
703
- "\n",
704
- "### Model Architecture and Objective\n",
705
- "\n",
706
- "[More Information Needed]\n",
707
- "\n",
708
- "### Compute Infrastructure\n",
709
- "\n",
710
- "[More Information Needed]\n",
711
- "\n",
712
- "#### Hardware\n",
713
- "\n",
714
- "[More Information Needed]\n",
715
- "\n",
716
- "#### Software\n",
717
- "\n",
718
- "[More Information Needed]\n",
719
- "\n",
720
- "## Citation [optional]\n",
721
- "\n",
722
- "<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->\n",
723
- "\n",
724
- "**BibTeX:**\n",
725
- "\n",
726
- "[More Information Needed]\n",
727
- "\n",
728
- "**APA:**\n",
729
- "\n",
730
- "[More Information Needed]\n",
731
- "\n",
732
- "## Glossary [optional]\n",
733
- "\n",
734
- "<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->\n",
735
- "\n",
736
- "[More Information Needed]\n",
737
- "\n",
738
- "## More Information [optional]\n",
739
- "\n",
740
- "[More Information Needed]\n",
741
- "\n",
742
- "## Model Card Authors [optional]\n",
743
- "\n",
744
- "[More Information Needed]\n",
745
- "\n",
746
- "## Model Card Contact\n",
747
- "\n",
748
- "[More Information Needed]\n"
749
- ]
750
- }
751
- ],
752
  "source": [
753
  "model_and_repo_name = \"frugal-ai-text-bert-tiny\"\n",
754
  "card_data = ModelCardData(\n",
@@ -773,20 +563,20 @@
773
  " testing_metrics=testing_metrics,\n",
774
  ")\n",
775
  "# print(card_data.to_yaml())\n",
776
- "print(card)"
777
  ]
778
  },
779
  {
780
  "cell_type": "code",
781
- "execution_count": 30,
782
  "id": "29d3bbf9-ab2a-48e2-a550-e16da5025720",
783
  "metadata": {
784
  "execution": {
785
- "iopub.execute_input": "2025-01-22T00:23:51.131078Z",
786
- "iopub.status.busy": "2025-01-22T00:23:51.130578Z",
787
- "iopub.status.idle": "2025-01-22T00:23:51.135440Z",
788
- "shell.execute_reply": "2025-01-22T00:23:51.134263Z",
789
- "shell.execute_reply.started": "2025-01-22T00:23:51.131042Z"
790
  }
791
  },
792
  "outputs": [],
@@ -797,15 +587,15 @@
797
  },
798
  {
799
  "cell_type": "code",
800
- "execution_count": 34,
801
  "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
802
  "metadata": {
803
  "execution": {
804
- "iopub.execute_input": "2025-01-22T00:24:18.616547Z",
805
- "iopub.status.busy": "2025-01-22T00:24:18.615990Z",
806
- "iopub.status.idle": "2025-01-22T00:24:18.669435Z",
807
- "shell.execute_reply": "2025-01-22T00:24:18.669063Z",
808
- "shell.execute_reply.started": "2025-01-22T00:24:18.616509Z"
809
  }
810
  },
811
  "outputs": [
@@ -813,7 +603,7 @@
813
  "name": "stdout",
814
  "output_type": "stream",
815
  "text": [
816
- "2025-01-21 19:24:18 Predictions: tensor([0, 0, 3, 1, 2, 6, 6], device='mps:0')\n"
817
  ]
818
  }
819
  ],
@@ -846,123 +636,47 @@
846
  },
847
  {
848
  "cell_type": "code",
849
- "execution_count": 35,
850
  "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
851
  "metadata": {
852
  "execution": {
853
- "iopub.execute_input": "2025-01-22T00:24:41.153062Z",
854
- "iopub.status.busy": "2025-01-22T00:24:41.152049Z",
855
- "iopub.status.idle": "2025-01-22T00:24:43.436376Z",
856
- "shell.execute_reply": "2025-01-22T00:24:43.435250Z",
857
- "shell.execute_reply.started": "2025-01-22T00:24:41.153018Z"
858
- }
859
- },
860
- "outputs": [
861
- {
862
- "data": {
863
- "application/vnd.jupyter.widget-view+json": {
864
- "model_id": "ef4fd0b071034f7d9cba6aa0ab69d148",
865
- "version_major": 2,
866
- "version_minor": 0
867
- },
868
- "text/plain": [
869
- "model.safetensors: 0%| | 0.00/17.6M [00:00<?, ?B/s]"
870
- ]
871
- },
872
- "metadata": {},
873
- "output_type": "display_data"
874
- },
875
- {
876
- "data": {
877
- "text/plain": [
878
- "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/512fbc46e1cfc7456f4e9f2331a30d66a9052d88', commit_message='Push model using huggingface_hub.', commit_description='', oid='512fbc46e1cfc7456f4e9f2331a30d66a9052d88', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
879
- ]
880
- },
881
- "execution_count": 35,
882
- "metadata": {},
883
- "output_type": "execute_result"
884
- }
885
- ],
886
- "source": [
887
- "model_final.push_to_hub(model_and_repo_name)"
888
- ]
889
- },
890
- {
891
- "cell_type": "code",
892
- "execution_count": 36,
893
- "id": "251ef9ee-8ba3-495f-8fe6-a93aa63168ce",
894
- "metadata": {
895
- "execution": {
896
- "iopub.execute_input": "2025-01-22T00:24:48.887758Z",
897
- "iopub.status.busy": "2025-01-22T00:24:48.887178Z",
898
- "iopub.status.idle": "2025-01-22T00:24:49.581460Z",
899
- "shell.execute_reply": "2025-01-22T00:24:49.580127Z",
900
- "shell.execute_reply.started": "2025-01-22T00:24:48.887716Z"
901
  }
902
  },
903
  "outputs": [
904
  {
905
- "data": {
906
- "application/vnd.jupyter.widget-view+json": {
907
- "model_id": "f3f6ee50ec314983a492ab7f4f5ef1bc",
908
- "version_major": 2,
909
- "version_minor": 0
910
- },
911
- "text/plain": [
912
- "README.md: 0%| | 0.00/320 [00:00<?, ?B/s]"
913
- ]
914
- },
915
- "metadata": {},
916
- "output_type": "display_data"
917
  },
918
  {
919
  "data": {
920
  "text/plain": [
921
- "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/1870eebdb54a6a636d7dd32a5d923abc8d1baaec', commit_message='Upload tokenizer', commit_description='', oid='1870eebdb54a6a636d7dd32a5d923abc8d1baaec', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
922
  ]
923
  },
924
- "execution_count": 36,
925
- "metadata": {},
926
- "output_type": "execute_result"
927
- }
928
- ],
929
- "source": [
930
- "tokenizer_final.push_to_hub(model_and_repo_name)"
931
- ]
932
- },
933
- {
934
- "cell_type": "code",
935
- "execution_count": 38,
936
- "id": "863d3553-89a6-4188-a8d0-eaa0b6bccb6c",
937
- "metadata": {
938
- "execution": {
939
- "iopub.execute_input": "2025-01-22T00:25:24.402856Z",
940
- "iopub.status.busy": "2025-01-22T00:25:24.402275Z",
941
- "iopub.status.idle": "2025-01-22T00:25:25.011133Z",
942
- "shell.execute_reply": "2025-01-22T00:25:25.009553Z",
943
- "shell.execute_reply.started": "2025-01-22T00:25:24.402817Z"
944
- }
945
- },
946
- "outputs": [
947
- {
948
- "data": {
949
- "text/plain": [
950
- "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/62d658978d97614f10c3d69b6595a0fb6b8a2d4c', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='62d658978d97614f10c3d69b6595a0fb6b8a2d4c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
951
- ]
952
- },
953
- "execution_count": 38,
954
  "metadata": {},
955
  "output_type": "execute_result"
956
  }
957
  ],
958
  "source": [
 
 
 
959
  "card.push_to_hub(f\"Nonnormalizable/{model_and_repo_name}\")"
960
  ]
961
  },
962
  {
963
  "cell_type": "code",
964
  "execution_count": null,
965
- "id": "2c22cc30-7578-4aad-b7db-1ffe4954c46c",
966
  "metadata": {},
967
  "outputs": [],
968
  "source": []
@@ -989,7 +703,7 @@
989
  "widgets": {
990
  "application/vnd.jupyter.widget-state+json": {
991
  "state": {
992
- "05dc5be1c7754da082079d8c8fdd3a2a": {
993
  "model_module": "@jupyter-widgets/controls",
994
  "model_module_version": "2.0.0",
995
  "model_name": "HTMLStyleModel",
@@ -999,59 +713,29 @@
999
  "text_color": null
1000
  }
1001
  },
1002
- "07fc7cde058940a29aba80b9b1f18247": {
1003
  "model_module": "@jupyter-widgets/base",
1004
  "model_module_version": "2.0.0",
1005
  "model_name": "LayoutModel",
1006
  "state": {}
1007
  },
1008
- "14a18f1cb1f244b28fa35c504023c27d": {
1009
  "model_module": "@jupyter-widgets/controls",
1010
  "model_module_version": "2.0.0",
1011
  "model_name": "HTMLModel",
1012
  "state": {
1013
- "layout": "IPY_MODEL_07fc7cde058940a29aba80b9b1f18247",
1014
- "style": "IPY_MODEL_05dc5be1c7754da082079d8c8fdd3a2a",
1015
- "value": "model.safetensors: 100%"
1016
- }
1017
- },
1018
- "1aedcea94f5a43919f4b0dd14eb54ab0": {
1019
- "model_module": "@jupyter-widgets/controls",
1020
- "model_module_version": "2.0.0",
1021
- "model_name": "FloatProgressModel",
1022
- "state": {
1023
- "bar_style": "success",
1024
- "layout": "IPY_MODEL_3a0167722aae49eb87c0cc1a0b381a7c",
1025
- "max": 320,
1026
- "style": "IPY_MODEL_75738a2d58574271b291cf982f86074a",
1027
- "value": 320
1028
- }
1029
- },
1030
- "350a27df5da248b3b06dbad6b28bc789": {
1031
- "model_module": "@jupyter-widgets/controls",
1032
- "model_module_version": "2.0.0",
1033
- "model_name": "FloatProgressModel",
1034
- "state": {
1035
- "bar_style": "success",
1036
- "layout": "IPY_MODEL_eaf57ba3273f42d4900ce2ebae398892",
1037
- "max": 17552376,
1038
- "style": "IPY_MODEL_aa86a038e70b43368dd5ccb65b653f86",
1039
- "value": 17552376
1040
  }
1041
  },
1042
- "3a0167722aae49eb87c0cc1a0b381a7c": {
1043
  "model_module": "@jupyter-widgets/base",
1044
  "model_module_version": "2.0.0",
1045
  "model_name": "LayoutModel",
1046
  "state": {}
1047
  },
1048
- "40d60079070e4bd2b81fbcfd695cd759": {
1049
- "model_module": "@jupyter-widgets/base",
1050
- "model_module_version": "2.0.0",
1051
- "model_name": "LayoutModel",
1052
- "state": {}
1053
- },
1054
- "605845a520ee426fa2330ded19d8a617": {
1055
  "model_module": "@jupyter-widgets/controls",
1056
  "model_module_version": "2.0.0",
1057
  "model_name": "HTMLStyleModel",
@@ -1061,57 +745,19 @@
1061
  "text_color": null
1062
  }
1063
  },
1064
- "7402f6bafb5f485e9f172cdf017572c7": {
1065
  "model_module": "@jupyter-widgets/base",
1066
  "model_module_version": "2.0.0",
1067
  "model_name": "LayoutModel",
1068
  "state": {}
1069
  },
1070
- "75738a2d58574271b291cf982f86074a": {
1071
- "model_module": "@jupyter-widgets/controls",
1072
- "model_module_version": "2.0.0",
1073
- "model_name": "ProgressStyleModel",
1074
- "state": {
1075
- "description_width": ""
1076
- }
1077
- },
1078
- "7a56b64874594d50bb09ee06bb656e54": {
1079
- "model_module": "@jupyter-widgets/controls",
1080
- "model_module_version": "2.0.0",
1081
- "model_name": "HTMLStyleModel",
1082
- "state": {
1083
- "description_width": "",
1084
- "font_size": null,
1085
- "text_color": null
1086
- }
1087
- },
1088
- "7bc93085c8f54642b6005f0f701772fa": {
1089
- "model_module": "@jupyter-widgets/controls",
1090
- "model_module_version": "2.0.0",
1091
- "model_name": "HTMLStyleModel",
1092
- "state": {
1093
- "description_width": "",
1094
- "font_size": null,
1095
- "text_color": null
1096
- }
1097
- },
1098
- "8bdfbdb11197418d894a54ec6d487906": {
1099
- "model_module": "@jupyter-widgets/controls",
1100
- "model_module_version": "2.0.0",
1101
- "model_name": "HTMLModel",
1102
- "state": {
1103
- "layout": "IPY_MODEL_ae882ab226794053a4f906ac3b5e49b9",
1104
- "style": "IPY_MODEL_605845a520ee426fa2330ded19d8a617",
1105
- "value": " 17.6M/17.6M [00:00&lt;00:00, 27.8MB/s]"
1106
- }
1107
- },
1108
- "9a5aac888e72447fa2f379cd3e25a11c": {
1109
  "model_module": "@jupyter-widgets/base",
1110
  "model_module_version": "2.0.0",
1111
  "model_name": "LayoutModel",
1112
  "state": {}
1113
  },
1114
- "aa86a038e70b43368dd5ccb65b653f86": {
1115
  "model_module": "@jupyter-widgets/controls",
1116
  "model_module_version": "2.0.0",
1117
  "model_name": "ProgressStyleModel",
@@ -1119,68 +765,39 @@
1119
  "description_width": ""
1120
  }
1121
  },
1122
- "acfe7063ca0f4ea6acd6f308071f4418": {
1123
  "model_module": "@jupyter-widgets/controls",
1124
  "model_module_version": "2.0.0",
1125
- "model_name": "HTMLModel",
1126
  "state": {
1127
- "layout": "IPY_MODEL_9a5aac888e72447fa2f379cd3e25a11c",
1128
- "style": "IPY_MODEL_7bc93085c8f54642b6005f0f701772fa",
1129
- "value": "README.md: 100%"
 
 
1130
  }
1131
  },
1132
- "ae882ab226794053a4f906ac3b5e49b9": {
1133
- "model_module": "@jupyter-widgets/base",
1134
- "model_module_version": "2.0.0",
1135
- "model_name": "LayoutModel",
1136
- "state": {}
1137
- },
1138
- "d84082e3eb2e4ababf340e8262c5489c": {
1139
- "model_module": "@jupyter-widgets/base",
1140
- "model_module_version": "2.0.0",
1141
- "model_name": "LayoutModel",
1142
- "state": {}
1143
- },
1144
- "eaf57ba3273f42d4900ce2ebae398892": {
1145
- "model_module": "@jupyter-widgets/base",
1146
- "model_module_version": "2.0.0",
1147
- "model_name": "LayoutModel",
1148
- "state": {}
1149
- },
1150
- "ef4fd0b071034f7d9cba6aa0ab69d148": {
1151
  "model_module": "@jupyter-widgets/controls",
1152
  "model_module_version": "2.0.0",
1153
- "model_name": "HBoxModel",
1154
  "state": {
1155
- "children": [
1156
- "IPY_MODEL_14a18f1cb1f244b28fa35c504023c27d",
1157
- "IPY_MODEL_350a27df5da248b3b06dbad6b28bc789",
1158
- "IPY_MODEL_8bdfbdb11197418d894a54ec6d487906"
1159
- ],
1160
- "layout": "IPY_MODEL_7402f6bafb5f485e9f172cdf017572c7"
1161
  }
1162
  },
1163
- "f3f6ee50ec314983a492ab7f4f5ef1bc": {
1164
  "model_module": "@jupyter-widgets/controls",
1165
  "model_module_version": "2.0.0",
1166
  "model_name": "HBoxModel",
1167
  "state": {
1168
  "children": [
1169
- "IPY_MODEL_acfe7063ca0f4ea6acd6f308071f4418",
1170
- "IPY_MODEL_1aedcea94f5a43919f4b0dd14eb54ab0",
1171
- "IPY_MODEL_f8f939982d7542969718ad06c692c488"
1172
  ],
1173
- "layout": "IPY_MODEL_d84082e3eb2e4ababf340e8262c5489c"
1174
- }
1175
- },
1176
- "f8f939982d7542969718ad06c692c488": {
1177
- "model_module": "@jupyter-widgets/controls",
1178
- "model_module_version": "2.0.0",
1179
- "model_name": "HTMLModel",
1180
- "state": {
1181
- "layout": "IPY_MODEL_40d60079070e4bd2b81fbcfd695cd759",
1182
- "style": "IPY_MODEL_7a56b64874594d50bb09ee06bb656e54",
1183
- "value": " 320/320 [00:00&lt;00:00, 25.4kB/s]"
1184
  }
1185
  }
1186
  },
 
14
  "id": "73e72549-69f2-46b5-b0f5-655777139972",
15
  "metadata": {
16
  "execution": {
17
+ "iopub.execute_input": "2025-01-22T14:28:40.399621Z",
18
+ "iopub.status.busy": "2025-01-22T14:28:40.398151Z",
19
+ "iopub.status.idle": "2025-01-22T14:28:43.463152Z",
20
+ "shell.execute_reply": "2025-01-22T14:28:43.462919Z",
21
+ "shell.execute_reply.started": "2025-01-22T14:28:40.399562Z"
22
  }
23
  },
24
  "outputs": [],
 
45
  "id": "07e0787e-c72b-41f3-baba-43cef3f8d6f8",
46
  "metadata": {
47
  "execution": {
48
+ "iopub.execute_input": "2025-01-22T14:28:43.463941Z",
49
+ "iopub.status.busy": "2025-01-22T14:28:43.463805Z",
50
+ "iopub.status.idle": "2025-01-22T14:28:43.465644Z",
51
+ "shell.execute_reply": "2025-01-22T14:28:43.465423Z",
52
+ "shell.execute_reply.started": "2025-01-22T14:28:43.463933Z"
53
  }
54
  },
55
  "outputs": [],
 
67
  },
68
  {
69
  "cell_type": "code",
70
+ "execution_count": 3,
71
  "id": "d4b79fb9-5e70-4600-8885-94bc0a6e917c",
72
  "metadata": {
73
  "execution": {
74
+ "iopub.execute_input": "2025-01-22T14:28:44.578819Z",
75
+ "iopub.status.busy": "2025-01-22T14:28:44.578158Z",
76
+ "iopub.status.idle": "2025-01-22T14:28:44.594834Z",
77
+ "shell.execute_reply": "2025-01-22T14:28:44.594043Z",
78
+ "shell.execute_reply.started": "2025-01-22T14:28:44.578767Z"
79
  }
80
  },
81
  "outputs": [],
 
131
  " def __init__(self, num_labels=8, bert_variety=\"bert-base-uncased\"):\n",
132
  " super().__init__()\n",
133
  " self.bert = BertModel.from_pretrained(bert_variety)\n",
134
+ " self.config = self.bert.config\n",
135
+ " self.config.num_labels = num_labels\n",
136
  " self.dropout = nn.Dropout(0.05)\n",
137
  " self.classifier = nn.Linear(self.bert.pooler.dense.out_features, num_labels)\n",
138
  "\n",
 
195
  },
196
  {
197
  "cell_type": "code",
198
+ "execution_count": 4,
199
  "id": "07131bce-23ad-4787-8622-cce401f3e5ce",
200
  "metadata": {
201
  "execution": {
202
+ "iopub.execute_input": "2025-01-22T14:28:45.024103Z",
203
+ "iopub.status.busy": "2025-01-22T14:28:45.023645Z",
204
+ "iopub.status.idle": "2025-01-22T14:28:45.056500Z",
205
+ "shell.execute_reply": "2025-01-22T14:28:45.056070Z",
206
+ "shell.execute_reply.started": "2025-01-22T14:28:45.024069Z"
207
  }
208
  },
209
  "outputs": [],
 
219
  },
220
  {
221
  "cell_type": "code",
222
+ "execution_count": 5,
223
  "id": "695bc080-bbd7-4937-af5b-50db1c936500",
224
  "metadata": {
225
  "execution": {
226
+ "iopub.execute_input": "2025-01-22T14:28:45.268069Z",
227
+ "iopub.status.busy": "2025-01-22T14:28:45.267170Z",
228
+ "iopub.status.idle": "2025-01-22T14:28:45.279492Z",
229
+ "shell.execute_reply": "2025-01-22T14:28:45.278723Z",
230
+ "shell.execute_reply.started": "2025-01-22T14:28:45.268003Z"
231
  }
232
  },
233
  "outputs": [],
 
323
  },
324
  {
325
  "cell_type": "code",
326
+ "execution_count": 6,
327
  "id": "34a7c310-c486-4db1-b94d-4363c3d3df5b",
328
  "metadata": {
329
  "execution": {
330
+ "iopub.execute_input": "2025-01-22T14:28:46.360995Z",
331
+ "iopub.status.busy": "2025-01-22T14:28:46.360044Z",
332
+ "iopub.status.idle": "2025-01-22T14:28:53.023176Z",
333
+ "shell.execute_reply": "2025-01-22T14:28:53.022848Z",
334
+ "shell.execute_reply.started": "2025-01-22T14:28:46.360953Z"
335
  }
336
  },
337
  "outputs": [
 
339
  "name": "stdout",
340
  "output_type": "stream",
341
  "text": [
342
+ "2025-01-22 09:28:49 Epoch 0/3 done. Loss: Train 2.131, Test 2.135; and Acc: Train 0.118, Test 0.118\n",
343
+ "2025-01-22 09:28:50 Epoch 1/3 done. Loss: Train 1.952, Test 1.978; and Acc: Train 0.281, Test 0.261\n",
344
+ "2025-01-22 09:28:51 Epoch 2/3 done. Loss: Train 1.905, Test 1.943; and Acc: Train 0.304, Test 0.275\n",
345
+ "2025-01-22 09:28:53 Epoch 3/3 done. Loss: Train 1.862, Test 1.904; and Acc: Train 0.321, Test 0.283\n"
346
  ]
347
  }
348
  ],
 
358
  },
359
  {
360
  "cell_type": "code",
361
+ "execution_count": 7,
362
  "id": "0aedfcca-843e-4f4c-8062-3e4625161bcc",
363
  "metadata": {
364
  "editable": true,
365
  "execution": {
366
+ "iopub.execute_input": "2025-01-22T14:28:55.671186Z",
367
+ "iopub.status.busy": "2025-01-22T14:28:55.670403Z",
368
+ "iopub.status.idle": "2025-01-22T14:28:55.789941Z",
369
+ "shell.execute_reply": "2025-01-22T14:28:55.789679Z",
370
+ "shell.execute_reply.started": "2025-01-22T14:28:55.671131Z"
371
  },
372
  "slideshow": {
373
  "slide_type": ""
 
379
  "name": "stdout",
380
  "output_type": "stream",
381
  "text": [
382
+ "2025-01-22 09:28:55 Predictions: tensor([0, 0, 0, 0, 0, 0, 0], device='mps:0')\n"
383
  ]
384
  }
385
  ],
 
429
  "acc 0.954, energy 0.736 Wh, emissions 0.272 gco2eq\n",
430
  "\n",
431
  "[bert-base some hp tuning](https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text/blob/main/submissions/Nonnormalizable_20250120_231350.json):\\\n",
432
+ "acc 0.707, energy 0.803 Wh, emissions 0.296 gco2eq\n",
433
+ "\n",
434
+ "bert-tiny, Nvidia 1xL40S:\n",
435
+ "\n",
436
+ "bert-tiny, "
437
  ]
438
  },
439
  {
 
451
  },
452
  {
453
  "cell_type": "code",
454
+ "execution_count": 9,
455
  "id": "37794952-703c-466c-9d26-ee6cb2834246",
456
  "metadata": {
457
  "execution": {
458
+ "iopub.execute_input": "2025-01-22T14:29:24.691783Z",
459
+ "iopub.status.busy": "2025-01-22T14:29:24.691195Z",
460
+ "iopub.status.idle": "2025-01-22T14:29:24.696800Z",
461
+ "shell.execute_reply": "2025-01-22T14:29:24.695895Z",
462
+ "shell.execute_reply.started": "2025-01-22T14:29:24.691745Z"
463
  }
464
  },
465
  "outputs": [],
 
474
  },
475
  {
476
  "cell_type": "code",
477
+ "execution_count": 10,
478
  "id": "28354e8c-886a-4523-8968-8c688c13f6a3",
479
  "metadata": {
480
  "execution": {
481
+ "iopub.execute_input": "2025-01-22T14:29:25.202258Z",
482
+ "iopub.status.busy": "2025-01-22T14:29:25.201292Z",
483
+ "iopub.status.idle": "2025-01-22T14:31:22.271954Z",
484
+ "shell.execute_reply": "2025-01-22T14:31:22.271647Z",
485
+ "shell.execute_reply.started": "2025-01-22T14:29:25.202215Z"
486
  }
487
  },
488
  "outputs": [
 
490
  "name": "stdout",
491
  "output_type": "stream",
492
  "text": [
493
+ "2025-01-22 09:29:31 Epoch 0/15 done. Loss: Train 2.104, Test 2.111; and Acc: Train 0.114, Test 0.097\n",
494
+ "2025-01-22 09:29:38 Epoch 1/15 done. Loss: Train 1.778, Test 1.814; and Acc: Train 0.353, Test 0.329\n",
495
+ "2025-01-22 09:29:45 Epoch 2/15 done. Loss: Train 1.555, Test 1.605; and Acc: Train 0.443, Test 0.422\n",
496
+ "2025-01-22 09:29:53 Epoch 3/15 done. Loss: Train 1.388, Test 1.451; and Acc: Train 0.519, Test 0.491\n",
497
+ "2025-01-22 09:30:00 Epoch 4/15 done. Loss: Train 1.274, Test 1.362; and Acc: Train 0.555, Test 0.523\n",
498
+ "2025-01-22 09:30:07 Epoch 5/15 done. Loss: Train 1.179, Test 1.300; and Acc: Train 0.588, Test 0.540\n",
499
+ "2025-01-22 09:30:15 Epoch 6/15 done. Loss: Train 1.097, Test 1.259; and Acc: Train 0.632, Test 0.550\n",
500
+ "2025-01-22 09:30:22 Epoch 7/15 done. Loss: Train 1.026, Test 1.225; and Acc: Train 0.659, Test 0.567\n",
501
+ "2025-01-22 09:30:30 Epoch 8/15 done. Loss: Train 0.947, Test 1.196; and Acc: Train 0.683, Test 0.580\n",
502
+ "2025-01-22 09:30:37 Epoch 9/15 done. Loss: Train 0.879, Test 1.176; and Acc: Train 0.717, Test 0.586\n",
503
+ "2025-01-22 09:30:44 Epoch 10/15 done. Loss: Train 0.817, Test 1.155; and Acc: Train 0.735, Test 0.600\n",
504
+ "2025-01-22 09:30:52 Epoch 11/15 done. Loss: Train 0.757, Test 1.148; and Acc: Train 0.763, Test 0.599\n",
505
+ "2025-01-22 09:30:59 Epoch 12/15 done. Loss: Train 0.700, Test 1.139; and Acc: Train 0.786, Test 0.603\n",
506
+ "2025-01-22 09:31:07 Epoch 13/15 done. Loss: Train 0.636, Test 1.137; and Acc: Train 0.806, Test 0.599\n",
507
+ "2025-01-22 09:31:14 Epoch 14/15 done. Loss: Train 0.582, Test 1.128; and Acc: Train 0.823, Test 0.604\n",
508
+ "2025-01-22 09:31:22 Epoch 15/15 done. Loss: Train 0.535, Test 1.134; and Acc: Train 0.837, Test 0.618\n"
509
  ]
510
  }
511
  ],
 
526
  },
527
  {
528
  "cell_type": "code",
529
+ "execution_count": 12,
530
  "id": "ec2516f9-79f2-4ae1-ab9a-9a51a7a50587",
531
  "metadata": {
532
  "execution": {
533
+ "iopub.execute_input": "2025-01-22T14:31:42.946851Z",
534
+ "iopub.status.busy": "2025-01-22T14:31:42.946191Z",
535
+ "iopub.status.idle": "2025-01-22T14:31:42.970151Z",
536
+ "shell.execute_reply": "2025-01-22T14:31:42.969731Z",
537
+ "shell.execute_reply.started": "2025-01-22T14:31:42.946804Z"
538
  },
539
  "scrolled": true
540
  },
541
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  "source": [
543
  "model_and_repo_name = \"frugal-ai-text-bert-tiny\"\n",
544
  "card_data = ModelCardData(\n",
 
563
  " testing_metrics=testing_metrics,\n",
564
  ")\n",
565
  "# print(card_data.to_yaml())\n",
566
+ "# print(card)"
567
  ]
568
  },
569
  {
570
  "cell_type": "code",
571
+ "execution_count": 13,
572
  "id": "29d3bbf9-ab2a-48e2-a550-e16da5025720",
573
  "metadata": {
574
  "execution": {
575
+ "iopub.execute_input": "2025-01-22T14:31:44.266203Z",
576
+ "iopub.status.busy": "2025-01-22T14:31:44.265638Z",
577
+ "iopub.status.idle": "2025-01-22T14:31:44.271280Z",
578
+ "shell.execute_reply": "2025-01-22T14:31:44.270441Z",
579
+ "shell.execute_reply.started": "2025-01-22T14:31:44.266162Z"
580
  }
581
  },
582
  "outputs": [],
 
587
  },
588
  {
589
  "cell_type": "code",
590
+ "execution_count": 14,
591
  "id": "e3b099c6-6b98-473b-8797-5032213b9fcb",
592
  "metadata": {
593
  "execution": {
594
+ "iopub.execute_input": "2025-01-22T14:31:45.670794Z",
595
+ "iopub.status.busy": "2025-01-22T14:31:45.670345Z",
596
+ "iopub.status.idle": "2025-01-22T14:31:45.731173Z",
597
+ "shell.execute_reply": "2025-01-22T14:31:45.730818Z",
598
+ "shell.execute_reply.started": "2025-01-22T14:31:45.670769Z"
599
  }
600
  },
601
  "outputs": [
 
603
  "name": "stdout",
604
  "output_type": "stream",
605
  "text": [
606
+ "2025-01-22 09:31:45 Predictions: tensor([0, 0, 3, 1, 2, 4, 6], device='mps:0')\n"
607
  ]
608
  }
609
  ],
 
636
  },
637
  {
638
  "cell_type": "code",
639
+ "execution_count": 18,
640
  "id": "befb94b5-88bf-40fc-8b26-cf373d1256e0",
641
  "metadata": {
642
  "execution": {
643
+ "iopub.execute_input": "2025-01-22T14:37:57.412327Z",
644
+ "iopub.status.busy": "2025-01-22T14:37:57.411779Z",
645
+ "iopub.status.idle": "2025-01-22T14:37:59.349630Z",
646
+ "shell.execute_reply": "2025-01-22T14:37:59.348338Z",
647
+ "shell.execute_reply.started": "2025-01-22T14:37:57.412288Z"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
648
  }
649
  },
650
  "outputs": [
651
  {
652
+ "name": "stderr",
653
+ "output_type": "stream",
654
+ "text": [
655
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n"
656
+ ]
 
 
 
 
 
 
 
657
  },
658
  {
659
  "data": {
660
  "text/plain": [
661
+ "CommitInfo(commit_url='https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny/commit/69d445f90562fc738f12cfb37908fccef8925f5c', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='69d445f90562fc738f12cfb37908fccef8925f5c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Nonnormalizable/frugal-ai-text-bert-tiny', endpoint='https://huggingface.co', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-tiny'), pr_revision=None, pr_num=None)"
662
  ]
663
  },
664
+ "execution_count": 18,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
665
  "metadata": {},
666
  "output_type": "execute_result"
667
  }
668
  ],
669
  "source": [
670
+ "model_final.push_to_hub(model_and_repo_name)\n",
671
+ "tokenizer_final.push_to_hub(model_and_repo_name)\n",
672
+ "model_final.config.push_to_hub(model_and_repo_name)\n",
673
  "card.push_to_hub(f\"Nonnormalizable/{model_and_repo_name}\")"
674
  ]
675
  },
676
  {
677
  "cell_type": "code",
678
  "execution_count": null,
679
+ "id": "f6df5d6b-2d24-4759-937b-7935ac01dba7",
680
  "metadata": {},
681
  "outputs": [],
682
  "source": []
 
703
  "widgets": {
704
  "application/vnd.jupyter.widget-state+json": {
705
  "state": {
706
+ "04362bf5ea1540e69a8ed37243e960fe": {
707
  "model_module": "@jupyter-widgets/controls",
708
  "model_module_version": "2.0.0",
709
  "model_name": "HTMLStyleModel",
 
713
  "text_color": null
714
  }
715
  },
716
+ "1a82e23ee0b44ec78b0bb2175f2e938a": {
717
  "model_module": "@jupyter-widgets/base",
718
  "model_module_version": "2.0.0",
719
  "model_name": "LayoutModel",
720
  "state": {}
721
  },
722
+ "3058e249f3a24b89a0946db9d46692cd": {
723
  "model_module": "@jupyter-widgets/controls",
724
  "model_module_version": "2.0.0",
725
  "model_name": "HTMLModel",
726
  "state": {
727
+ "layout": "IPY_MODEL_1a82e23ee0b44ec78b0bb2175f2e938a",
728
+ "style": "IPY_MODEL_04362bf5ea1540e69a8ed37243e960fe",
729
+ "value": " 17.6M/17.6M [00:00&lt;00:00, 30.6MB/s]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  },
732
+ "4bcb44aa9960417da7c3e374f5015413": {
733
  "model_module": "@jupyter-widgets/base",
734
  "model_module_version": "2.0.0",
735
  "model_name": "LayoutModel",
736
  "state": {}
737
  },
738
+ "572a4d1b74044da7a90c58c311a87eff": {
 
 
 
 
 
 
739
  "model_module": "@jupyter-widgets/controls",
740
  "model_module_version": "2.0.0",
741
  "model_name": "HTMLStyleModel",
 
745
  "text_color": null
746
  }
747
  },
748
+ "575da4c5a0b147989fc3444c95d5483b": {
749
  "model_module": "@jupyter-widgets/base",
750
  "model_module_version": "2.0.0",
751
  "model_name": "LayoutModel",
752
  "state": {}
753
  },
754
+ "8f403fa494c246c9af5ee00397ac6cf5": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  "model_module": "@jupyter-widgets/base",
756
  "model_module_version": "2.0.0",
757
  "model_name": "LayoutModel",
758
  "state": {}
759
  },
760
+ "916778013b8d48d9acddd42e8b874c22": {
761
  "model_module": "@jupyter-widgets/controls",
762
  "model_module_version": "2.0.0",
763
  "model_name": "ProgressStyleModel",
 
765
  "description_width": ""
766
  }
767
  },
768
+ "9c74511b86c240a9afb83e5dcd16b03b": {
769
  "model_module": "@jupyter-widgets/controls",
770
  "model_module_version": "2.0.0",
771
+ "model_name": "FloatProgressModel",
772
  "state": {
773
+ "bar_style": "success",
774
+ "layout": "IPY_MODEL_575da4c5a0b147989fc3444c95d5483b",
775
+ "max": 17552376,
776
+ "style": "IPY_MODEL_916778013b8d48d9acddd42e8b874c22",
777
+ "value": 17552376
778
  }
779
  },
780
+ "9fb53962769d48e6a7ee640072ff1908": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781
  "model_module": "@jupyter-widgets/controls",
782
  "model_module_version": "2.0.0",
783
+ "model_name": "HTMLModel",
784
  "state": {
785
+ "layout": "IPY_MODEL_4bcb44aa9960417da7c3e374f5015413",
786
+ "style": "IPY_MODEL_572a4d1b74044da7a90c58c311a87eff",
787
+ "value": "model.safetensors: 100%"
 
 
 
788
  }
789
  },
790
+ "eb84b40edbab4e9d91fd6283b144492f": {
791
  "model_module": "@jupyter-widgets/controls",
792
  "model_module_version": "2.0.0",
793
  "model_name": "HBoxModel",
794
  "state": {
795
  "children": [
796
+ "IPY_MODEL_9fb53962769d48e6a7ee640072ff1908",
797
+ "IPY_MODEL_9c74511b86c240a9afb83e5dcd16b03b",
798
+ "IPY_MODEL_3058e249f3a24b89a0946db9d46692cd"
799
  ],
800
+ "layout": "IPY_MODEL_8f403fa494c246c9af5ee00397ac6cf5"
 
 
 
 
 
 
 
 
 
 
801
  }
802
  }
803
  },