sycod commited on
Commit
6ebb6d1
·
1 Parent(s): d20ece8
.gitignore CHANGED
@@ -1,19 +1,17 @@
1
- .ipynb_checkpoints/sandbox-checkpoint.ipynb
2
- .DS_Store
3
- auto_evals/
4
- venv/
5
  __pycache__/
 
6
  .env
7
  .ipynb_checkpoints
8
- .vscode/
9
  .venv
10
-
 
 
 
11
  eval-queue/
12
  eval-results/
13
  eval-queue-bk/
14
  eval-results-bk/
15
  logs/
16
-
17
- emissions.csv
18
- data/
19
- pyro-sdis/
 
 
 
 
 
1
  __pycache__/
2
+ .DS_Store
3
  .env
4
  .ipynb_checkpoints
5
+ .ipynb_checkpoints/sandbox-checkpoint.ipynb
6
  .venv
7
+ .vscode/
8
+ auto_evals/
9
+ data/
10
+ emissions.csv
11
  eval-queue/
12
  eval-results/
13
  eval-queue-bk/
14
  eval-results-bk/
15
  logs/
16
+ pyro-sdis/
17
+ venv/
 
 
EDA.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
config.yaml CHANGED
@@ -1,23 +1,10 @@
1
- data:
2
- local_path: "data"
3
- img_dir: "Images"
4
- annot_dir: "Annotation"
5
- img_db_uri: "img_db.csv"
6
- train_dir: "train"
7
- test_dir: "test"
8
- checkpoint_dir : "model_chkpts"
9
- app_dir: "app"
10
 
11
- log:
12
- log_dir: "logs"
13
 
14
- models:
15
- classes_3: ["pug", "Siberian_husky", "borzoi"]
16
- classes_10: ["Leonberg", "basenji", "malamute", "papillon", "chow", "dhole", "dingo", "Cardigan", "Brabancon_griffon", "boxer"]
17
-
18
- app_data:
19
- local_path: "app_data"
20
- model: "EfficientNetB0_app.keras"
21
- onnx: "EfficientNetB0_app.onnx"
22
- # breeds are not in the same order as original classes
23
- breeds: ['Brabancon_griffon', 'Cardigan', 'Leonberg', 'basenji', 'boxer', 'chow', 'dhole', 'dingo', 'malamute', 'papillon']
 
1
+ data_dir: "data"
2
+ db_info_uri: "data_info.csv"
 
 
 
 
 
 
 
3
 
4
+ # log:
5
+ # log_dir: "logs"
6
 
7
+ # app_data:
8
+ # local_path: "app_data"
9
+ # model: "EfficientNetB0_app.keras"
10
+ # onnx: "EfficientNetB0_app.onnx"
 
 
 
 
 
 
notebooks/template-audio.ipynb DELETED
@@ -1,1351 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# Text task notebook template\n",
8
- "## Loading the necessary libraries"
9
- ]
10
- },
11
- {
12
- "cell_type": "code",
13
- "execution_count": 3,
14
- "metadata": {},
15
- "outputs": [
16
- {
17
- "name": "stderr",
18
- "output_type": "stream",
19
- "text": [
20
- "[codecarbon WARNING @ 19:48:07] Multiple instances of codecarbon are allowed to run at the same time.\n",
21
- "[codecarbon INFO @ 19:48:07] [setup] RAM Tracking...\n",
22
- "[codecarbon INFO @ 19:48:07] [setup] CPU Tracking...\n",
23
- "[codecarbon WARNING @ 19:48:09] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
24
- "[codecarbon WARNING @ 19:48:09] No CPU tracking mode found. Falling back on CPU constant mode. \n",
25
- " Windows OS detected: Please install Intel Power Gadget to measure CPU\n",
26
- "\n",
27
- "[codecarbon WARNING @ 19:48:11] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
28
- "[codecarbon INFO @ 19:48:11] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-1365U\n",
29
- "[codecarbon WARNING @ 19:48:11] No CPU tracking mode found. Falling back on CPU constant mode.\n",
30
- "[codecarbon INFO @ 19:48:11] [setup] GPU Tracking...\n",
31
- "[codecarbon INFO @ 19:48:11] No GPU found.\n",
32
- "[codecarbon INFO @ 19:48:11] >>> Tracker's metadata:\n",
33
- "[codecarbon INFO @ 19:48:11] Platform system: Windows-11-10.0.22631-SP0\n",
34
- "[codecarbon INFO @ 19:48:11] Python version: 3.12.7\n",
35
- "[codecarbon INFO @ 19:48:11] CodeCarbon version: 3.0.0_rc0\n",
36
- "[codecarbon INFO @ 19:48:11] Available RAM : 31.347 GB\n",
37
- "[codecarbon INFO @ 19:48:11] CPU count: 12\n",
38
- "[codecarbon INFO @ 19:48:11] CPU model: 13th Gen Intel(R) Core(TM) i7-1365U\n",
39
- "[codecarbon INFO @ 19:48:11] GPU count: None\n",
40
- "[codecarbon INFO @ 19:48:11] GPU model: None\n",
41
- "[codecarbon INFO @ 19:48:11] Saving emissions data to file c:\\git\\submission-template\\notebooks\\emissions.csv\n"
42
- ]
43
- }
44
- ],
45
- "source": [
46
- "from fastapi import APIRouter\n",
47
- "from datetime import datetime\n",
48
- "from datasets import load_dataset\n",
49
- "from sklearn.metrics import accuracy_score\n",
50
- "import random\n",
51
- "\n",
52
- "import sys\n",
53
- "sys.path.append('../tasks')\n",
54
- "\n",
55
- "from utils.evaluation import AudioEvaluationRequest\n",
56
- "from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
57
- "\n",
58
- "\n",
59
- "# Define the label mapping\n",
60
- "LABEL_MAPPING = {\n",
61
- " \"chainsaw\": 0,\n",
62
- " \"environment\": 1\n",
63
- "}"
64
- ]
65
- },
66
- {
67
- "cell_type": "markdown",
68
- "metadata": {},
69
- "source": [
70
- "## Loading the datasets and splitting them"
71
- ]
72
- },
73
- {
74
- "cell_type": "code",
75
- "execution_count": 4,
76
- "metadata": {},
77
- "outputs": [
78
- {
79
- "data": {
80
- "application/vnd.jupyter.widget-view+json": {
81
- "model_id": "668da7bf85434e098b95c3ec447d78fe",
82
- "version_major": 2,
83
- "version_minor": 0
84
- },
85
- "text/plain": [
86
- "README.md: 0%| | 0.00/5.18k [00:00<?, ?B/s]"
87
- ]
88
- },
89
- "metadata": {},
90
- "output_type": "display_data"
91
- },
92
- {
93
- "name": "stderr",
94
- "output_type": "stream",
95
- "text": [
96
- "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--QuotaClimat--frugalaichallenge-text-train. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
97
- "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
98
- " warnings.warn(message)\n"
99
- ]
100
- },
101
- {
102
- "data": {
103
- "application/vnd.jupyter.widget-view+json": {
104
- "model_id": "5b68d43359eb429395da8be7d4b15556",
105
- "version_major": 2,
106
- "version_minor": 0
107
- },
108
- "text/plain": [
109
- "train.parquet: 0%| | 0.00/1.21M [00:00<?, ?B/s]"
110
- ]
111
- },
112
- "metadata": {},
113
- "output_type": "display_data"
114
- },
115
- {
116
- "data": {
117
- "application/vnd.jupyter.widget-view+json": {
118
- "model_id": "140a304773914e9db8f698eabeb40298",
119
- "version_major": 2,
120
- "version_minor": 0
121
- },
122
- "text/plain": [
123
- "Generating train split: 0%| | 0/6091 [00:00<?, ? examples/s]"
124
- ]
125
- },
126
- "metadata": {},
127
- "output_type": "display_data"
128
- },
129
- {
130
- "data": {
131
- "application/vnd.jupyter.widget-view+json": {
132
- "model_id": "6d04e8ab1906400e8e0029949dc523a5",
133
- "version_major": 2,
134
- "version_minor": 0
135
- },
136
- "text/plain": [
137
- "Map: 0%| | 0/6091 [00:00<?, ? examples/s]"
138
- ]
139
- },
140
- "metadata": {},
141
- "output_type": "display_data"
142
- }
143
- ],
144
- "source": [
145
- "request = AudioEvaluationRequest()\n",
146
- "\n",
147
- "# Load and prepare the dataset\n",
148
- "dataset = load_dataset(request.dataset_name)\n",
149
- "\n",
150
- "# Split dataset\n",
151
- "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
152
- "test_dataset = train_test[\"test\"]"
153
- ]
154
- },
155
- {
156
- "cell_type": "markdown",
157
- "metadata": {},
158
- "source": [
159
- "## Random Baseline"
160
- ]
161
- },
162
- {
163
- "cell_type": "code",
164
- "execution_count": 5,
165
- "metadata": {},
166
- "outputs": [],
167
- "source": [
168
- "# Start tracking emissions\n",
169
- "tracker.start()\n",
170
- "tracker.start_task(\"inference\")"
171
- ]
172
- },
173
- {
174
- "cell_type": "code",
175
- "execution_count": 6,
176
- "metadata": {},
177
- "outputs": [
178
- {
179
- "data": {
180
- "text/plain": [
181
- "[1,\n",
182
- " 7,\n",
183
- " 6,\n",
184
- " 6,\n",
185
- " 2,\n",
186
- " 0,\n",
187
- " 1,\n",
188
- " 7,\n",
189
- " 3,\n",
190
- " 6,\n",
191
- " 6,\n",
192
- " 3,\n",
193
- " 6,\n",
194
- " 6,\n",
195
- " 5,\n",
196
- " 0,\n",
197
- " 2,\n",
198
- " 6,\n",
199
- " 2,\n",
200
- " 6,\n",
201
- " 5,\n",
202
- " 4,\n",
203
- " 1,\n",
204
- " 3,\n",
205
- " 6,\n",
206
- " 4,\n",
207
- " 2,\n",
208
- " 1,\n",
209
- " 4,\n",
210
- " 0,\n",
211
- " 3,\n",
212
- " 4,\n",
213
- " 1,\n",
214
- " 5,\n",
215
- " 5,\n",
216
- " 1,\n",
217
- " 2,\n",
218
- " 7,\n",
219
- " 6,\n",
220
- " 1,\n",
221
- " 3,\n",
222
- " 1,\n",
223
- " 7,\n",
224
- " 7,\n",
225
- " 0,\n",
226
- " 0,\n",
227
- " 3,\n",
228
- " 3,\n",
229
- " 3,\n",
230
- " 4,\n",
231
- " 1,\n",
232
- " 4,\n",
233
- " 4,\n",
234
- " 1,\n",
235
- " 4,\n",
236
- " 5,\n",
237
- " 6,\n",
238
- " 1,\n",
239
- " 2,\n",
240
- " 2,\n",
241
- " 2,\n",
242
- " 5,\n",
243
- " 2,\n",
244
- " 7,\n",
245
- " 2,\n",
246
- " 7,\n",
247
- " 7,\n",
248
- " 6,\n",
249
- " 4,\n",
250
- " 2,\n",
251
- " 0,\n",
252
- " 1,\n",
253
- " 6,\n",
254
- " 3,\n",
255
- " 2,\n",
256
- " 5,\n",
257
- " 5,\n",
258
- " 2,\n",
259
- " 0,\n",
260
- " 7,\n",
261
- " 0,\n",
262
- " 1,\n",
263
- " 5,\n",
264
- " 5,\n",
265
- " 7,\n",
266
- " 4,\n",
267
- " 6,\n",
268
- " 7,\n",
269
- " 1,\n",
270
- " 7,\n",
271
- " 1,\n",
272
- " 0,\n",
273
- " 3,\n",
274
- " 4,\n",
275
- " 2,\n",
276
- " 5,\n",
277
- " 3,\n",
278
- " 3,\n",
279
- " 3,\n",
280
- " 2,\n",
281
- " 2,\n",
282
- " 1,\n",
283
- " 0,\n",
284
- " 4,\n",
285
- " 5,\n",
286
- " 7,\n",
287
- " 0,\n",
288
- " 3,\n",
289
- " 1,\n",
290
- " 4,\n",
291
- " 6,\n",
292
- " 0,\n",
293
- " 7,\n",
294
- " 1,\n",
295
- " 1,\n",
296
- " 2,\n",
297
- " 2,\n",
298
- " 4,\n",
299
- " 0,\n",
300
- " 4,\n",
301
- " 3,\n",
302
- " 4,\n",
303
- " 4,\n",
304
- " 2,\n",
305
- " 2,\n",
306
- " 3,\n",
307
- " 3,\n",
308
- " 7,\n",
309
- " 4,\n",
310
- " 7,\n",
311
- " 6,\n",
312
- " 4,\n",
313
- " 5,\n",
314
- " 4,\n",
315
- " 3,\n",
316
- " 6,\n",
317
- " 0,\n",
318
- " 4,\n",
319
- " 0,\n",
320
- " 1,\n",
321
- " 3,\n",
322
- " 6,\n",
323
- " 7,\n",
324
- " 3,\n",
325
- " 3,\n",
326
- " 0,\n",
327
- " 1,\n",
328
- " 2,\n",
329
- " 4,\n",
330
- " 4,\n",
331
- " 3,\n",
332
- " 1,\n",
333
- " 2,\n",
334
- " 4,\n",
335
- " 3,\n",
336
- " 0,\n",
337
- " 5,\n",
338
- " 3,\n",
339
- " 6,\n",
340
- " 3,\n",
341
- " 6,\n",
342
- " 1,\n",
343
- " 3,\n",
344
- " 4,\n",
345
- " 5,\n",
346
- " 4,\n",
347
- " 0,\n",
348
- " 7,\n",
349
- " 3,\n",
350
- " 6,\n",
351
- " 7,\n",
352
- " 4,\n",
353
- " 4,\n",
354
- " 5,\n",
355
- " 3,\n",
356
- " 1,\n",
357
- " 7,\n",
358
- " 4,\n",
359
- " 1,\n",
360
- " 0,\n",
361
- " 3,\n",
362
- " 0,\n",
363
- " 5,\n",
364
- " 3,\n",
365
- " 6,\n",
366
- " 3,\n",
367
- " 0,\n",
368
- " 7,\n",
369
- " 2,\n",
370
- " 0,\n",
371
- " 4,\n",
372
- " 1,\n",
373
- " 2,\n",
374
- " 6,\n",
375
- " 3,\n",
376
- " 4,\n",
377
- " 4,\n",
378
- " 5,\n",
379
- " 1,\n",
380
- " 5,\n",
381
- " 4,\n",
382
- " 0,\n",
383
- " 1,\n",
384
- " 7,\n",
385
- " 3,\n",
386
- " 6,\n",
387
- " 0,\n",
388
- " 7,\n",
389
- " 4,\n",
390
- " 6,\n",
391
- " 3,\n",
392
- " 0,\n",
393
- " 0,\n",
394
- " 4,\n",
395
- " 6,\n",
396
- " 6,\n",
397
- " 4,\n",
398
- " 0,\n",
399
- " 5,\n",
400
- " 7,\n",
401
- " 5,\n",
402
- " 1,\n",
403
- " 3,\n",
404
- " 6,\n",
405
- " 2,\n",
406
- " 3,\n",
407
- " 2,\n",
408
- " 4,\n",
409
- " 5,\n",
410
- " 1,\n",
411
- " 5,\n",
412
- " 0,\n",
413
- " 3,\n",
414
- " 3,\n",
415
- " 0,\n",
416
- " 0,\n",
417
- " 6,\n",
418
- " 6,\n",
419
- " 2,\n",
420
- " 0,\n",
421
- " 7,\n",
422
- " 4,\n",
423
- " 5,\n",
424
- " 7,\n",
425
- " 1,\n",
426
- " 0,\n",
427
- " 4,\n",
428
- " 5,\n",
429
- " 1,\n",
430
- " 7,\n",
431
- " 0,\n",
432
- " 7,\n",
433
- " 2,\n",
434
- " 6,\n",
435
- " 1,\n",
436
- " 3,\n",
437
- " 5,\n",
438
- " 5,\n",
439
- " 6,\n",
440
- " 5,\n",
441
- " 4,\n",
442
- " 3,\n",
443
- " 7,\n",
444
- " 4,\n",
445
- " 3,\n",
446
- " 5,\n",
447
- " 5,\n",
448
- " 7,\n",
449
- " 2,\n",
450
- " 6,\n",
451
- " 1,\n",
452
- " 5,\n",
453
- " 0,\n",
454
- " 3,\n",
455
- " 4,\n",
456
- " 2,\n",
457
- " 3,\n",
458
- " 7,\n",
459
- " 0,\n",
460
- " 1,\n",
461
- " 7,\n",
462
- " 6,\n",
463
- " 7,\n",
464
- " 7,\n",
465
- " 5,\n",
466
- " 6,\n",
467
- " 3,\n",
468
- " 2,\n",
469
- " 3,\n",
470
- " 0,\n",
471
- " 4,\n",
472
- " 3,\n",
473
- " 5,\n",
474
- " 6,\n",
475
- " 0,\n",
476
- " 0,\n",
477
- " 6,\n",
478
- " 6,\n",
479
- " 1,\n",
480
- " 4,\n",
481
- " 0,\n",
482
- " 4,\n",
483
- " 2,\n",
484
- " 7,\n",
485
- " 5,\n",
486
- " 7,\n",
487
- " 6,\n",
488
- " 3,\n",
489
- " 5,\n",
490
- " 6,\n",
491
- " 0,\n",
492
- " 4,\n",
493
- " 5,\n",
494
- " 6,\n",
495
- " 1,\n",
496
- " 2,\n",
497
- " 1,\n",
498
- " 5,\n",
499
- " 3,\n",
500
- " 0,\n",
501
- " 3,\n",
502
- " 7,\n",
503
- " 1,\n",
504
- " 0,\n",
505
- " 7,\n",
506
- " 0,\n",
507
- " 1,\n",
508
- " 0,\n",
509
- " 4,\n",
510
- " 1,\n",
511
- " 1,\n",
512
- " 0,\n",
513
- " 7,\n",
514
- " 1,\n",
515
- " 0,\n",
516
- " 7,\n",
517
- " 6,\n",
518
- " 2,\n",
519
- " 3,\n",
520
- " 7,\n",
521
- " 4,\n",
522
- " 3,\n",
523
- " 4,\n",
524
- " 3,\n",
525
- " 3,\n",
526
- " 2,\n",
527
- " 5,\n",
528
- " 1,\n",
529
- " 5,\n",
530
- " 1,\n",
531
- " 7,\n",
532
- " 3,\n",
533
- " 2,\n",
534
- " 6,\n",
535
- " 4,\n",
536
- " 4,\n",
537
- " 1,\n",
538
- " 2,\n",
539
- " 6,\n",
540
- " 7,\n",
541
- " 2,\n",
542
- " 7,\n",
543
- " 1,\n",
544
- " 3,\n",
545
- " 5,\n",
546
- " 2,\n",
547
- " 6,\n",
548
- " 4,\n",
549
- " 6,\n",
550
- " 7,\n",
551
- " 0,\n",
552
- " 5,\n",
553
- " 1,\n",
554
- " 6,\n",
555
- " 5,\n",
556
- " 3,\n",
557
- " 6,\n",
558
- " 5,\n",
559
- " 4,\n",
560
- " 7,\n",
561
- " 6,\n",
562
- " 5,\n",
563
- " 4,\n",
564
- " 3,\n",
565
- " 0,\n",
566
- " 0,\n",
567
- " 1,\n",
568
- " 7,\n",
569
- " 7,\n",
570
- " 6,\n",
571
- " 1,\n",
572
- " 4,\n",
573
- " 5,\n",
574
- " 6,\n",
575
- " 1,\n",
576
- " 5,\n",
577
- " 1,\n",
578
- " 2,\n",
579
- " 6,\n",
580
- " 2,\n",
581
- " 6,\n",
582
- " 0,\n",
583
- " 2,\n",
584
- " 1,\n",
585
- " 5,\n",
586
- " 5,\n",
587
- " 1,\n",
588
- " 7,\n",
589
- " 0,\n",
590
- " 5,\n",
591
- " 5,\n",
592
- " 1,\n",
593
- " 7,\n",
594
- " 7,\n",
595
- " 2,\n",
596
- " 1,\n",
597
- " 0,\n",
598
- " 1,\n",
599
- " 0,\n",
600
- " 5,\n",
601
- " 4,\n",
602
- " 2,\n",
603
- " 7,\n",
604
- " 4,\n",
605
- " 3,\n",
606
- " 6,\n",
607
- " 7,\n",
608
- " 5,\n",
609
- " 1,\n",
610
- " 0,\n",
611
- " 7,\n",
612
- " 2,\n",
613
- " 1,\n",
614
- " 2,\n",
615
- " 3,\n",
616
- " 1,\n",
617
- " 0,\n",
618
- " 3,\n",
619
- " 2,\n",
620
- " 6,\n",
621
- " 0,\n",
622
- " 5,\n",
623
- " 4,\n",
624
- " 7,\n",
625
- " 1,\n",
626
- " 1,\n",
627
- " 0,\n",
628
- " 7,\n",
629
- " 0,\n",
630
- " 6,\n",
631
- " 7,\n",
632
- " 6,\n",
633
- " 1,\n",
634
- " 5,\n",
635
- " 5,\n",
636
- " 7,\n",
637
- " 6,\n",
638
- " 1,\n",
639
- " 7,\n",
640
- " 6,\n",
641
- " 5,\n",
642
- " 4,\n",
643
- " 1,\n",
644
- " 4,\n",
645
- " 7,\n",
646
- " 5,\n",
647
- " 4,\n",
648
- " 0,\n",
649
- " 0,\n",
650
- " 7,\n",
651
- " 0,\n",
652
- " 0,\n",
653
- " 3,\n",
654
- " 6,\n",
655
- " 2,\n",
656
- " 5,\n",
657
- " 3,\n",
658
- " 0,\n",
659
- " 3,\n",
660
- " 6,\n",
661
- " 5,\n",
662
- " 7,\n",
663
- " 2,\n",
664
- " 6,\n",
665
- " 7,\n",
666
- " 5,\n",
667
- " 2,\n",
668
- " 3,\n",
669
- " 6,\n",
670
- " 7,\n",
671
- " 7,\n",
672
- " 7,\n",
673
- " 6,\n",
674
- " 1,\n",
675
- " 7,\n",
676
- " 4,\n",
677
- " 2,\n",
678
- " 7,\n",
679
- " 5,\n",
680
- " 4,\n",
681
- " 1,\n",
682
- " 2,\n",
683
- " 3,\n",
684
- " 7,\n",
685
- " 0,\n",
686
- " 2,\n",
687
- " 7,\n",
688
- " 6,\n",
689
- " 1,\n",
690
- " 4,\n",
691
- " 0,\n",
692
- " 6,\n",
693
- " 3,\n",
694
- " 1,\n",
695
- " 0,\n",
696
- " 3,\n",
697
- " 4,\n",
698
- " 7,\n",
699
- " 7,\n",
700
- " 4,\n",
701
- " 2,\n",
702
- " 1,\n",
703
- " 0,\n",
704
- " 5,\n",
705
- " 1,\n",
706
- " 7,\n",
707
- " 4,\n",
708
- " 6,\n",
709
- " 7,\n",
710
- " 7,\n",
711
- " 3,\n",
712
- " 4,\n",
713
- " 3,\n",
714
- " 5,\n",
715
- " 4,\n",
716
- " 4,\n",
717
- " 5,\n",
718
- " 0,\n",
719
- " 1,\n",
720
- " 3,\n",
721
- " 7,\n",
722
- " 5,\n",
723
- " 4,\n",
724
- " 7,\n",
725
- " 3,\n",
726
- " 3,\n",
727
- " 3,\n",
728
- " 5,\n",
729
- " 3,\n",
730
- " 3,\n",
731
- " 4,\n",
732
- " 0,\n",
733
- " 1,\n",
734
- " 7,\n",
735
- " 4,\n",
736
- " 7,\n",
737
- " 7,\n",
738
- " 5,\n",
739
- " 0,\n",
740
- " 0,\n",
741
- " 5,\n",
742
- " 2,\n",
743
- " 6,\n",
744
- " 2,\n",
745
- " 6,\n",
746
- " 7,\n",
747
- " 6,\n",
748
- " 5,\n",
749
- " 7,\n",
750
- " 5,\n",
751
- " 7,\n",
752
- " 1,\n",
753
- " 6,\n",
754
- " 6,\n",
755
- " 0,\n",
756
- " 4,\n",
757
- " 7,\n",
758
- " 3,\n",
759
- " 0,\n",
760
- " 0,\n",
761
- " 2,\n",
762
- " 5,\n",
763
- " 2,\n",
764
- " 3,\n",
765
- " 7,\n",
766
- " 1,\n",
767
- " 0,\n",
768
- " 3,\n",
769
- " 0,\n",
770
- " 0,\n",
771
- " 3,\n",
772
- " 3,\n",
773
- " 7,\n",
774
- " 3,\n",
775
- " 0,\n",
776
- " 1,\n",
777
- " 1,\n",
778
- " 6,\n",
779
- " 0,\n",
780
- " 0,\n",
781
- " 5,\n",
782
- " 0,\n",
783
- " 3,\n",
784
- " 4,\n",
785
- " 6,\n",
786
- " 7,\n",
787
- " 4,\n",
788
- " 0,\n",
789
- " 4,\n",
790
- " 4,\n",
791
- " 5,\n",
792
- " 4,\n",
793
- " 4,\n",
794
- " 3,\n",
795
- " 6,\n",
796
- " 5,\n",
797
- " 2,\n",
798
- " 0,\n",
799
- " 6,\n",
800
- " 0,\n",
801
- " 6,\n",
802
- " 4,\n",
803
- " 3,\n",
804
- " 5,\n",
805
- " 7,\n",
806
- " 7,\n",
807
- " 5,\n",
808
- " 5,\n",
809
- " 1,\n",
810
- " 5,\n",
811
- " 2,\n",
812
- " 7,\n",
813
- " 7,\n",
814
- " 6,\n",
815
- " 6,\n",
816
- " 7,\n",
817
- " 6,\n",
818
- " 5,\n",
819
- " 2,\n",
820
- " 4,\n",
821
- " 0,\n",
822
- " 4,\n",
823
- " 4,\n",
824
- " 7,\n",
825
- " 5,\n",
826
- " 2,\n",
827
- " 7,\n",
828
- " 0,\n",
829
- " 6,\n",
830
- " 0,\n",
831
- " 2,\n",
832
- " 6,\n",
833
- " 6,\n",
834
- " 2,\n",
835
- " 3,\n",
836
- " 0,\n",
837
- " 5,\n",
838
- " 0,\n",
839
- " 5,\n",
840
- " 7,\n",
841
- " 2,\n",
842
- " 7,\n",
843
- " 4,\n",
844
- " 7,\n",
845
- " 4,\n",
846
- " 0,\n",
847
- " 7,\n",
848
- " 1,\n",
849
- " 4,\n",
850
- " 5,\n",
851
- " 0,\n",
852
- " 5,\n",
853
- " 5,\n",
854
- " 2,\n",
855
- " 0,\n",
856
- " 2,\n",
857
- " 5,\n",
858
- " 5,\n",
859
- " 6,\n",
860
- " 3,\n",
861
- " 4,\n",
862
- " 1,\n",
863
- " 7,\n",
864
- " 7,\n",
865
- " 2,\n",
866
- " 3,\n",
867
- " 2,\n",
868
- " 5,\n",
869
- " 0,\n",
870
- " 7,\n",
871
- " 2,\n",
872
- " 3,\n",
873
- " 7,\n",
874
- " 2,\n",
875
- " 4,\n",
876
- " 0,\n",
877
- " 5,\n",
878
- " 7,\n",
879
- " 3,\n",
880
- " 6,\n",
881
- " 7,\n",
882
- " 6,\n",
883
- " 4,\n",
884
- " 3,\n",
885
- " 6,\n",
886
- " 5,\n",
887
- " 4,\n",
888
- " 0,\n",
889
- " 3,\n",
890
- " 4,\n",
891
- " 3,\n",
892
- " 5,\n",
893
- " 2,\n",
894
- " 4,\n",
895
- " 0,\n",
896
- " 3,\n",
897
- " 6,\n",
898
- " 1,\n",
899
- " 3,\n",
900
- " 1,\n",
901
- " 4,\n",
902
- " 3,\n",
903
- " 3,\n",
904
- " 3,\n",
905
- " 0,\n",
906
- " 7,\n",
907
- " 6,\n",
908
- " 2,\n",
909
- " 4,\n",
910
- " 6,\n",
911
- " 5,\n",
912
- " 4,\n",
913
- " 1,\n",
914
- " 7,\n",
915
- " 6,\n",
916
- " 1,\n",
917
- " 4,\n",
918
- " 3,\n",
919
- " 0,\n",
920
- " 7,\n",
921
- " 3,\n",
922
- " 1,\n",
923
- " 2,\n",
924
- " 1,\n",
925
- " 6,\n",
926
- " 4,\n",
927
- " 7,\n",
928
- " 1,\n",
929
- " 7,\n",
930
- " 1,\n",
931
- " 5,\n",
932
- " 1,\n",
933
- " 6,\n",
934
- " 3,\n",
935
- " 0,\n",
936
- " 2,\n",
937
- " 6,\n",
938
- " 7,\n",
939
- " 7,\n",
940
- " 0,\n",
941
- " 1,\n",
942
- " 4,\n",
943
- " 0,\n",
944
- " 4,\n",
945
- " 5,\n",
946
- " 3,\n",
947
- " 6,\n",
948
- " 2,\n",
949
- " 3,\n",
950
- " 4,\n",
951
- " 1,\n",
952
- " 6,\n",
953
- " 2,\n",
954
- " 4,\n",
955
- " 4,\n",
956
- " 6,\n",
957
- " 4,\n",
958
- " 5,\n",
959
- " 7,\n",
960
- " 1,\n",
961
- " 7,\n",
962
- " 7,\n",
963
- " 4,\n",
964
- " 7,\n",
965
- " 4,\n",
966
- " 3,\n",
967
- " 3,\n",
968
- " 6,\n",
969
- " 1,\n",
970
- " 2,\n",
971
- " 0,\n",
972
- " 0,\n",
973
- " 0,\n",
974
- " 2,\n",
975
- " 5,\n",
976
- " 6,\n",
977
- " 5,\n",
978
- " 7,\n",
979
- " 5,\n",
980
- " 7,\n",
981
- " 1,\n",
982
- " 1,\n",
983
- " 2,\n",
984
- " 1,\n",
985
- " 6,\n",
986
- " 5,\n",
987
- " 7,\n",
988
- " 0,\n",
989
- " 0,\n",
990
- " 5,\n",
991
- " 5,\n",
992
- " 0,\n",
993
- " 3,\n",
994
- " 7,\n",
995
- " 5,\n",
996
- " 2,\n",
997
- " 5,\n",
998
- " 4,\n",
999
- " 2,\n",
1000
- " 3,\n",
1001
- " 6,\n",
1002
- " 2,\n",
1003
- " 3,\n",
1004
- " 6,\n",
1005
- " 0,\n",
1006
- " 0,\n",
1007
- " 2,\n",
1008
- " 6,\n",
1009
- " 0,\n",
1010
- " 1,\n",
1011
- " 3,\n",
1012
- " 3,\n",
1013
- " 6,\n",
1014
- " 4,\n",
1015
- " 6,\n",
1016
- " 4,\n",
1017
- " 6,\n",
1018
- " 0,\n",
1019
- " 0,\n",
1020
- " 2,\n",
1021
- " 3,\n",
1022
- " 6,\n",
1023
- " 2,\n",
1024
- " 2,\n",
1025
- " 6,\n",
1026
- " 6,\n",
1027
- " 2,\n",
1028
- " 4,\n",
1029
- " 3,\n",
1030
- " 3,\n",
1031
- " 6,\n",
1032
- " 7,\n",
1033
- " 7,\n",
1034
- " 1,\n",
1035
- " 1,\n",
1036
- " 7,\n",
1037
- " 7,\n",
1038
- " 6,\n",
1039
- " 1,\n",
1040
- " 7,\n",
1041
- " 0,\n",
1042
- " 0,\n",
1043
- " 2,\n",
1044
- " 4,\n",
1045
- " 2,\n",
1046
- " 2,\n",
1047
- " 3,\n",
1048
- " 0,\n",
1049
- " 1,\n",
1050
- " 4,\n",
1051
- " 0,\n",
1052
- " 4,\n",
1053
- " 6,\n",
1054
- " 5,\n",
1055
- " 3,\n",
1056
- " 2,\n",
1057
- " 3,\n",
1058
- " 2,\n",
1059
- " 3,\n",
1060
- " 6,\n",
1061
- " 2,\n",
1062
- " 1,\n",
1063
- " 4,\n",
1064
- " 7,\n",
1065
- " 6,\n",
1066
- " 4,\n",
1067
- " 5,\n",
1068
- " 6,\n",
1069
- " 7,\n",
1070
- " 7,\n",
1071
- " 2,\n",
1072
- " 0,\n",
1073
- " 5,\n",
1074
- " 5,\n",
1075
- " 0,\n",
1076
- " 3,\n",
1077
- " 6,\n",
1078
- " 6,\n",
1079
- " 5,\n",
1080
- " 4,\n",
1081
- " 4,\n",
1082
- " 7,\n",
1083
- " 0,\n",
1084
- " 5,\n",
1085
- " 1,\n",
1086
- " 7,\n",
1087
- " 0,\n",
1088
- " 3,\n",
1089
- " 1,\n",
1090
- " 7,\n",
1091
- " 0,\n",
1092
- " 1,\n",
1093
- " 4,\n",
1094
- " 7,\n",
1095
- " 5,\n",
1096
- " 0,\n",
1097
- " 4,\n",
1098
- " 0,\n",
1099
- " 0,\n",
1100
- " 1,\n",
1101
- " 0,\n",
1102
- " 6,\n",
1103
- " 4,\n",
1104
- " 0,\n",
1105
- " 5,\n",
1106
- " 4,\n",
1107
- " 6,\n",
1108
- " 6,\n",
1109
- " 7,\n",
1110
- " 2,\n",
1111
- " 6,\n",
1112
- " 2,\n",
1113
- " 6,\n",
1114
- " 0,\n",
1115
- " 3,\n",
1116
- " 2,\n",
1117
- " 2,\n",
1118
- " 1,\n",
1119
- " 5,\n",
1120
- " 4,\n",
1121
- " 7,\n",
1122
- " 6,\n",
1123
- " 6,\n",
1124
- " 2,\n",
1125
- " 5,\n",
1126
- " 5,\n",
1127
- " 5,\n",
1128
- " 0,\n",
1129
- " 3,\n",
1130
- " 5,\n",
1131
- " 4,\n",
1132
- " 5,\n",
1133
- " 7,\n",
1134
- " 5,\n",
1135
- " 0,\n",
1136
- " 5,\n",
1137
- " 0,\n",
1138
- " 0,\n",
1139
- " 2,\n",
1140
- " 0,\n",
1141
- " 2,\n",
1142
- " 1,\n",
1143
- " 0,\n",
1144
- " 2,\n",
1145
- " 4,\n",
1146
- " 3,\n",
1147
- " 4,\n",
1148
- " 1,\n",
1149
- " 7,\n",
1150
- " 2,\n",
1151
- " 1,\n",
1152
- " 0,\n",
1153
- " 3,\n",
1154
- " 0,\n",
1155
- " 3,\n",
1156
- " 1,\n",
1157
- " 1,\n",
1158
- " 0,\n",
1159
- " 5,\n",
1160
- " 3,\n",
1161
- " 1,\n",
1162
- " 2,\n",
1163
- " 5,\n",
1164
- " 6,\n",
1165
- " 7,\n",
1166
- " 6,\n",
1167
- " 7,\n",
1168
- " 0,\n",
1169
- " 2,\n",
1170
- " 6,\n",
1171
- " 3,\n",
1172
- " 1,\n",
1173
- " 5,\n",
1174
- " 4,\n",
1175
- " 2,\n",
1176
- " 4,\n",
1177
- " 6,\n",
1178
- " 5,\n",
1179
- " 2,\n",
1180
- " 7,\n",
1181
- " ...]"
1182
- ]
1183
- },
1184
- "execution_count": 6,
1185
- "metadata": {},
1186
- "output_type": "execute_result"
1187
- }
1188
- ],
1189
- "source": [
1190
- "\n",
1191
- "#--------------------------------------------------------------------------------------------\n",
1192
- "# YOUR MODEL INFERENCE CODE HERE\n",
1193
- "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
1194
- "#-------------------------------------------------------------------------------------------- \n",
1195
- "\n",
1196
- "# Make random predictions (placeholder for actual model inference)\n",
1197
- "true_labels = test_dataset[\"label\"]\n",
1198
- "predictions = [random.randint(0, 1) for _ in range(len(true_labels))]\n",
1199
- "\n",
1200
- "predictions\n",
1201
- "\n",
1202
- "#--------------------------------------------------------------------------------------------\n",
1203
- "# YOUR MODEL INFERENCE STOPS HERE\n",
1204
- "#-------------------------------------------------------------------------------------------- "
1205
- ]
1206
- },
1207
- {
1208
- "cell_type": "code",
1209
- "execution_count": 8,
1210
- "metadata": {},
1211
- "outputs": [
1212
- {
1213
- "name": "stderr",
1214
- "output_type": "stream",
1215
- "text": [
1216
- "[codecarbon WARNING @ 19:53:32] Background scheduler didn't run for a long period (47s), results might be inaccurate\n",
1217
- "[codecarbon INFO @ 19:53:32] Energy consumed for RAM : 0.000156 kWh. RAM Power : 11.755242347717285 W\n",
1218
- "[codecarbon INFO @ 19:53:32] Delta energy consumed for CPU with constant : 0.000564 kWh, power : 42.5 W\n",
1219
- "[codecarbon INFO @ 19:53:32] Energy consumed for All CPU : 0.000564 kWh\n",
1220
- "[codecarbon INFO @ 19:53:32] 0.000720 kWh of electricity used since the beginning.\n"
1221
- ]
1222
- },
1223
- {
1224
- "data": {
1225
- "text/plain": [
1226
- "EmissionsData(timestamp='2025-01-21T19:53:32', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=47.736408500000834, emissions=4.032368007471064e-05, emissions_rate=8.444466886328872e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.0005636615353475565, gpu_energy=0, ram_energy=0.00015590305493261682, energy_consumed=0.0007195645902801733, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
1227
- ]
1228
- },
1229
- "execution_count": 8,
1230
- "metadata": {},
1231
- "output_type": "execute_result"
1232
- }
1233
- ],
1234
- "source": [
1235
- "# Stop tracking emissions\n",
1236
- "emissions_data = tracker.stop_task()\n",
1237
- "emissions_data"
1238
- ]
1239
- },
1240
- {
1241
- "cell_type": "code",
1242
- "execution_count": 9,
1243
- "metadata": {},
1244
- "outputs": [
1245
- {
1246
- "data": {
1247
- "text/plain": [
1248
- "0.10090237899917966"
1249
- ]
1250
- },
1251
- "execution_count": 9,
1252
- "metadata": {},
1253
- "output_type": "execute_result"
1254
- }
1255
- ],
1256
- "source": [
1257
- "# Calculate accuracy\n",
1258
- "accuracy = accuracy_score(true_labels, predictions)\n",
1259
- "accuracy"
1260
- ]
1261
- },
1262
- {
1263
- "cell_type": "code",
1264
- "execution_count": 10,
1265
- "metadata": {},
1266
- "outputs": [
1267
- {
1268
- "data": {
1269
- "text/plain": [
1270
- "{'submission_timestamp': '2025-01-21T19:53:46.639165',\n",
1271
- " 'accuracy': 0.10090237899917966,\n",
1272
- " 'energy_consumed_wh': 0.7195645902801733,\n",
1273
- " 'emissions_gco2eq': 0.040323680074710634,\n",
1274
- " 'emissions_data': {'run_id': '908f2e7e-4bb2-4991-a0f6-56bf8d7eda21',\n",
1275
- " 'duration': 47.736408500000834,\n",
1276
- " 'emissions': 4.032368007471064e-05,\n",
1277
- " 'emissions_rate': 8.444466886328872e-07,\n",
1278
- " 'cpu_power': 42.5,\n",
1279
- " 'gpu_power': 0.0,\n",
1280
- " 'ram_power': 11.755242347717285,\n",
1281
- " 'cpu_energy': 0.0005636615353475565,\n",
1282
- " 'gpu_energy': 0,\n",
1283
- " 'ram_energy': 0.00015590305493261682,\n",
1284
- " 'energy_consumed': 0.0007195645902801733,\n",
1285
- " 'country_name': 'France',\n",
1286
- " 'country_iso_code': 'FRA',\n",
1287
- " 'region': 'île-de-france',\n",
1288
- " 'cloud_provider': '',\n",
1289
- " 'cloud_region': '',\n",
1290
- " 'os': 'Windows-11-10.0.22631-SP0',\n",
1291
- " 'python_version': '3.12.7',\n",
1292
- " 'codecarbon_version': '3.0.0_rc0',\n",
1293
- " 'cpu_count': 12,\n",
1294
- " 'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n",
1295
- " 'gpu_count': None,\n",
1296
- " 'gpu_model': None,\n",
1297
- " 'ram_total_size': 31.347312927246094,\n",
1298
- " 'tracking_mode': 'machine',\n",
1299
- " 'on_cloud': 'N',\n",
1300
- " 'pue': 1.0},\n",
1301
- " 'dataset_config': {'dataset_name': 'QuotaClimat/frugalaichallenge-text-train',\n",
1302
- " 'test_size': 0.2,\n",
1303
- " 'test_seed': 42}}"
1304
- ]
1305
- },
1306
- "execution_count": 10,
1307
- "metadata": {},
1308
- "output_type": "execute_result"
1309
- }
1310
- ],
1311
- "source": [
1312
- "# Prepare results dictionary\n",
1313
- "results = {\n",
1314
- " \"submission_timestamp\": datetime.now().isoformat(),\n",
1315
- " \"accuracy\": float(accuracy),\n",
1316
- " \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
1317
- " \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
1318
- " \"emissions_data\": clean_emissions_data(emissions_data),\n",
1319
- " \"dataset_config\": {\n",
1320
- " \"dataset_name\": request.dataset_name,\n",
1321
- " \"test_size\": request.test_size,\n",
1322
- " \"test_seed\": request.test_seed\n",
1323
- " }\n",
1324
- "}\n",
1325
- "\n",
1326
- "results"
1327
- ]
1328
- }
1329
- ],
1330
- "metadata": {
1331
- "kernelspec": {
1332
- "display_name": "base",
1333
- "language": "python",
1334
- "name": "python3"
1335
- },
1336
- "language_info": {
1337
- "codemirror_mode": {
1338
- "name": "ipython",
1339
- "version": 3
1340
- },
1341
- "file_extension": ".py",
1342
- "mimetype": "text/x-python",
1343
- "name": "python",
1344
- "nbconvert_exporter": "python",
1345
- "pygments_lexer": "ipython3",
1346
- "version": "3.12.7"
1347
- }
1348
- },
1349
- "nbformat": 4,
1350
- "nbformat_minor": 2
1351
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/template-image.ipynb DELETED
@@ -1,475 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# 🚧 Info\n",
8
- "\n",
9
- "https://huggingface.co/datasets/pyronear/pyro-sdis\n",
10
- "\n",
11
- "https://frugalaichallenge.org/participate/"
12
- ]
13
- },
14
- {
15
- "cell_type": "markdown",
16
- "metadata": {},
17
- "source": [
18
- "# Image task notebook template\n",
19
- "## Loading the necessary libraries"
20
- ]
21
- },
22
- {
23
- "cell_type": "code",
24
- "execution_count": 1,
25
- "metadata": {},
26
- "outputs": [
27
- {
28
- "ename": "ModuleNotFoundError",
29
- "evalue": "No module named 'tasks'",
30
- "output_type": "error",
31
- "traceback": [
32
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
33
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
34
- "Cell \u001b[0;32mIn[1], line 9\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mrandom\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# import sys\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# sys.path.append('../')\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mevaluation\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ImageEvaluationRequest\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01memissions\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m tracker, clean_emissions_data, get_space_info\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mload_data\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_data\n",
35
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tasks'"
36
- ]
37
- }
38
- ],
39
- "source": [
40
- "from fastapi import APIRouter\n",
41
- "from datetime import datetime\n",
42
- "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
43
- "\n",
44
- "import random\n",
45
- "# import sys\n",
46
- "# sys.path.append('../')\n",
47
- "\n",
48
- "from tasks.utils.evaluation import ImageEvaluationRequest\n",
49
- "from tasks.utils.emissions import tracker, clean_emissions_data, get_space_info\n",
50
- "from tasks.utils.load_data import load_data\n",
51
- "from tasks.image import parse_boxes,compute_iou,compute_max_iou"
52
- ]
53
- },
54
- {
55
- "cell_type": "markdown",
56
- "metadata": {},
57
- "source": [
58
- "## Loading the datasets and splitting them"
59
- ]
60
- },
61
- {
62
- "cell_type": "code",
63
- "execution_count": 2,
64
- "metadata": {},
65
- "outputs": [],
66
- "source": [
67
- "request = ImageEvaluationRequest()\n",
68
- "# Define paths\n",
69
- "REPO_ID = request.dataset_name\n",
70
- "OUTPUT_DIR = \"../pyro-sdis\""
71
- ]
72
- },
73
- {
74
- "cell_type": "markdown",
75
- "metadata": {},
76
- "source": [
77
- "## 🚧 Code JL"
78
- ]
79
- },
80
- {
81
- "cell_type": "markdown",
82
- "metadata": {},
83
- "source": [
84
- "**Export Dataset**: Use the following function to save the dataset in Ultralytics format:"
85
- ]
86
- },
87
- {
88
- "cell_type": "code",
89
- "execution_count": null,
90
- "metadata": {},
91
- "outputs": [],
92
- "source": [
93
- "# Load and prepare dataset\n",
94
- "ds = load_data(REPO_ID, OUTPUT_DIR)"
95
- ]
96
- },
97
- {
98
- "cell_type": "code",
99
- "execution_count": null,
100
- "metadata": {},
101
- "outputs": [
102
- {
103
- "name": "stderr",
104
- "output_type": "stream",
105
- "text": [
106
- "Generating train split: 100%|██████████| 29537/29537 [00:03<00:00, 7616.82 examples/s]\n",
107
- "Generating val split: 100%|██████████| 4099/4099 [00:00<00:00, 10697.80 examples/s]\n"
108
- ]
109
- }
110
- ],
111
- "source": [
112
- "# # Create the directory structure\n",
113
- "# for split in [\"train\", \"val\"]:\n",
114
- "# os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)\n",
115
- "# os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)\n",
116
- "\n",
117
- "# # Load the dataset from the Hugging Face Hub\n",
118
- "# dataset = load_dataset(REPO_ID)\n",
119
- "\n",
120
- "# # Save in Ultralytics format\n",
121
- "# def save_ultralytics_format(dataset_split, split):\n",
122
- "# \"\"\"\n",
123
- "# Save a dataset split into the Ultralytics format.\n",
124
- "# Args:\n",
125
- "# dataset_split: The dataset split (e.g., dataset[\"train\"])\n",
126
- "# split: \"train\" or \"val\"\n",
127
- "# \"\"\"\n",
128
- "# for example in dataset_split:\n",
129
- "# # Save the image to the appropriate folder\n",
130
- "# image = example[\"image\"] # PIL.Image.Image\n",
131
- "# image_name = example[\"image_name\"] # Original file name\n",
132
- "# output_image_path = os.path.join(IMAGE_DIR, split, image_name)\n",
133
- "\n",
134
- "# # Save the image object to disk\n",
135
- "# image.save(output_image_path)\n",
136
- "\n",
137
- "# # Save label\n",
138
- "# annotations = example[\"annotations\"]\n",
139
- "# label_name = image_name.replace(\".jpg\", \".txt\").replace(\".png\", \".txt\")\n",
140
- "# output_label_path = os.path.join(LABEL_DIR, split, label_name)\n",
141
- " \n",
142
- "# with open(output_label_path, \"w\") as label_file:\n",
143
- "# label_file.write(annotations)\n",
144
- "\n",
145
- "# # Save train and validation splits\n",
146
- "# save_ultralytics_format(dataset[\"train\"], \"train\")\n",
147
- "# save_ultralytics_format(dataset[\"val\"], \"val\")\n",
148
- "\n",
149
- "# print(\"Dataset exported to Ultralytics format.\")"
150
- ]
151
- },
152
- {
153
- "cell_type": "markdown",
154
- "metadata": {},
155
- "source": [
156
- "**Training** with Ultralytics YOLO"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": null,
162
- "metadata": {},
163
- "outputs": [],
164
- "source": [
165
- "# from huggingface_hub import hf_hub_download\n",
166
- "\n",
167
- "# # Correctly set repo_id and repo_type\n",
168
- "# repo_id = \"pyronear/pyro-sdis\"\n",
169
- "# filename = \"data.yaml\"\n",
170
- "\n",
171
- "# # Download data.yaml to the current directory\n",
172
- "# yaml_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type=\"dataset\", local_dir=\".\")\n",
173
- "# print(f\"data.yaml downloaded to: {yaml_path}\")"
174
- ]
175
- },
176
- {
177
- "cell_type": "markdown",
178
- "metadata": {},
179
- "source": [
180
- "Train with Yolo (command line)"
181
- ]
182
- },
183
- {
184
- "cell_type": "code",
185
- "execution_count": null,
186
- "metadata": {},
187
- "outputs": [],
188
- "source": [
189
- "# yolo task=detect mode=train data=data.yaml model=yolov8n.pt epochs=50 imgsz=640 single_cls=True"
190
- ]
191
- },
192
- {
193
- "cell_type": "markdown",
194
- "metadata": {},
195
- "source": [
196
- "## 🚧 fin Code JL"
197
- ]
198
- },
199
- {
200
- "cell_type": "code",
201
- "execution_count": null,
202
- "metadata": {},
203
- "outputs": [],
204
- "source": [
205
- "# Split dataset\n",
206
- "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
207
- "test_dataset = train_test[\"test\"]"
208
- ]
209
- },
210
- {
211
- "cell_type": "code",
212
- "execution_count": 7,
213
- "metadata": {},
214
- "outputs": [
215
- {
216
- "data": {
217
- "text/plain": [
218
- "Dataset({\n",
219
- " features: ['image', 'annotations', 'image_name', 'partner', 'camera', 'date'],\n",
220
- " num_rows: 29537\n",
221
- "})"
222
- ]
223
- },
224
- "execution_count": 7,
225
- "metadata": {},
226
- "output_type": "execute_result"
227
- }
228
- ],
229
- "source": [
230
- "dataset[\"train\"]"
231
- ]
232
- },
233
- {
234
- "cell_type": "code",
235
- "execution_count": 14,
236
- "metadata": {},
237
- "outputs": [
238
- {
239
- "data": {
240
- "text/plain": [
241
- "datasets.dataset_dict.DatasetDict"
242
- ]
243
- },
244
- "execution_count": 14,
245
- "metadata": {},
246
- "output_type": "execute_result"
247
- }
248
- ],
249
- "source": [
250
- "type(dataset)"
251
- ]
252
- },
253
- {
254
- "cell_type": "markdown",
255
- "metadata": {},
256
- "source": [
257
- "## Random Baseline"
258
- ]
259
- },
260
- {
261
- "cell_type": "code",
262
- "execution_count": 8,
263
- "metadata": {},
264
- "outputs": [
265
- {
266
- "name": "stderr",
267
- "output_type": "stream",
268
- "text": [
269
- "[codecarbon WARNING @ 17:11:39] Already started tracking\n",
270
- "[codecarbon INFO @ 17:11:39] A task is already under measure\n"
271
- ]
272
- }
273
- ],
274
- "source": [
275
- "# Start tracking emissions\n",
276
- "tracker.start()\n",
277
- "tracker.start_task(\"inference\")"
278
- ]
279
- },
280
- {
281
- "cell_type": "code",
282
- "execution_count": 11,
283
- "metadata": {},
284
- "outputs": [],
285
- "source": [
286
- "\n",
287
- "#--------------------------------------------------------------------------------------------\n",
288
- "# YOUR MODEL INFERENCE CODE HERE\n",
289
- "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
290
- "#--------------------------------------------------------------------------------------------\n",
291
- "\n",
292
- "# Make random predictions (placeholder for actual model inference)\n",
293
- "\n",
294
- "predictions = []\n",
295
- "true_labels = []\n",
296
- "pred_boxes = []\n",
297
- "true_boxes_list = [] # List of lists, each inner list contains boxes for one image\n",
298
- "\n",
299
- "for example in test_dataset:\n",
300
- " # Parse true annotation (YOLO format: class_id x_center y_center width height)\n",
301
- " annotation = example.get(\"annotations\", \"\").strip()\n",
302
- " has_smoke = len(annotation) > 0\n",
303
- " true_labels.append(int(has_smoke))\n",
304
- " \n",
305
- " # Make random classification prediction\n",
306
- " pred_has_smoke = random.random() > 0.5\n",
307
- " predictions.append(int(pred_has_smoke))\n",
308
- " \n",
309
- " # If there's a true box, parse it and make random box prediction\n",
310
- " if has_smoke:\n",
311
- " # Parse all true boxes from the annotation\n",
312
- " image_true_boxes = parse_boxes(annotation)\n",
313
- " true_boxes_list.append(image_true_boxes)\n",
314
- " \n",
315
- " # For baseline, make one random box prediction per image\n",
316
- " # In a real model, you might want to predict multiple boxes\n",
317
- " random_box = [\n",
318
- " random.random(), # x_center\n",
319
- " random.random(), # y_center\n",
320
- " random.random() * 0.5, # width (max 0.5)\n",
321
- " random.random() * 0.5 # height (max 0.5)\n",
322
- " ]\n",
323
- " pred_boxes.append(random_box)\n",
324
- "\n",
325
- "\n",
326
- "#--------------------------------------------------------------------------------------------\n",
327
- "# YOUR MODEL INFERENCE STOPS HERE\n",
328
- "#-------------------------------------------------------------------------------------------- "
329
- ]
330
- },
331
- {
332
- "cell_type": "code",
333
- "execution_count": 9,
334
- "metadata": {},
335
- "outputs": [
336
- {
337
- "name": "stderr",
338
- "output_type": "stream",
339
- "text": [
340
- "[codecarbon WARNING @ 17:12:24] Background scheduler didn't run for a long period (1885s), results might be inaccurate\n",
341
- "[codecarbon INFO @ 17:12:24] Energy consumed for RAM : 0.003142 kWh. RAM Power : 6.0 W\n",
342
- "[codecarbon INFO @ 17:12:24] Energy consumed for all CPUs : 0.002618 kWh. Total CPU Power : 5.0 W\n",
343
- "[codecarbon INFO @ 17:12:24] 0.005760 kWh of electricity used since the beginning.\n"
344
- ]
345
- }
346
- ],
347
- "source": [
348
- "# Stop tracking emissions\n",
349
- "emissions_data = tracker.stop_task()"
350
- ]
351
- },
352
- {
353
- "cell_type": "code",
354
- "execution_count": 12,
355
- "metadata": {},
356
- "outputs": [],
357
- "source": [
358
- "import numpy as np\n",
359
- "\n",
360
- "# Calculate classification metrics\n",
361
- "classification_accuracy = accuracy_score(true_labels, predictions)\n",
362
- "classification_precision = precision_score(true_labels, predictions)\n",
363
- "classification_recall = recall_score(true_labels, predictions)\n",
364
- "\n",
365
- "# Calculate mean IoU for object detection (only for images with smoke)\n",
366
- "# For each image, we compute the max IoU between the predicted box and all true boxes\n",
367
- "ious = []\n",
368
- "for true_boxes, pred_box in zip(true_boxes_list, pred_boxes):\n",
369
- " max_iou = compute_max_iou(true_boxes, pred_box)\n",
370
- " ious.append(max_iou)\n",
371
- "\n",
372
- "mean_iou = float(np.mean(ious)) if ious else 0.0"
373
- ]
374
- },
375
- {
376
- "cell_type": "code",
377
- "execution_count": 13,
378
- "metadata": {},
379
- "outputs": [
380
- {
381
- "data": {
382
- "text/plain": [
383
- "{'submission_timestamp': '2025-01-23T17:13:47.158903',\n",
384
- " 'classification_accuracy': 0.4974610697359513,\n",
385
- " 'classification_precision': 0.8362892223738063,\n",
386
- " 'classification_recall': 0.49625581866019025,\n",
387
- " 'mean_iou': 0.0026954029097350594,\n",
388
- " 'energy_consumed_wh': 5.759879923426909,\n",
389
- " 'emissions_gco2eq': 0.2006914961719638,\n",
390
- " 'emissions_data': {'run_id': 'fbab9dd9-2893-4216-91c4-232be358d4dd',\n",
391
- " 'duration': 1885.054949500016,\n",
392
- " 'emissions': 0.0002006914961719638,\n",
393
- " 'emissions_rate': 1.0646457428260931e-07,\n",
394
- " 'cpu_power': 5.0,\n",
395
- " 'gpu_power': 0.0,\n",
396
- " 'ram_power': 6.0,\n",
397
- " 'cpu_energy': 0.002618128800231918,\n",
398
- " 'gpu_energy': 0,\n",
399
- " 'ram_energy': 0.0031417511231949906,\n",
400
- " 'energy_consumed': 0.005759879923426909,\n",
401
- " 'country_name': 'Switzerland',\n",
402
- " 'country_iso_code': 'CHE',\n",
403
- " 'region': 'zurich',\n",
404
- " 'cloud_provider': '',\n",
405
- " 'cloud_region': '',\n",
406
- " 'os': 'macOS-15.2-arm64-arm-64bit',\n",
407
- " 'python_version': '3.12.7',\n",
408
- " 'codecarbon_version': '2.8.3',\n",
409
- " 'cpu_count': 8,\n",
410
- " 'cpu_model': 'Apple M1',\n",
411
- " 'gpu_count': None,\n",
412
- " 'gpu_model': None,\n",
413
- " 'ram_total_size': 16.0,\n",
414
- " 'tracking_mode': 'machine',\n",
415
- " 'on_cloud': 'N',\n",
416
- " 'pue': 1.0},\n",
417
- " 'dataset_config': {'dataset_name': 'pyronear/pyro-sdis',\n",
418
- " 'test_size': 0.2,\n",
419
- " 'test_seed': 42}}"
420
- ]
421
- },
422
- "execution_count": 13,
423
- "metadata": {},
424
- "output_type": "execute_result"
425
- }
426
- ],
427
- "source": [
428
- "\n",
429
- "# Prepare results dictionary\n",
430
- "results = {\n",
431
- " \"submission_timestamp\": datetime.now().isoformat(),\n",
432
- " \"classification_accuracy\": float(classification_accuracy),\n",
433
- " \"classification_precision\": float(classification_precision),\n",
434
- " \"classification_recall\": float(classification_recall),\n",
435
- " \"mean_iou\": mean_iou,\n",
436
- " \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
437
- " \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
438
- " \"emissions_data\": clean_emissions_data(emissions_data),\n",
439
- " \"dataset_config\": {\n",
440
- " \"dataset_name\": request.dataset_name,\n",
441
- " \"test_size\": request.test_size,\n",
442
- " \"test_seed\": request.test_seed\n",
443
- " }\n",
444
- "}\n",
445
- "results"
446
- ]
447
- },
448
- {
449
- "cell_type": "markdown",
450
- "metadata": {},
451
- "source": []
452
- }
453
- ],
454
- "metadata": {
455
- "kernelspec": {
456
- "display_name": ".venv",
457
- "language": "python",
458
- "name": "python3"
459
- },
460
- "language_info": {
461
- "codemirror_mode": {
462
- "name": "ipython",
463
- "version": 3
464
- },
465
- "file_extension": ".py",
466
- "mimetype": "text/x-python",
467
- "name": "python",
468
- "nbconvert_exporter": "python",
469
- "pygments_lexer": "ipython3",
470
- "version": "3.12.7"
471
- }
472
- },
473
- "nbformat": 4,
474
- "nbformat_minor": 2
475
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/template-text.ipynb DELETED
@@ -1,1642 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# Text task notebook template\n",
8
- "## Loading the necessary libraries"
9
- ]
10
- },
11
- {
12
- "cell_type": "code",
13
- "execution_count": 3,
14
- "metadata": {},
15
- "outputs": [
16
- {
17
- "name": "stderr",
18
- "output_type": "stream",
19
- "text": [
20
- "[codecarbon WARNING @ 19:48:07] Multiple instances of codecarbon are allowed to run at the same time.\n",
21
- "[codecarbon INFO @ 19:48:07] [setup] RAM Tracking...\n",
22
- "[codecarbon INFO @ 19:48:07] [setup] CPU Tracking...\n",
23
- "[codecarbon WARNING @ 19:48:09] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
24
- "[codecarbon WARNING @ 19:48:09] No CPU tracking mode found. Falling back on CPU constant mode. \n",
25
- " Windows OS detected: Please install Intel Power Gadget to measure CPU\n",
26
- "\n",
27
- "[codecarbon WARNING @ 19:48:11] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
28
- "[codecarbon INFO @ 19:48:11] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-1365U\n",
29
- "[codecarbon WARNING @ 19:48:11] No CPU tracking mode found. Falling back on CPU constant mode.\n",
30
- "[codecarbon INFO @ 19:48:11] [setup] GPU Tracking...\n",
31
- "[codecarbon INFO @ 19:48:11] No GPU found.\n",
32
- "[codecarbon INFO @ 19:48:11] >>> Tracker's metadata:\n",
33
- "[codecarbon INFO @ 19:48:11] Platform system: Windows-11-10.0.22631-SP0\n",
34
- "[codecarbon INFO @ 19:48:11] Python version: 3.12.7\n",
35
- "[codecarbon INFO @ 19:48:11] CodeCarbon version: 3.0.0_rc0\n",
36
- "[codecarbon INFO @ 19:48:11] Available RAM : 31.347 GB\n",
37
- "[codecarbon INFO @ 19:48:11] CPU count: 12\n",
38
- "[codecarbon INFO @ 19:48:11] CPU model: 13th Gen Intel(R) Core(TM) i7-1365U\n",
39
- "[codecarbon INFO @ 19:48:11] GPU count: None\n",
40
- "[codecarbon INFO @ 19:48:11] GPU model: None\n",
41
- "[codecarbon INFO @ 19:48:11] Saving emissions data to file c:\\git\\submission-template\\notebooks\\emissions.csv\n"
42
- ]
43
- }
44
- ],
45
- "source": [
46
- "from fastapi import APIRouter\n",
47
- "from datetime import datetime\n",
48
- "from datasets import load_dataset\n",
49
- "from sklearn.metrics import accuracy_score\n",
50
- "import random\n",
51
- "\n",
52
- "import sys\n",
53
- "sys.path.append('../tasks')\n",
54
- "\n",
55
- "from utils.evaluation import TextEvaluationRequest\n",
56
- "from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
57
- "\n",
58
- "\n",
59
- "# Define the label mapping\n",
60
- "LABEL_MAPPING = {\n",
61
- " \"0_not_relevant\": 0,\n",
62
- " \"1_not_happening\": 1,\n",
63
- " \"2_not_human\": 2,\n",
64
- " \"3_not_bad\": 3,\n",
65
- " \"4_solutions_harmful_unnecessary\": 4,\n",
66
- " \"5_science_unreliable\": 5,\n",
67
- " \"6_proponents_biased\": 6,\n",
68
- " \"7_fossil_fuels_needed\": 7\n",
69
- "}"
70
- ]
71
- },
72
- {
73
- "cell_type": "markdown",
74
- "metadata": {},
75
- "source": [
76
- "## Loading the datasets and splitting them"
77
- ]
78
- },
79
- {
80
- "cell_type": "code",
81
- "execution_count": 4,
82
- "metadata": {},
83
- "outputs": [
84
- {
85
- "data": {
86
- "application/vnd.jupyter.widget-view+json": {
87
- "model_id": "668da7bf85434e098b95c3ec447d78fe",
88
- "version_major": 2,
89
- "version_minor": 0
90
- },
91
- "text/plain": [
92
- "README.md: 0%| | 0.00/5.18k [00:00<?, ?B/s]"
93
- ]
94
- },
95
- "metadata": {},
96
- "output_type": "display_data"
97
- },
98
- {
99
- "name": "stderr",
100
- "output_type": "stream",
101
- "text": [
102
- "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--QuotaClimat--frugalaichallenge-text-train. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
103
- "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
104
- " warnings.warn(message)\n"
105
- ]
106
- },
107
- {
108
- "data": {
109
- "application/vnd.jupyter.widget-view+json": {
110
- "model_id": "5b68d43359eb429395da8be7d4b15556",
111
- "version_major": 2,
112
- "version_minor": 0
113
- },
114
- "text/plain": [
115
- "train.parquet: 0%| | 0.00/1.21M [00:00<?, ?B/s]"
116
- ]
117
- },
118
- "metadata": {},
119
- "output_type": "display_data"
120
- },
121
- {
122
- "data": {
123
- "application/vnd.jupyter.widget-view+json": {
124
- "model_id": "140a304773914e9db8f698eabeb40298",
125
- "version_major": 2,
126
- "version_minor": 0
127
- },
128
- "text/plain": [
129
- "Generating train split: 0%| | 0/6091 [00:00<?, ? examples/s]"
130
- ]
131
- },
132
- "metadata": {},
133
- "output_type": "display_data"
134
- },
135
- {
136
- "data": {
137
- "application/vnd.jupyter.widget-view+json": {
138
- "model_id": "6d04e8ab1906400e8e0029949dc523a5",
139
- "version_major": 2,
140
- "version_minor": 0
141
- },
142
- "text/plain": [
143
- "Map: 0%| | 0/6091 [00:00<?, ? examples/s]"
144
- ]
145
- },
146
- "metadata": {},
147
- "output_type": "display_data"
148
- }
149
- ],
150
- "source": [
151
- "request = TextEvaluationRequest()\n",
152
- "\n",
153
- "# Load and prepare the dataset\n",
154
- "dataset = load_dataset(request.dataset_name)\n",
155
- "\n",
156
- "# Convert string labels to integers\n",
157
- "dataset = dataset.map(lambda x: {\"label\": LABEL_MAPPING[x[\"label\"]]})\n",
158
- "\n",
159
- "# Split dataset\n",
160
- "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
161
- "test_dataset = train_test[\"test\"]"
162
- ]
163
- },
164
- {
165
- "cell_type": "markdown",
166
- "metadata": {},
167
- "source": [
168
- "## Random Baseline"
169
- ]
170
- },
171
- {
172
- "cell_type": "code",
173
- "execution_count": 5,
174
- "metadata": {},
175
- "outputs": [],
176
- "source": [
177
- "# Start tracking emissions\n",
178
- "tracker.start()\n",
179
- "tracker.start_task(\"inference\")"
180
- ]
181
- },
182
- {
183
- "cell_type": "code",
184
- "execution_count": 6,
185
- "metadata": {},
186
- "outputs": [
187
- {
188
- "data": {
189
- "text/plain": [
190
- "[1,\n",
191
- " 7,\n",
192
- " 6,\n",
193
- " 6,\n",
194
- " 2,\n",
195
- " 0,\n",
196
- " 1,\n",
197
- " 7,\n",
198
- " 3,\n",
199
- " 6,\n",
200
- " 6,\n",
201
- " 3,\n",
202
- " 6,\n",
203
- " 6,\n",
204
- " 5,\n",
205
- " 0,\n",
206
- " 2,\n",
207
- " 6,\n",
208
- " 2,\n",
209
- " 6,\n",
210
- " 5,\n",
211
- " 4,\n",
212
- " 1,\n",
213
- " 3,\n",
214
- " 6,\n",
215
- " 4,\n",
216
- " 2,\n",
217
- " 1,\n",
218
- " 4,\n",
219
- " 0,\n",
220
- " 3,\n",
221
- " 4,\n",
222
- " 1,\n",
223
- " 5,\n",
224
- " 5,\n",
225
- " 1,\n",
226
- " 2,\n",
227
- " 7,\n",
228
- " 6,\n",
229
- " 1,\n",
230
- " 3,\n",
231
- " 1,\n",
232
- " 7,\n",
233
- " 7,\n",
234
- " 0,\n",
235
- " 0,\n",
236
- " 3,\n",
237
- " 3,\n",
238
- " 3,\n",
239
- " 4,\n",
240
- " 1,\n",
241
- " 4,\n",
242
- " 4,\n",
243
- " 1,\n",
244
- " 4,\n",
245
- " 5,\n",
246
- " 6,\n",
247
- " 1,\n",
248
- " 2,\n",
249
- " 2,\n",
250
- " 2,\n",
251
- " 5,\n",
252
- " 2,\n",
253
- " 7,\n",
254
- " 2,\n",
255
- " 7,\n",
256
- " 7,\n",
257
- " 6,\n",
258
- " 4,\n",
259
- " 2,\n",
260
- " 0,\n",
261
- " 1,\n",
262
- " 6,\n",
263
- " 3,\n",
264
- " 2,\n",
265
- " 5,\n",
266
- " 5,\n",
267
- " 2,\n",
268
- " 0,\n",
269
- " 7,\n",
270
- " 0,\n",
271
- " 1,\n",
272
- " 5,\n",
273
- " 5,\n",
274
- " 7,\n",
275
- " 4,\n",
276
- " 6,\n",
277
- " 7,\n",
278
- " 1,\n",
279
- " 7,\n",
280
- " 1,\n",
281
- " 0,\n",
282
- " 3,\n",
283
- " 4,\n",
284
- " 2,\n",
285
- " 5,\n",
286
- " 3,\n",
287
- " 3,\n",
288
- " 3,\n",
289
- " 2,\n",
290
- " 2,\n",
291
- " 1,\n",
292
- " 0,\n",
293
- " 4,\n",
294
- " 5,\n",
295
- " 7,\n",
296
- " 0,\n",
297
- " 3,\n",
298
- " 1,\n",
299
- " 4,\n",
300
- " 6,\n",
301
- " 0,\n",
302
- " 7,\n",
303
- " 1,\n",
304
- " 1,\n",
305
- " 2,\n",
306
- " 2,\n",
307
- " 4,\n",
308
- " 0,\n",
309
- " 4,\n",
310
- " 3,\n",
311
- " 4,\n",
312
- " 4,\n",
313
- " 2,\n",
314
- " 2,\n",
315
- " 3,\n",
316
- " 3,\n",
317
- " 7,\n",
318
- " 4,\n",
319
- " 7,\n",
320
- " 6,\n",
321
- " 4,\n",
322
- " 5,\n",
323
- " 4,\n",
324
- " 3,\n",
325
- " 6,\n",
326
- " 0,\n",
327
- " 4,\n",
328
- " 0,\n",
329
- " 1,\n",
330
- " 3,\n",
331
- " 6,\n",
332
- " 7,\n",
333
- " 3,\n",
334
- " 3,\n",
335
- " 0,\n",
336
- " 1,\n",
337
- " 2,\n",
338
- " 4,\n",
339
- " 4,\n",
340
- " 3,\n",
341
- " 1,\n",
342
- " 2,\n",
343
- " 4,\n",
344
- " 3,\n",
345
- " 0,\n",
346
- " 5,\n",
347
- " 3,\n",
348
- " 6,\n",
349
- " 3,\n",
350
- " 6,\n",
351
- " 1,\n",
352
- " 3,\n",
353
- " 4,\n",
354
- " 5,\n",
355
- " 4,\n",
356
- " 0,\n",
357
- " 7,\n",
358
- " 3,\n",
359
- " 6,\n",
360
- " 7,\n",
361
- " 4,\n",
362
- " 4,\n",
363
- " 5,\n",
364
- " 3,\n",
365
- " 1,\n",
366
- " 7,\n",
367
- " 4,\n",
368
- " 1,\n",
369
- " 0,\n",
370
- " 3,\n",
371
- " 0,\n",
372
- " 5,\n",
373
- " 3,\n",
374
- " 6,\n",
375
- " 3,\n",
376
- " 0,\n",
377
- " 7,\n",
378
- " 2,\n",
379
- " 0,\n",
380
- " 4,\n",
381
- " 1,\n",
382
- " 2,\n",
383
- " 6,\n",
384
- " 3,\n",
385
- " 4,\n",
386
- " 4,\n",
387
- " 5,\n",
388
- " 1,\n",
389
- " 5,\n",
390
- " 4,\n",
391
- " 0,\n",
392
- " 1,\n",
393
- " 7,\n",
394
- " 3,\n",
395
- " 6,\n",
396
- " 0,\n",
397
- " 7,\n",
398
- " 4,\n",
399
- " 6,\n",
400
- " 3,\n",
401
- " 0,\n",
402
- " 0,\n",
403
- " 4,\n",
404
- " 6,\n",
405
- " 6,\n",
406
- " 4,\n",
407
- " 0,\n",
408
- " 5,\n",
409
- " 7,\n",
410
- " 5,\n",
411
- " 1,\n",
412
- " 3,\n",
413
- " 6,\n",
414
- " 2,\n",
415
- " 3,\n",
416
- " 2,\n",
417
- " 4,\n",
418
- " 5,\n",
419
- " 1,\n",
420
- " 5,\n",
421
- " 0,\n",
422
- " 3,\n",
423
- " 3,\n",
424
- " 0,\n",
425
- " 0,\n",
426
- " 6,\n",
427
- " 6,\n",
428
- " 2,\n",
429
- " 0,\n",
430
- " 7,\n",
431
- " 4,\n",
432
- " 5,\n",
433
- " 7,\n",
434
- " 1,\n",
435
- " 0,\n",
436
- " 4,\n",
437
- " 5,\n",
438
- " 1,\n",
439
- " 7,\n",
440
- " 0,\n",
441
- " 7,\n",
442
- " 2,\n",
443
- " 6,\n",
444
- " 1,\n",
445
- " 3,\n",
446
- " 5,\n",
447
- " 5,\n",
448
- " 6,\n",
449
- " 5,\n",
450
- " 4,\n",
451
- " 3,\n",
452
- " 7,\n",
453
- " 4,\n",
454
- " 3,\n",
455
- " 5,\n",
456
- " 5,\n",
457
- " 7,\n",
458
- " 2,\n",
459
- " 6,\n",
460
- " 1,\n",
461
- " 5,\n",
462
- " 0,\n",
463
- " 3,\n",
464
- " 4,\n",
465
- " 2,\n",
466
- " 3,\n",
467
- " 7,\n",
468
- " 0,\n",
469
- " 1,\n",
470
- " 7,\n",
471
- " 6,\n",
472
- " 7,\n",
473
- " 7,\n",
474
- " 5,\n",
475
- " 6,\n",
476
- " 3,\n",
477
- " 2,\n",
478
- " 3,\n",
479
- " 0,\n",
480
- " 4,\n",
481
- " 3,\n",
482
- " 5,\n",
483
- " 6,\n",
484
- " 0,\n",
485
- " 0,\n",
486
- " 6,\n",
487
- " 6,\n",
488
- " 1,\n",
489
- " 4,\n",
490
- " 0,\n",
491
- " 4,\n",
492
- " 2,\n",
493
- " 7,\n",
494
- " 5,\n",
495
- " 7,\n",
496
- " 6,\n",
497
- " 3,\n",
498
- " 5,\n",
499
- " 6,\n",
500
- " 0,\n",
501
- " 4,\n",
502
- " 5,\n",
503
- " 6,\n",
504
- " 1,\n",
505
- " 2,\n",
506
- " 1,\n",
507
- " 5,\n",
508
- " 3,\n",
509
- " 0,\n",
510
- " 3,\n",
511
- " 7,\n",
512
- " 1,\n",
513
- " 0,\n",
514
- " 7,\n",
515
- " 0,\n",
516
- " 1,\n",
517
- " 0,\n",
518
- " 4,\n",
519
- " 1,\n",
520
- " 1,\n",
521
- " 0,\n",
522
- " 7,\n",
523
- " 1,\n",
524
- " 0,\n",
525
- " 7,\n",
526
- " 6,\n",
527
- " 2,\n",
528
- " 3,\n",
529
- " 7,\n",
530
- " 4,\n",
531
- " 3,\n",
532
- " 4,\n",
533
- " 3,\n",
534
- " 3,\n",
535
- " 2,\n",
536
- " 5,\n",
537
- " 1,\n",
538
- " 5,\n",
539
- " 1,\n",
540
- " 7,\n",
541
- " 3,\n",
542
- " 2,\n",
543
- " 6,\n",
544
- " 4,\n",
545
- " 4,\n",
546
- " 1,\n",
547
- " 2,\n",
548
- " 6,\n",
549
- " 7,\n",
550
- " 2,\n",
551
- " 7,\n",
552
- " 1,\n",
553
- " 3,\n",
554
- " 5,\n",
555
- " 2,\n",
556
- " 6,\n",
557
- " 4,\n",
558
- " 6,\n",
559
- " 7,\n",
560
- " 0,\n",
561
- " 5,\n",
562
- " 1,\n",
563
- " 6,\n",
564
- " 5,\n",
565
- " 3,\n",
566
- " 6,\n",
567
- " 5,\n",
568
- " 4,\n",
569
- " 7,\n",
570
- " 6,\n",
571
- " 5,\n",
572
- " 4,\n",
573
- " 3,\n",
574
- " 0,\n",
575
- " 0,\n",
576
- " 1,\n",
577
- " 7,\n",
578
- " 7,\n",
579
- " 6,\n",
580
- " 1,\n",
581
- " 4,\n",
582
- " 5,\n",
583
- " 6,\n",
584
- " 1,\n",
585
- " 5,\n",
586
- " 1,\n",
587
- " 2,\n",
588
- " 6,\n",
589
- " 2,\n",
590
- " 6,\n",
591
- " 0,\n",
592
- " 2,\n",
593
- " 1,\n",
594
- " 5,\n",
595
- " 5,\n",
596
- " 1,\n",
597
- " 7,\n",
598
- " 0,\n",
599
- " 5,\n",
600
- " 5,\n",
601
- " 1,\n",
602
- " 7,\n",
603
- " 7,\n",
604
- " 2,\n",
605
- " 1,\n",
606
- " 0,\n",
607
- " 1,\n",
608
- " 0,\n",
609
- " 5,\n",
610
- " 4,\n",
611
- " 2,\n",
612
- " 7,\n",
613
- " 4,\n",
614
- " 3,\n",
615
- " 6,\n",
616
- " 7,\n",
617
- " 5,\n",
618
- " 1,\n",
619
- " 0,\n",
620
- " 7,\n",
621
- " 2,\n",
622
- " 1,\n",
623
- " 2,\n",
624
- " 3,\n",
625
- " 1,\n",
626
- " 0,\n",
627
- " 3,\n",
628
- " 2,\n",
629
- " 6,\n",
630
- " 0,\n",
631
- " 5,\n",
632
- " 4,\n",
633
- " 7,\n",
634
- " 1,\n",
635
- " 1,\n",
636
- " 0,\n",
637
- " 7,\n",
638
- " 0,\n",
639
- " 6,\n",
640
- " 7,\n",
641
- " 6,\n",
642
- " 1,\n",
643
- " 5,\n",
644
- " 5,\n",
645
- " 7,\n",
646
- " 6,\n",
647
- " 1,\n",
648
- " 7,\n",
649
- " 6,\n",
650
- " 5,\n",
651
- " 4,\n",
652
- " 1,\n",
653
- " 4,\n",
654
- " 7,\n",
655
- " 5,\n",
656
- " 4,\n",
657
- " 0,\n",
658
- " 0,\n",
659
- " 7,\n",
660
- " 0,\n",
661
- " 0,\n",
662
- " 3,\n",
663
- " 6,\n",
664
- " 2,\n",
665
- " 5,\n",
666
- " 3,\n",
667
- " 0,\n",
668
- " 3,\n",
669
- " 6,\n",
670
- " 5,\n",
671
- " 7,\n",
672
- " 2,\n",
673
- " 6,\n",
674
- " 7,\n",
675
- " 5,\n",
676
- " 2,\n",
677
- " 3,\n",
678
- " 6,\n",
679
- " 7,\n",
680
- " 7,\n",
681
- " 7,\n",
682
- " 6,\n",
683
- " 1,\n",
684
- " 7,\n",
685
- " 4,\n",
686
- " 2,\n",
687
- " 7,\n",
688
- " 5,\n",
689
- " 4,\n",
690
- " 1,\n",
691
- " 2,\n",
692
- " 3,\n",
693
- " 7,\n",
694
- " 0,\n",
695
- " 2,\n",
696
- " 7,\n",
697
- " 6,\n",
698
- " 1,\n",
699
- " 4,\n",
700
- " 0,\n",
701
- " 6,\n",
702
- " 3,\n",
703
- " 1,\n",
704
- " 0,\n",
705
- " 3,\n",
706
- " 4,\n",
707
- " 7,\n",
708
- " 7,\n",
709
- " 4,\n",
710
- " 2,\n",
711
- " 1,\n",
712
- " 0,\n",
713
- " 5,\n",
714
- " 1,\n",
715
- " 7,\n",
716
- " 4,\n",
717
- " 6,\n",
718
- " 7,\n",
719
- " 7,\n",
720
- " 3,\n",
721
- " 4,\n",
722
- " 3,\n",
723
- " 5,\n",
724
- " 4,\n",
725
- " 4,\n",
726
- " 5,\n",
727
- " 0,\n",
728
- " 1,\n",
729
- " 3,\n",
730
- " 7,\n",
731
- " 5,\n",
732
- " 4,\n",
733
- " 7,\n",
734
- " 3,\n",
735
- " 3,\n",
736
- " 3,\n",
737
- " 5,\n",
738
- " 3,\n",
739
- " 3,\n",
740
- " 4,\n",
741
- " 0,\n",
742
- " 1,\n",
743
- " 7,\n",
744
- " 4,\n",
745
- " 7,\n",
746
- " 7,\n",
747
- " 5,\n",
748
- " 0,\n",
749
- " 0,\n",
750
- " 5,\n",
751
- " 2,\n",
752
- " 6,\n",
753
- " 2,\n",
754
- " 6,\n",
755
- " 7,\n",
756
- " 6,\n",
757
- " 5,\n",
758
- " 7,\n",
759
- " 5,\n",
760
- " 7,\n",
761
- " 1,\n",
762
- " 6,\n",
763
- " 6,\n",
764
- " 0,\n",
765
- " 4,\n",
766
- " 7,\n",
767
- " 3,\n",
768
- " 0,\n",
769
- " 0,\n",
770
- " 2,\n",
771
- " 5,\n",
772
- " 2,\n",
773
- " 3,\n",
774
- " 7,\n",
775
- " 1,\n",
776
- " 0,\n",
777
- " 3,\n",
778
- " 0,\n",
779
- " 0,\n",
780
- " 3,\n",
781
- " 3,\n",
782
- " 7,\n",
783
- " 3,\n",
784
- " 0,\n",
785
- " 1,\n",
786
- " 1,\n",
787
- " 6,\n",
788
- " 0,\n",
789
- " 0,\n",
790
- " 5,\n",
791
- " 0,\n",
792
- " 3,\n",
793
- " 4,\n",
794
- " 6,\n",
795
- " 7,\n",
796
- " 4,\n",
797
- " 0,\n",
798
- " 4,\n",
799
- " 4,\n",
800
- " 5,\n",
801
- " 4,\n",
802
- " 4,\n",
803
- " 3,\n",
804
- " 6,\n",
805
- " 5,\n",
806
- " 2,\n",
807
- " 0,\n",
808
- " 6,\n",
809
- " 0,\n",
810
- " 6,\n",
811
- " 4,\n",
812
- " 3,\n",
813
- " 5,\n",
814
- " 7,\n",
815
- " 7,\n",
816
- " 5,\n",
817
- " 5,\n",
818
- " 1,\n",
819
- " 5,\n",
820
- " 2,\n",
821
- " 7,\n",
822
- " 7,\n",
823
- " 6,\n",
824
- " 6,\n",
825
- " 7,\n",
826
- " 6,\n",
827
- " 5,\n",
828
- " 2,\n",
829
- " 4,\n",
830
- " 0,\n",
831
- " 4,\n",
832
- " 4,\n",
833
- " 7,\n",
834
- " 5,\n",
835
- " 2,\n",
836
- " 7,\n",
837
- " 0,\n",
838
- " 6,\n",
839
- " 0,\n",
840
- " 2,\n",
841
- " 6,\n",
842
- " 6,\n",
843
- " 2,\n",
844
- " 3,\n",
845
- " 0,\n",
846
- " 5,\n",
847
- " 0,\n",
848
- " 5,\n",
849
- " 7,\n",
850
- " 2,\n",
851
- " 7,\n",
852
- " 4,\n",
853
- " 7,\n",
854
- " 4,\n",
855
- " 0,\n",
856
- " 7,\n",
857
- " 1,\n",
858
- " 4,\n",
859
- " 5,\n",
860
- " 0,\n",
861
- " 5,\n",
862
- " 5,\n",
863
- " 2,\n",
864
- " 0,\n",
865
- " 2,\n",
866
- " 5,\n",
867
- " 5,\n",
868
- " 6,\n",
869
- " 3,\n",
870
- " 4,\n",
871
- " 1,\n",
872
- " 7,\n",
873
- " 7,\n",
874
- " 2,\n",
875
- " 3,\n",
876
- " 2,\n",
877
- " 5,\n",
878
- " 0,\n",
879
- " 7,\n",
880
- " 2,\n",
881
- " 3,\n",
882
- " 7,\n",
883
- " 2,\n",
884
- " 4,\n",
885
- " 0,\n",
886
- " 5,\n",
887
- " 7,\n",
888
- " 3,\n",
889
- " 6,\n",
890
- " 7,\n",
891
- " 6,\n",
892
- " 4,\n",
893
- " 3,\n",
894
- " 6,\n",
895
- " 5,\n",
896
- " 4,\n",
897
- " 0,\n",
898
- " 3,\n",
899
- " 4,\n",
900
- " 3,\n",
901
- " 5,\n",
902
- " 2,\n",
903
- " 4,\n",
904
- " 0,\n",
905
- " 3,\n",
906
- " 6,\n",
907
- " 1,\n",
908
- " 3,\n",
909
- " 1,\n",
910
- " 4,\n",
911
- " 3,\n",
912
- " 3,\n",
913
- " 3,\n",
914
- " 0,\n",
915
- " 7,\n",
916
- " 6,\n",
917
- " 2,\n",
918
- " 4,\n",
919
- " 6,\n",
920
- " 5,\n",
921
- " 4,\n",
922
- " 1,\n",
923
- " 7,\n",
924
- " 6,\n",
925
- " 1,\n",
926
- " 4,\n",
927
- " 3,\n",
928
- " 0,\n",
929
- " 7,\n",
930
- " 3,\n",
931
- " 1,\n",
932
- " 2,\n",
933
- " 1,\n",
934
- " 6,\n",
935
- " 4,\n",
936
- " 7,\n",
937
- " 1,\n",
938
- " 7,\n",
939
- " 1,\n",
940
- " 5,\n",
941
- " 1,\n",
942
- " 6,\n",
943
- " 3,\n",
944
- " 0,\n",
945
- " 2,\n",
946
- " 6,\n",
947
- " 7,\n",
948
- " 7,\n",
949
- " 0,\n",
950
- " 1,\n",
951
- " 4,\n",
952
- " 0,\n",
953
- " 4,\n",
954
- " 5,\n",
955
- " 3,\n",
956
- " 6,\n",
957
- " 2,\n",
958
- " 3,\n",
959
- " 4,\n",
960
- " 1,\n",
961
- " 6,\n",
962
- " 2,\n",
963
- " 4,\n",
964
- " 4,\n",
965
- " 6,\n",
966
- " 4,\n",
967
- " 5,\n",
968
- " 7,\n",
969
- " 1,\n",
970
- " 7,\n",
971
- " 7,\n",
972
- " 4,\n",
973
- " 7,\n",
974
- " 4,\n",
975
- " 3,\n",
976
- " 3,\n",
977
- " 6,\n",
978
- " 1,\n",
979
- " 2,\n",
980
- " 0,\n",
981
- " 0,\n",
982
- " 0,\n",
983
- " 2,\n",
984
- " 5,\n",
985
- " 6,\n",
986
- " 5,\n",
987
- " 7,\n",
988
- " 5,\n",
989
- " 7,\n",
990
- " 1,\n",
991
- " 1,\n",
992
- " 2,\n",
993
- " 1,\n",
994
- " 6,\n",
995
- " 5,\n",
996
- " 7,\n",
997
- " 0,\n",
998
- " 0,\n",
999
- " 5,\n",
1000
- " 5,\n",
1001
- " 0,\n",
1002
- " 3,\n",
1003
- " 7,\n",
1004
- " 5,\n",
1005
- " 2,\n",
1006
- " 5,\n",
1007
- " 4,\n",
1008
- " 2,\n",
1009
- " 3,\n",
1010
- " 6,\n",
1011
- " 2,\n",
1012
- " 3,\n",
1013
- " 6,\n",
1014
- " 0,\n",
1015
- " 0,\n",
1016
- " 2,\n",
1017
- " 6,\n",
1018
- " 0,\n",
1019
- " 1,\n",
1020
- " 3,\n",
1021
- " 3,\n",
1022
- " 6,\n",
1023
- " 4,\n",
1024
- " 6,\n",
1025
- " 4,\n",
1026
- " 6,\n",
1027
- " 0,\n",
1028
- " 0,\n",
1029
- " 2,\n",
1030
- " 3,\n",
1031
- " 6,\n",
1032
- " 2,\n",
1033
- " 2,\n",
1034
- " 6,\n",
1035
- " 6,\n",
1036
- " 2,\n",
1037
- " 4,\n",
1038
- " 3,\n",
1039
- " 3,\n",
1040
- " 6,\n",
1041
- " 7,\n",
1042
- " 7,\n",
1043
- " 1,\n",
1044
- " 1,\n",
1045
- " 7,\n",
1046
- " 7,\n",
1047
- " 6,\n",
1048
- " 1,\n",
1049
- " 7,\n",
1050
- " 0,\n",
1051
- " 0,\n",
1052
- " 2,\n",
1053
- " 4,\n",
1054
- " 2,\n",
1055
- " 2,\n",
1056
- " 3,\n",
1057
- " 0,\n",
1058
- " 1,\n",
1059
- " 4,\n",
1060
- " 0,\n",
1061
- " 4,\n",
1062
- " 6,\n",
1063
- " 5,\n",
1064
- " 3,\n",
1065
- " 2,\n",
1066
- " 3,\n",
1067
- " 2,\n",
1068
- " 3,\n",
1069
- " 6,\n",
1070
- " 2,\n",
1071
- " 1,\n",
1072
- " 4,\n",
1073
- " 7,\n",
1074
- " 6,\n",
1075
- " 4,\n",
1076
- " 5,\n",
1077
- " 6,\n",
1078
- " 7,\n",
1079
- " 7,\n",
1080
- " 2,\n",
1081
- " 0,\n",
1082
- " 5,\n",
1083
- " 5,\n",
1084
- " 0,\n",
1085
- " 3,\n",
1086
- " 6,\n",
1087
- " 6,\n",
1088
- " 5,\n",
1089
- " 4,\n",
1090
- " 4,\n",
1091
- " 7,\n",
1092
- " 0,\n",
1093
- " 5,\n",
1094
- " 1,\n",
1095
- " 7,\n",
1096
- " 0,\n",
1097
- " 3,\n",
1098
- " 1,\n",
1099
- " 7,\n",
1100
- " 0,\n",
1101
- " 1,\n",
1102
- " 4,\n",
1103
- " 7,\n",
1104
- " 5,\n",
1105
- " 0,\n",
1106
- " 4,\n",
1107
- " 0,\n",
1108
- " 0,\n",
1109
- " 1,\n",
1110
- " 0,\n",
1111
- " 6,\n",
1112
- " 4,\n",
1113
- " 0,\n",
1114
- " 5,\n",
1115
- " 4,\n",
1116
- " 6,\n",
1117
- " 6,\n",
1118
- " 7,\n",
1119
- " 2,\n",
1120
- " 6,\n",
1121
- " 2,\n",
1122
- " 6,\n",
1123
- " 0,\n",
1124
- " 3,\n",
1125
- " 2,\n",
1126
- " 2,\n",
1127
- " 1,\n",
1128
- " 5,\n",
1129
- " 4,\n",
1130
- " 7,\n",
1131
- " 6,\n",
1132
- " 6,\n",
1133
- " 2,\n",
1134
- " 5,\n",
1135
- " 5,\n",
1136
- " 5,\n",
1137
- " 0,\n",
1138
- " 3,\n",
1139
- " 5,\n",
1140
- " 4,\n",
1141
- " 5,\n",
1142
- " 7,\n",
1143
- " 5,\n",
1144
- " 0,\n",
1145
- " 5,\n",
1146
- " 0,\n",
1147
- " 0,\n",
1148
- " 2,\n",
1149
- " 0,\n",
1150
- " 2,\n",
1151
- " 1,\n",
1152
- " 0,\n",
1153
- " 2,\n",
1154
- " 4,\n",
1155
- " 3,\n",
1156
- " 4,\n",
1157
- " 1,\n",
1158
- " 7,\n",
1159
- " 2,\n",
1160
- " 1,\n",
1161
- " 0,\n",
1162
- " 3,\n",
1163
- " 0,\n",
1164
- " 3,\n",
1165
- " 1,\n",
1166
- " 1,\n",
1167
- " 0,\n",
1168
- " 5,\n",
1169
- " 3,\n",
1170
- " 1,\n",
1171
- " 2,\n",
1172
- " 5,\n",
1173
- " 6,\n",
1174
- " 7,\n",
1175
- " 6,\n",
1176
- " 7,\n",
1177
- " 0,\n",
1178
- " 2,\n",
1179
- " 6,\n",
1180
- " 3,\n",
1181
- " 1,\n",
1182
- " 5,\n",
1183
- " 4,\n",
1184
- " 2,\n",
1185
- " 4,\n",
1186
- " 6,\n",
1187
- " 5,\n",
1188
- " 2,\n",
1189
- " 7,\n",
1190
- " ...]"
1191
- ]
1192
- },
1193
- "execution_count": 6,
1194
- "metadata": {},
1195
- "output_type": "execute_result"
1196
- }
1197
- ],
1198
- "source": [
1199
- "\n",
1200
- "#--------------------------------------------------------------------------------------------\n",
1201
- "# YOUR MODEL INFERENCE CODE HERE\n",
1202
- "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
1203
- "#-------------------------------------------------------------------------------------------- \n",
1204
- "\n",
1205
- "# Make random predictions (placeholder for actual model inference)\n",
1206
- "true_labels = test_dataset[\"label\"]\n",
1207
- "predictions = [random.randint(0, 7) for _ in range(len(true_labels))]\n",
1208
- "\n",
1209
- "predictions\n",
1210
- "\n",
1211
- "#--------------------------------------------------------------------------------------------\n",
1212
- "# YOUR MODEL INFERENCE STOPS HERE\n",
1213
- "#-------------------------------------------------------------------------------------------- "
1214
- ]
1215
- },
1216
- {
1217
- "cell_type": "code",
1218
- "execution_count": 8,
1219
- "metadata": {},
1220
- "outputs": [
1221
- {
1222
- "name": "stderr",
1223
- "output_type": "stream",
1224
- "text": [
1225
- "[codecarbon WARNING @ 19:53:32] Background scheduler didn't run for a long period (47s), results might be inaccurate\n",
1226
- "[codecarbon INFO @ 19:53:32] Energy consumed for RAM : 0.000156 kWh. RAM Power : 11.755242347717285 W\n",
1227
- "[codecarbon INFO @ 19:53:32] Delta energy consumed for CPU with constant : 0.000564 kWh, power : 42.5 W\n",
1228
- "[codecarbon INFO @ 19:53:32] Energy consumed for All CPU : 0.000564 kWh\n",
1229
- "[codecarbon INFO @ 19:53:32] 0.000720 kWh of electricity used since the beginning.\n"
1230
- ]
1231
- },
1232
- {
1233
- "data": {
1234
- "text/plain": [
1235
- "EmissionsData(timestamp='2025-01-21T19:53:32', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=47.736408500000834, emissions=4.032368007471064e-05, emissions_rate=8.444466886328872e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.0005636615353475565, gpu_energy=0, ram_energy=0.00015590305493261682, energy_consumed=0.0007195645902801733, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
1236
- ]
1237
- },
1238
- "execution_count": 8,
1239
- "metadata": {},
1240
- "output_type": "execute_result"
1241
- }
1242
- ],
1243
- "source": [
1244
- "# Stop tracking emissions\n",
1245
- "emissions_data = tracker.stop_task()\n",
1246
- "emissions_data"
1247
- ]
1248
- },
1249
- {
1250
- "cell_type": "code",
1251
- "execution_count": 9,
1252
- "metadata": {},
1253
- "outputs": [
1254
- {
1255
- "data": {
1256
- "text/plain": [
1257
- "0.10090237899917966"
1258
- ]
1259
- },
1260
- "execution_count": 9,
1261
- "metadata": {},
1262
- "output_type": "execute_result"
1263
- }
1264
- ],
1265
- "source": [
1266
- "# Calculate accuracy\n",
1267
- "accuracy = accuracy_score(true_labels, predictions)\n",
1268
- "accuracy"
1269
- ]
1270
- },
1271
- {
1272
- "cell_type": "code",
1273
- "execution_count": 10,
1274
- "metadata": {},
1275
- "outputs": [
1276
- {
1277
- "data": {
1278
- "text/plain": [
1279
- "{'submission_timestamp': '2025-01-21T19:53:46.639165',\n",
1280
- " 'accuracy': 0.10090237899917966,\n",
1281
- " 'energy_consumed_wh': 0.7195645902801733,\n",
1282
- " 'emissions_gco2eq': 0.040323680074710634,\n",
1283
- " 'emissions_data': {'run_id': '908f2e7e-4bb2-4991-a0f6-56bf8d7eda21',\n",
1284
- " 'duration': 47.736408500000834,\n",
1285
- " 'emissions': 4.032368007471064e-05,\n",
1286
- " 'emissions_rate': 8.444466886328872e-07,\n",
1287
- " 'cpu_power': 42.5,\n",
1288
- " 'gpu_power': 0.0,\n",
1289
- " 'ram_power': 11.755242347717285,\n",
1290
- " 'cpu_energy': 0.0005636615353475565,\n",
1291
- " 'gpu_energy': 0,\n",
1292
- " 'ram_energy': 0.00015590305493261682,\n",
1293
- " 'energy_consumed': 0.0007195645902801733,\n",
1294
- " 'country_name': 'France',\n",
1295
- " 'country_iso_code': 'FRA',\n",
1296
- " 'region': 'île-de-france',\n",
1297
- " 'cloud_provider': '',\n",
1298
- " 'cloud_region': '',\n",
1299
- " 'os': 'Windows-11-10.0.22631-SP0',\n",
1300
- " 'python_version': '3.12.7',\n",
1301
- " 'codecarbon_version': '3.0.0_rc0',\n",
1302
- " 'cpu_count': 12,\n",
1303
- " 'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n",
1304
- " 'gpu_count': None,\n",
1305
- " 'gpu_model': None,\n",
1306
- " 'ram_total_size': 31.347312927246094,\n",
1307
- " 'tracking_mode': 'machine',\n",
1308
- " 'on_cloud': 'N',\n",
1309
- " 'pue': 1.0},\n",
1310
- " 'dataset_config': {'dataset_name': 'QuotaClimat/frugalaichallenge-text-train',\n",
1311
- " 'test_size': 0.2,\n",
1312
- " 'test_seed': 42}}"
1313
- ]
1314
- },
1315
- "execution_count": 10,
1316
- "metadata": {},
1317
- "output_type": "execute_result"
1318
- }
1319
- ],
1320
- "source": [
1321
- "# Prepare results dictionary\n",
1322
- "results = {\n",
1323
- " \"submission_timestamp\": datetime.now().isoformat(),\n",
1324
- " \"accuracy\": float(accuracy),\n",
1325
- " \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
1326
- " \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
1327
- " \"emissions_data\": clean_emissions_data(emissions_data),\n",
1328
- " \"dataset_config\": {\n",
1329
- " \"dataset_name\": request.dataset_name,\n",
1330
- " \"test_size\": request.test_size,\n",
1331
- " \"test_seed\": request.test_seed\n",
1332
- " }\n",
1333
- "}\n",
1334
- "\n",
1335
- "results"
1336
- ]
1337
- },
1338
- {
1339
- "cell_type": "markdown",
1340
- "metadata": {},
1341
- "source": [
1342
- "## Development of the model"
1343
- ]
1344
- },
1345
- {
1346
- "cell_type": "code",
1347
- "execution_count": 11,
1348
- "metadata": {},
1349
- "outputs": [
1350
- {
1351
- "data": {
1352
- "application/vnd.jupyter.widget-view+json": {
1353
- "model_id": "90f50ab19698484489f36976745efad3",
1354
- "version_major": 2,
1355
- "version_minor": 0
1356
- },
1357
- "text/plain": [
1358
- "config.json: 0%| | 0.00/1.15k [00:00<?, ?B/s]"
1359
- ]
1360
- },
1361
- "metadata": {},
1362
- "output_type": "display_data"
1363
- },
1364
- {
1365
- "name": "stderr",
1366
- "output_type": "stream",
1367
- "text": [
1368
- "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\models--facebook--bart-large-mnli. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
1369
- "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
1370
- " warnings.warn(message)\n"
1371
- ]
1372
- },
1373
- {
1374
- "data": {
1375
- "application/vnd.jupyter.widget-view+json": {
1376
- "model_id": "6e3974d8ff284603821f7beca9bd353d",
1377
- "version_major": 2,
1378
- "version_minor": 0
1379
- },
1380
- "text/plain": [
1381
- "model.safetensors: 0%| | 0.00/1.63G [00:00<?, ?B/s]"
1382
- ]
1383
- },
1384
- "metadata": {},
1385
- "output_type": "display_data"
1386
- },
1387
- {
1388
- "data": {
1389
- "application/vnd.jupyter.widget-view+json": {
1390
- "model_id": "bc29cb379c644b00b1bdf61d5426d99d",
1391
- "version_major": 2,
1392
- "version_minor": 0
1393
- },
1394
- "text/plain": [
1395
- "tokenizer_config.json: 0%| | 0.00/26.0 [00:00<?, ?B/s]"
1396
- ]
1397
- },
1398
- "metadata": {},
1399
- "output_type": "display_data"
1400
- },
1401
- {
1402
- "data": {
1403
- "application/vnd.jupyter.widget-view+json": {
1404
- "model_id": "635503cf819747c9a83f22aa4f2f11db",
1405
- "version_major": 2,
1406
- "version_minor": 0
1407
- },
1408
- "text/plain": [
1409
- "vocab.json: 0%| | 0.00/899k [00:00<?, ?B/s]"
1410
- ]
1411
- },
1412
- "metadata": {},
1413
- "output_type": "display_data"
1414
- },
1415
- {
1416
- "data": {
1417
- "application/vnd.jupyter.widget-view+json": {
1418
- "model_id": "3a5f53e451e8483ca7c33f42245abd13",
1419
- "version_major": 2,
1420
- "version_minor": 0
1421
- },
1422
- "text/plain": [
1423
- "merges.txt: 0%| | 0.00/456k [00:00<?, ?B/s]"
1424
- ]
1425
- },
1426
- "metadata": {},
1427
- "output_type": "display_data"
1428
- },
1429
- {
1430
- "data": {
1431
- "application/vnd.jupyter.widget-view+json": {
1432
- "model_id": "84f922d1b68a4a0faa5e920d004efca0",
1433
- "version_major": 2,
1434
- "version_minor": 0
1435
- },
1436
- "text/plain": [
1437
- "tokenizer.json: 0%| | 0.00/1.36M [00:00<?, ?B/s]"
1438
- ]
1439
- },
1440
- "metadata": {},
1441
- "output_type": "display_data"
1442
- },
1443
- {
1444
- "name": "stderr",
1445
- "output_type": "stream",
1446
- "text": [
1447
- "Device set to use cpu\n"
1448
- ]
1449
- }
1450
- ],
1451
- "source": [
1452
- "from transformers import pipeline\n",
1453
- "classifier = pipeline(\"zero-shot-classification\",\n",
1454
- " model=\"facebook/bart-large-mnli\")\n"
1455
- ]
1456
- },
1457
- {
1458
- "cell_type": "code",
1459
- "execution_count": 14,
1460
- "metadata": {},
1461
- "outputs": [],
1462
- "source": [
1463
- "sequence_to_classify = \"one day I will see the world\"\n",
1464
- "\n",
1465
- "candidate_labels = [\n",
1466
- " \"Not related to climate change disinformation\",\n",
1467
- " \"Climate change is not real and not happening\",\n",
1468
- " \"Climate change is not human-induced\",\n",
1469
- " \"Climate change impacts are not that bad\",\n",
1470
- " \"Climate change solutions are harmful and unnecessary\",\n",
1471
- " \"Climate change science is unreliable\",\n",
1472
- " \"Climate change proponents are biased\",\n",
1473
- " \"Fossil fuels are needed to address climate change\"\n",
1474
- "]"
1475
- ]
1476
- },
1477
- {
1478
- "cell_type": "code",
1479
- "execution_count": 15,
1480
- "metadata": {},
1481
- "outputs": [
1482
- {
1483
- "data": {
1484
- "text/plain": [
1485
- "{'sequence': 'one day I will see the world',\n",
1486
- " 'labels': ['Fossil fuels are needed to address climate change',\n",
1487
- " 'Climate change science is unreliable',\n",
1488
- " 'Not related to climate change disinformation',\n",
1489
- " 'Climate change proponents are biased',\n",
1490
- " 'Climate change impacts are not that bad',\n",
1491
- " 'Climate change solutions are harmful and unnecessary',\n",
1492
- " 'Climate change is not human-induced',\n",
1493
- " 'Climate change is not real and not happening'],\n",
1494
- " 'scores': [0.16242119669914246,\n",
1495
- " 0.15683825314044952,\n",
1496
- " 0.1564282774925232,\n",
1497
- " 0.14603719115257263,\n",
1498
- " 0.12794046103954315,\n",
1499
- " 0.10180754214525223,\n",
1500
- " 0.0936085507273674,\n",
1501
- " 0.0549185685813427]}"
1502
- ]
1503
- },
1504
- "execution_count": 15,
1505
- "metadata": {},
1506
- "output_type": "execute_result"
1507
- }
1508
- ],
1509
- "source": [
1510
- "classifier(sequence_to_classify, candidate_labels)"
1511
- ]
1512
- },
1513
- {
1514
- "cell_type": "code",
1515
- "execution_count": 26,
1516
- "metadata": {},
1517
- "outputs": [
1518
- {
1519
- "name": "stderr",
1520
- "output_type": "stream",
1521
- "text": [
1522
- "[codecarbon WARNING @ 11:00:07] Already started tracking\n"
1523
- ]
1524
- },
1525
- {
1526
- "data": {
1527
- "application/vnd.jupyter.widget-view+json": {
1528
- "model_id": "5d66a13f76a4411d95b62d4a73012495",
1529
- "version_major": 2,
1530
- "version_minor": 0
1531
- },
1532
- "text/plain": [
1533
- "0it [00:00, ?it/s]"
1534
- ]
1535
- },
1536
- "metadata": {},
1537
- "output_type": "display_data"
1538
- },
1539
- {
1540
- "name": "stderr",
1541
- "output_type": "stream",
1542
- "text": [
1543
- "[codecarbon WARNING @ 11:05:57] Background scheduler didn't run for a long period (349s), results might be inaccurate\n",
1544
- "[codecarbon INFO @ 11:05:57] Energy consumed for RAM : 0.018069 kWh. RAM Power : 11.755242347717285 W\n",
1545
- "[codecarbon INFO @ 11:05:57] Delta energy consumed for CPU with constant : 0.004122 kWh, power : 42.5 W\n",
1546
- "[codecarbon INFO @ 11:05:57] Energy consumed for All CPU : 0.065327 kWh\n",
1547
- "[codecarbon INFO @ 11:05:57] 0.083395 kWh of electricity used since the beginning.\n"
1548
- ]
1549
- },
1550
- {
1551
- "data": {
1552
- "text/plain": [
1553
- "EmissionsData(timestamp='2025-01-22T11:05:57', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=349.19709450000664, emissions=0.0002949120266226386, emissions_rate=8.445461750018632e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.004122396676597424, gpu_energy=0, ram_energy=0.0011402244733631148, energy_consumed=0.005262621149960539, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
1554
- ]
1555
- },
1556
- "execution_count": 26,
1557
- "metadata": {},
1558
- "output_type": "execute_result"
1559
- }
1560
- ],
1561
- "source": [
1562
- "# Start tracking emissions\n",
1563
- "tracker.start()\n",
1564
- "tracker.start_task(\"inference\")\n",
1565
- "\n",
1566
- "from tqdm.auto import tqdm\n",
1567
- "predictions = []\n",
1568
- "\n",
1569
- "\n",
1570
- "\n",
1571
- "# Option 1: Simple loop approach\n",
1572
- "\n",
1573
- "for i, text in tqdm(enumerate(test_dataset[\"quote\"])):\n",
1574
- "\n",
1575
- " result = classifier(text, candidate_labels)\n",
1576
- "\n",
1577
- " # Get index of highest scoring label\n",
1578
- "\n",
1579
- " pred_label = candidate_labels.index(result[\"labels\"][0])\n",
1580
- "\n",
1581
- " predictions.append(pred_label)\n",
1582
- " if i == 100:\n",
1583
- " break\n",
1584
- "\n",
1585
- "\n",
1586
- "# Stop tracking emissions\n",
1587
- "emissions_data = tracker.stop_task()\n",
1588
- "emissions_data\n"
1589
- ]
1590
- },
1591
- {
1592
- "cell_type": "code",
1593
- "execution_count": 28,
1594
- "metadata": {},
1595
- "outputs": [
1596
- {
1597
- "data": {
1598
- "text/plain": [
1599
- "0.4"
1600
- ]
1601
- },
1602
- "execution_count": 28,
1603
- "metadata": {},
1604
- "output_type": "execute_result"
1605
- }
1606
- ],
1607
- "source": [
1608
- "# Calculate accuracy\n",
1609
- "accuracy = accuracy_score(true_labels[:100], predictions[:100])\n",
1610
- "accuracy"
1611
- ]
1612
- },
1613
- {
1614
- "cell_type": "code",
1615
- "execution_count": null,
1616
- "metadata": {},
1617
- "outputs": [],
1618
- "source": []
1619
- }
1620
- ],
1621
- "metadata": {
1622
- "kernelspec": {
1623
- "display_name": "base",
1624
- "language": "python",
1625
- "name": "python3"
1626
- },
1627
- "language_info": {
1628
- "codemirror_mode": {
1629
- "name": "ipython",
1630
- "version": 3
1631
- },
1632
- "file_extension": ".py",
1633
- "mimetype": "text/x-python",
1634
- "name": "python",
1635
- "nbconvert_exporter": "python",
1636
- "pygments_lexer": "ipython3",
1637
- "version": "3.12.7"
1638
- }
1639
- },
1640
- "nbformat": 4,
1641
- "nbformat_minor": 2
1642
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/load_data.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load dataset and save locally in Ultralytics format"""
2
+
3
+ from datasets import load_dataset
4
+ import logging
5
+ import os
6
+ import pandas as pd
7
+
8
+
9
+ # Save in Ultralytics format
10
def save_ultralytics_format(dataset_split, split, IMAGE_DIR, LABEL_DIR):
    """Save a dataset split into the Ultralytics format.

    Images are written to ``IMAGE_DIR/<split>`` and one text label file per
    image is written to ``LABEL_DIR/<split>``, using the image's base name
    with a ``.txt`` extension.

    The export is skipped when either target folder already contains files,
    so calling this function again does not rewrite an existing export.

    Args:
        dataset_split: Iterable of examples (e.g., dataset["train"]). Each
            example must provide "image" (an object exposing ``save(path)``,
            such as a PIL image), "image_name" (file name) and "annotations"
            (text written verbatim to the label file).
        split: "train" or "val"
        IMAGE_DIR: Root folder of the exported images.
        LABEL_DIR: Root folder of the exported label files.
    """
    image_split_dir = os.path.join(IMAGE_DIR, split)
    label_split_dir = os.path.join(LABEL_DIR, split)

    # Make sure both folders exist so that os.listdir() below cannot fail
    # when the function is called without a prepared directory tree.
    os.makedirs(image_split_dir, exist_ok=True)
    os.makedirs(label_split_dir, exist_ok=True)

    # Guard clause: never overwrite a previous export
    if os.listdir(image_split_dir) or os.listdir(label_split_dir):
        logging.info(f"{image_split_dir} or {label_split_dir} not empty: passing")
        return

    for example in dataset_split:
        # Save image object to disk under its original file name
        image_name = example["image_name"]
        example["image"].save(os.path.join(image_split_dir, image_name))

        # Swap only the final extension for ".txt": str.replace() would also
        # rewrite ".jpg"/".png" found inside the stem and would leave other
        # extensions (".jpeg", ".JPG", ...) untouched.
        label_name = os.path.splitext(image_name)[0] + ".txt"
        output_label_path = os.path.join(label_split_dir, label_name)
        with open(output_label_path, "w", encoding="utf-8") as label_file:
            label_file.write(example["annotations"])

    logging.info(f"Dataset {split} split exported to Ultralytics format")
39
+
40
+
41
def create_df(ds, split_name, OUTPUT_DIR):
    """Build a metadata table with one row per image of a dataset split.

    Args:
        ds: Column-indexable dataset (``ds["image"]``, ``ds["image_name"]``,
            ``ds["annotations"]``, ``ds["partner"]``, ``ds["camera"]`` and
            ``ds["date"]`` are read). Images must expose ``size``, ``format``
            and ``mode``.
        split_name: Name of the split, used to build each image URI.
        OUTPUT_DIR: Root folder under which ``images/<split_name>`` lives.

    Returns:
        DataFrame with columns width, height, format, mode, name, uri,
        annotations, partner, camera and timestamp.
    """
    # Image-derived columns first: dimensions, file format and colour mode
    image_rows = []
    for picture in ds["image"]:
        image_rows.append(
            [picture.size[0], picture.size[1], picture.format, picture.mode]
        )
    frame = pd.DataFrame(image_rows, columns=["width", "height", "format", "mode"])

    def _local_uri(file_name):
        # Location of the exported image on disk
        return os.path.join(OUTPUT_DIR, "images", split_name, file_name)

    frame["name"] = ds["image_name"]
    frame["uri"] = frame["name"].apply(_local_uri)

    # Remaining columns are copied from the dataset ("date" is renamed)
    for column, source_key in (
        ("annotations", "annotations"),
        ("partner", "partner"),
        ("camera", "camera"),
        ("timestamp", "date"),
    ):
        frame[column] = ds[source_key]

    return frame
55
+
56
+
57
def load_data(OUTPUT_DIR, REPO_ID, DB_INFO_URI, CHUNK_SIZE=10000):
    """Load data and save to local directory in Ultralytics format.

    When the information CSV already exists it is read back and returned
    without downloading anything. Otherwise the dataset is loaded, its
    "train" and "val" splits are exported to ``OUTPUT_DIR/images`` and
    ``OUTPUT_DIR/labels``, and a metadata table covering both splits is
    built, saved as CSV and returned.

    Args:
        OUTPUT_DIR: Local root folder of the exported dataset.
        REPO_ID: Dataset identifier passed to ``load_dataset``.
        DB_INFO_URI: File name of the information CSV, relative to OUTPUT_DIR.
        CHUNK_SIZE: Number of train examples turned into a dataframe at once;
            smaller values lower peak memory use.

    Returns:
        DataFrame with one row per image ("val" rows first, then "train").

    Raises:
        ValueError: If CHUNK_SIZE is smaller than 1.
    """
    if CHUNK_SIZE < 1:
        raise ValueError("CHUNK_SIZE must be a positive integer")

    # Reuse the information file when it exists, before downloading the dataset
    db_info_path = os.path.join(OUTPUT_DIR, DB_INFO_URI)
    if os.path.exists(db_info_path):
        return pd.read_csv(db_info_path, index_col=0)

    # Create the directory structure
    IMAGE_DIR = os.path.join(OUTPUT_DIR, "images")
    LABEL_DIR = os.path.join(OUTPUT_DIR, "labels")
    for split in ["train", "val"]:
        os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)
        os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)

    # Load the dataset from the Hugging Face Hub
    dataset = load_dataset(REPO_ID)
    logging.info("Dataset loaded in cache folder")

    # Save train and validation splits
    save_ultralytics_format(dataset["train"], "train", IMAGE_DIR, LABEL_DIR)
    save_ultralytics_format(dataset["val"], "val", IMAGE_DIR, LABEL_DIR)

    # Create global dataframe from splits
    frames = [create_df(dataset["val"], "val", OUTPUT_DIR)]
    # Process train in fixed-size chunks to save memory, whatever its length
    train_split = dataset["train"]
    for start in range(0, len(train_split), CHUNK_SIZE):
        chunk = train_split[start:start + CHUNK_SIZE]
        frames.append(create_df(chunk, "train", OUTPUT_DIR))
    df = pd.concat(frames, axis=0, ignore_index=True)

    # Save as one CSV. Write to a temporary file first and rename it, so an
    # interrupted run never leaves a truncated CSV that the existence check
    # above would later accept as a complete cache.
    tmp_path = db_info_path + ".tmp"
    df.to_csv(tmp_path)
    os.replace(tmp_path, db_info_path)

    return df
94
+
95
+
96
if __name__ == "__main__":
    # This module is meant to be imported. When run as a script, print the
    # documentation of its entry point and exit; a bare help() call would
    # open the interactive help console and wait on stdin.
    help(load_data)
src/models.py ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Training utilities"""
2
+
3
+ # OS & env
4
+ import os
5
+ import logging
6
+ import datetime
7
+ import time
8
+
9
+ # DS, ML & DL
10
+ import numpy as np
11
+ from sklearn.metrics import confusion_matrix, classification_report
12
+ from keras.utils import image_dataset_from_directory
13
+ from keras.layers import RandomFlip, RandomRotation, RandomZoom
14
+ from keras.layers import GaussianNoise, RandomContrast, RandomBrightness
15
+ from tensorflow.keras.callbacks import Callback, TensorBoard
16
+ from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
17
+ import tensorflow as tf
18
+
19
+ # images & data viz
20
+ import matplotlib.pyplot as plt
21
+ import seaborn as sns
22
+
23
+
24
class ConditionalAugmentation(tf.keras.layers.Layer):
    """Apply six random augmentations, each gated by an independent coin flip.

    During training, every augmentation (horizontal flip, rotation, zoom,
    Gaussian noise, contrast, brightness) is applied with probability
    ``rate``. The gate is a single scalar draw per augmentation and per call,
    so a given augmentation is applied to the whole input tensor or not at
    all. Outside training the input is returned unchanged.

    Args:
        rate: Probability in [0, 1] of applying each augmentation.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, rate=0.2, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate
        self.flip = RandomFlip("horizontal")
        self.rotation = RandomRotation(0.25)
        self.zoom = RandomZoom(0.1)
        self.noise = GaussianNoise(0.1)
        self.contrast = RandomContrast(0.1)
        self.brightness = RandomBrightness(0.1)

    def call(self, inputs, training=None):
        """Return ``inputs``, randomly augmented when ``training`` is truthy."""
        if not training:
            return inputs

        augmentations = (
            self.flip,
            self.rotation,
            self.zoom,
            self.noise,
            self.contrast,
            self.brightness,
        )
        x = inputs
        for augment in augmentations:
            # Bind the current layer and tensor as lambda defaults so each
            # branch refers to this iteration's values.
            # NOTE(review): tf.cond needs both branches to return matching
            # dtypes; presumably inputs are already float — confirm.
            x = tf.cond(
                tf.random.uniform(()) < self.rate,
                lambda layer=augment, tensor=x: layer(tensor),
                lambda tensor=x: tensor,
            )
        return x

    def get_config(self):
        """Return the layer configuration, including ``rate``, for serialization."""
        config = super().get_config()
        config.update({"rate": self.rate})
        return config
58
+
59
+
60
class _TimingCallback(Callback):
    """Keras callback recording per-epoch and total wall-clock training time.

    Attributes set by the callback:
        logs: list of per-epoch durations in seconds, one entry per epoch.
        start_time: ``time.time()`` value taken when training began.
        tot_time_sec: total training duration in seconds.
        total_time: human-readable total, e.g. ``"Total train time: 3'07s"``
            is rendered as ``"Total train time: 3'7s"``.
    """

    def __init__(self):
        super().__init__()
        self.logs = []
        self.start_time = None
        self._epoch_start = None
        self.tot_time_sec = 0.0
        self.total_time = "Total train time: 0'0s"

    def on_train_begin(self, logs=None):
        self.start_time = time.time()

    def on_epoch_begin(self, epoch, logs=None):
        self._epoch_start = time.time()

    # log time by epoch (duration of this epoch, not time since train start)
    def on_epoch_end(self, epoch, logs=None):
        self.logs.append(time.time() - self._epoch_start)

    # log total time
    def on_train_end(self, logs=None):
        self.tot_time_sec = time.time() - self.start_time
        # Round once before splitting so 119.7 s reads "2'0s", never "1'60s".
        minutes, seconds = divmod(round(self.tot_time_sec), 60)
        self.total_time = f"Total train time: {minutes}'{seconds}s"


def _config_table(
    model_name, input_size, batch_size, n_epochs, optimizer, loss, metrics, chkpt_uri
):
    """Return the run configuration as a Markdown table for TensorBoard."""
    rows = (
        ("model name", model_name),
        ("input size", input_size),
        ("batch size", batch_size),
        ("n epochs", n_epochs),
        ("optimizer", optimizer),
        ("loss", loss),
        ("metrics", metrics),
        ("best weights URI", chkpt_uri),
    )
    lines = ["| Config | Value |", "|:---:|:---:|"]
    lines.extend(f"| **{label}** | {value} |" for label, value in rows)
    # Lines are kept unindented so Markdown renders a table, not a code block.
    return "\n" + "\n".join(lines) + "\n"


def _predict_with_labels(model, dataset):
    """Return (true_labels, predictions) gathered in one pass over ``dataset``.

    Labels and predictions are taken from the same batches, so they stay
    aligned even if the dataset yields a different order on each iteration.
    """
    label_batches = []
    prediction_batches = []
    for batch_inputs, batch_labels in dataset:
        prediction_batches.append(model.predict_on_batch(batch_inputs))
        label_batches.append(np.asarray(batch_labels))
    true_labels = np.concatenate(label_batches, axis=0)
    predictions = np.concatenate(prediction_batches, axis=0)
    return true_labels, predictions


def _plot_confusion_matrix(conf_matrix, class_names, model_name, subtitle):
    """Draw the confusion matrix heatmap and return its matplotlib figure."""
    tick_labels = class_names if class_names is not None else "auto"
    figure = plt.figure(figsize=(6, 4))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    plt.suptitle(f"{model_name} model", color="blue", weight="bold")
    plt.title(subtitle, fontsize=10)
    plt.xlabel("Predictions", color="red", weight="bold")
    plt.ylabel("True labels", color="green", weight="bold")
    return figure


def _figure_to_image_tensor(figure):
    """Rasterise ``figure`` into a (1, height, width, 4) RGBA tensor."""
    figure.canvas.draw()
    # Copy the buffer: it belongs to the canvas and goes away with the figure.
    image_array = np.array(figure.canvas.buffer_rgba())
    return tf.expand_dims(tf.convert_to_tensor(image_array), 0)


def _train_evaluate_and_log(
    model,
    train_ds,
    val_ds,
    test_ds,
    LOG_DIR,
    CHKPT_DIR,
    *,
    model_name,
    input_size,
    batch_size,
    n_epochs,
    optimizer,
    loss,
    metrics,
    patience,
    model_arch=None,
):
    """Shared pipeline: compile, fit, evaluate, plot and log to TensorBoard.

    ``patience`` is the early-stopping patience on ``val_loss``. When
    ``model_arch`` is not None it is logged as the "architecture" text summary.
    Returns ``(model, model_history)``.
    """
    os.makedirs(CHKPT_DIR, exist_ok=True)
    chkpt_uri = os.path.join(CHKPT_DIR, model_name + ".weights.h5")

    model_config = _config_table(
        model_name, input_size, batch_size, n_epochs, optimizer, loss, metrics, chkpt_uri
    )

    # set log folder
    log_dir = os.path.join(
        LOG_DIR, model_name, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    )

    # COMPILE
    logging.info("⚙️ compiling")
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    # CALLBACKS
    logging.info("🛎️ declaring callbacks")
    timing_callback = _TimingCallback()
    checkpoint = ModelCheckpoint(
        chkpt_uri,
        save_best_only=True,
        save_weights_only=True,
    )
    early_stopping = EarlyStopping(
        monitor="val_loss", patience=patience, restore_best_weights=True
    )
    tensorboard_callback = TensorBoard(
        log_dir=log_dir,
        histogram_freq=0,  # do not save weights & biases (too much memory)
        write_graph=True,
        write_images=True,
        update_freq="epoch",
    )

    # FIT
    logging.info("💪 starting training")
    model_history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=n_epochs,
        callbacks=[timing_callback, checkpoint, early_stopping, tensorboard_callback],
    )

    # EVALUATE ON TEST DATASET
    logging.info("🧐 evaluating model")
    model.load_weights(chkpt_uri)
    results = model.evaluate(test_ds)
    # evaluate() returns a bare scalar when the model has no metrics
    test_loss = results[0] if isinstance(results, (list, tuple)) else results
    true_labels, predictions = _predict_with_labels(model, test_ds)

    # CONFUSION MATRIX
    logging.info("📈 plotting results")
    # convert predictions to classes
    predicted_classes = np.argmax(predictions, axis=1)
    conf_matrix = confusion_matrix(true_labels, predicted_classes)
    # class_names is absent on datasets that were mapped/prefetched; fall
    # back to numeric labels instead of failing after a full training run.
    class_names = getattr(test_ds, "class_names", None)
    # precision & F1 score
    report = classification_report(
        true_labels,
        predicted_classes,
        target_names=class_names,
    )
    report_dict = classification_report(
        true_labels,
        predicted_classes,
        target_names=class_names,
        output_dict=True,
    )
    print(report)

    subtitle = f"acc. {report_dict['accuracy'] :.02f} - loss {test_loss :.02f} - {timing_callback.total_time}"
    conf_mtx_plot = _plot_confusion_matrix(
        conf_matrix, class_names, model_name, subtitle
    )
    # Rasterise before plt.show(): showing may release the figure's canvas.
    conf_mtx_plot_tf = _figure_to_image_tensor(conf_mtx_plot)
    plt.show()
    plt.close(conf_mtx_plot)

    # LOG IN TENSORBOARD
    logging.info("📓 logging results")
    file_writer = tf.summary.create_file_writer(os.path.join(log_dir, "metrics"))
    with file_writer.as_default():
        tf.summary.text("configuration", model_config, step=0)
        if model_arch is not None:
            tf.summary.text("architecture", model_arch, step=0)
        tf.summary.text("total_training_time", timing_callback.total_time, step=0)
        for i, time_per_epoch in enumerate(timing_callback.logs):
            tf.summary.scalar("time_per_epoch", time_per_epoch, step=i + 1)
        tf.summary.image("confusion_matrix", conf_mtx_plot_tf, step=0)

    return model, model_history


def evaluate_model(
    model,
    model_arch,
    train_ds,
    val_ds,
    test_ds,
    LOG_DIR,
    CHKPT_DIR,
    model_name="raw_model",
    input_size=(224, 224),
    batch_size=32,
    n_epochs=10,
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy", "categorical_accuracy"],
) -> tuple:
    """Train, evaluate and log model from architecture and configuration

    The model is compiled, fitted with checkpointing of the best weights,
    early stopping (patience 6 on ``val_loss``) and TensorBoard logging, then
    evaluated on ``test_ds``. A classification report is printed, a confusion
    matrix is plotted, and configuration, architecture, timings and the plot
    are written to TensorBoard under ``LOG_DIR/<model_name>/<timestamp>``.

    Args:
        model: Keras model to compile and train.
        model_arch: Text description of the architecture, logged as-is.
        train_ds, val_ds, test_ds: Datasets yielding ``(inputs, labels)``.
            ``test_ds.class_names`` is used for axis labels when available.
        LOG_DIR: Root directory of TensorBoard logs.
        CHKPT_DIR: Directory receiving ``<model_name>.weights.h5``.
        model_name: Name used for log folder, checkpoint and plot title.
        input_size, batch_size: Recorded in the logged configuration only;
            they are not applied to the datasets here.
        n_epochs: Maximum number of training epochs.
        optimizer, loss, metrics: Passed to ``model.compile``. The default
            ``metrics`` list is never mutated.
            NOTE(review): "categorical_accuracy" presumably expects one-hot
            labels while the default loss is sparse — confirm it is wanted.

    Return model, history and plot confusion matrix
    """
    return _train_evaluate_and_log(
        model,
        train_ds,
        val_ds,
        test_ds,
        LOG_DIR,
        CHKPT_DIR,
        model_name=model_name,
        input_size=input_size,
        batch_size=batch_size,
        n_epochs=n_epochs,
        optimizer=optimizer,
        loss=loss,
        metrics=metrics,
        patience=6,
        model_arch=model_arch,
    )


def eval_pretrained_model(
    model,
    train_ds,
    val_ds,
    test_ds,
    LOG_DIR,
    CHKPT_DIR,
    model_name="raw_model",
    input_size=(224, 224),
    batch_size=32,
    n_epochs=10,
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
) -> tuple:
    """Train, evaluate and log pre-trained model from architecture and configuration

    Same pipeline as ``evaluate_model`` with two differences: no architecture
    text is logged, and early stopping uses a patience of 10 on ``val_loss``.

    Args:
        model: Keras model to compile and train.
        train_ds, val_ds, test_ds: Datasets yielding ``(inputs, labels)``.
            ``test_ds.class_names`` is used for axis labels when available.
        LOG_DIR: Root directory of TensorBoard logs.
        CHKPT_DIR: Directory receiving ``<model_name>.weights.h5``.
        model_name: Name used for log folder, checkpoint and plot title.
        input_size, batch_size: Recorded in the logged configuration only;
            they are not applied to the datasets here.
        n_epochs: Maximum number of training epochs.
        optimizer, loss, metrics: Passed to ``model.compile``. The default
            ``metrics`` list is never mutated.

    Return model, history and plot confusion matrix
    """
    return _train_evaluate_and_log(
        model,
        train_ds,
        val_ds,
        test_ds,
        LOG_DIR,
        CHKPT_DIR,
        model_name=model_name,
        input_size=input_size,
        batch_size=batch_size,
        n_epochs=n_epochs,
        optimizer=optimizer,
        loss=loss,
        metrics=metrics,
        patience=10,
    )
392
+
393
+
394
+ if __name__ == "__main__":
395
+ help()
tasks/utils/load_data.py DELETED
@@ -1,59 +0,0 @@
1
- """Load dataset and save locally in Ultralytics format"""
2
-
3
- from datasets import load_dataset
4
- import os
5
-
6
-
7
- def load_data(REPO_ID, OUTPUT_DIR):
8
- """Load data and save to local directory"""
9
-
10
- IMAGE_DIR = os.path.join(OUTPUT_DIR, "images")
11
- LABEL_DIR = os.path.join(OUTPUT_DIR, "labels")
12
-
13
- # 🚧 CHECK IF FOLDER EXISTS
14
- # 🚧 CHECK IF FOLDER EXISTS
15
- # 🚧 CHECK IF FOLDER EXISTS
16
- # 🚧 CHECK IF FOLDER EXISTS
17
-
18
- # Create the directory structure
19
- for split in ["train", "val"]:
20
- os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)
21
- os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)
22
-
23
- # Load the dataset from the Hugging Face Hub
24
- dataset = load_dataset(REPO_ID)
25
-
26
- # Save in Ultralytics format
27
- def save_ultralytics_format(dataset_split, split):
28
- """
29
- Save a dataset split into the Ultralytics format.
30
- Args:
31
- dataset_split: The dataset split (e.g., dataset["train"])
32
- split: "train" or "val"
33
- """
34
- for example in dataset_split:
35
- # Save the image to the appropriate folder
36
- image = example["image"] # PIL.Image.Image
37
- image_name = example["image_name"] # Original file name
38
- output_image_path = os.path.join(IMAGE_DIR, split, image_name)
39
-
40
- # Save the image object to disk
41
- image.save(output_image_path)
42
-
43
- # Save label
44
- annotations = example["annotations"]
45
- label_name = image_name.replace(".jpg", ".txt").replace(".png", ".txt")
46
- output_label_path = os.path.join(LABEL_DIR, split, label_name)
47
-
48
- with open(output_label_path, "w") as label_file:
49
- label_file.write(annotations)
50
-
51
- # Save train and validation splits
52
- save_ultralytics_format(dataset["train"], "train")
53
- save_ultralytics_format(dataset["val"], "val")
54
-
55
- print("Dataset exported to Ultralytics format.")
56
-
57
-
58
- if __name__ == "__main__":
59
- help()