{
  "best_metric": 82.71868753015785,
  "best_model_checkpoint": "/scratch/p310333/whisper-small-dialect_all_seed168/checkpoint-250",
  "epoch": 0.03017137340091721,
  "eval_steps": 250,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003017137340091721,
      "grad_norm": 20.100784301757812,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.2639,
      "step": 25
    },
    {
      "epoch": 0.006034274680183442,
      "grad_norm": 24.114206314086914,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.383,
      "step": 50
    },
    {
      "epoch": 0.009051412020275163,
      "grad_norm": 18.229045867919922,
      "learning_rate": 1.5e-06,
      "loss": 1.2886,
      "step": 75
    },
    {
      "epoch": 0.012068549360366883,
      "grad_norm": 21.113380432128906,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.2495,
      "step": 100
    },
    {
      "epoch": 0.015085686700458605,
      "grad_norm": 19.328229904174805,
      "learning_rate": 2.5e-06,
      "loss": 1.2043,
      "step": 125
    },
    {
      "epoch": 0.018102824040550327,
      "grad_norm": 25.50276756286621,
      "learning_rate": 3e-06,
      "loss": 1.273,
      "step": 150
    },
    {
      "epoch": 0.021119961380642045,
      "grad_norm": 18.037517547607422,
      "learning_rate": 3.5e-06,
      "loss": 1.2175,
      "step": 175
    },
    {
      "epoch": 0.024137098720733767,
      "grad_norm": 22.56538963317871,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.1856,
      "step": 200
    },
    {
      "epoch": 0.02715423606082549,
      "grad_norm": 16.57452392578125,
      "learning_rate": 4.5e-06,
      "loss": 1.1098,
      "step": 225
    },
    {
      "epoch": 0.03017137340091721,
      "grad_norm": 19.666919708251953,
      "learning_rate": 5e-06,
      "loss": 1.0496,
      "step": 250
    },
    {
      "epoch": 0.03017137340091721,
      "eval_cer": 53.72986812504118,
      "eval_loss": 1.3066076040267944,
      "eval_runtime": 3968.9492,
      "eval_samples_per_second": 4.175,
      "eval_steps_per_second": 0.522,
      "eval_wer": 82.71868753015785,
      "step": 250
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 250,
  "total_flos": 5.7717080064e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}