bilbo991 committed on
Commit
04bf5bd
·
1 Parent(s): dc115e7

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_loss": 1.8828033208847046,
4
- "eval_runtime": 235.0465,
5
- "eval_samples_per_second": 85.09,
6
- "eval_steps_per_second": 1.774,
7
- "train_loss": 0.6835632873535156,
8
- "train_runtime": 4876.546,
9
- "train_samples_per_second": 36.911,
10
- "train_steps_per_second": 0.769
11
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_loss": 2.000882148742676,
4
+ "eval_runtime": 235.8424,
5
+ "eval_samples_per_second": 84.802,
6
+ "eval_steps_per_second": 1.768,
7
+ "train_loss": 0.27894207763671874,
8
+ "train_runtime": 4884.0553,
9
+ "train_samples_per_second": 36.855,
10
+ "train_steps_per_second": 0.768
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_loss": 1.8828033208847046,
4
- "eval_runtime": 235.0465,
5
- "eval_samples_per_second": 85.09,
6
- "eval_steps_per_second": 1.774
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_loss": 2.000882148742676,
4
+ "eval_runtime": 235.8424,
5
+ "eval_samples_per_second": 84.802,
6
+ "eval_steps_per_second": 1.768
7
  }
runs/Jul30_21-32-32_cvrl-flynn-ws2/events.out.tfevents.1690772400.cvrl-flynn-ws2.5335.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cdfb2ee6851e95be317e28e33c99172b18799db66b6177583fccf9fe238b7ad
3
+ size 359
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.6835632873535156,
4
- "train_runtime": 4876.546,
5
- "train_samples_per_second": 36.911,
6
- "train_steps_per_second": 0.769
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.27894207763671874,
4
+ "train_runtime": 4884.0553,
5
+ "train_samples_per_second": 36.855,
6
+ "train_steps_per_second": 0.768
7
  }
trainer_state.json CHANGED
@@ -10,53 +10,53 @@
10
  {
11
  "epoch": 0.4,
12
  "learning_rate": 4.3333333333333334e-05,
13
- "loss": 1.2685,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.8,
18
  "learning_rate": 3.6666666666666666e-05,
19
- "loss": 1.1982,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.2,
24
  "learning_rate": 3e-05,
25
- "loss": 0.8293,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 1.6,
30
  "learning_rate": 2.3333333333333336e-05,
31
- "loss": 0.5996,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 2.0,
36
  "learning_rate": 1.6666666666666667e-05,
37
- "loss": 0.5457,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 2.4,
42
  "learning_rate": 1e-05,
43
- "loss": 0.2705,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 2.8,
48
  "learning_rate": 3.3333333333333333e-06,
49
- "loss": 0.2712,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 3.0,
54
  "step": 3750,
55
  "total_flos": 2.40331104e+16,
56
- "train_loss": 0.6835632873535156,
57
- "train_runtime": 4876.546,
58
- "train_samples_per_second": 36.911,
59
- "train_steps_per_second": 0.769
60
  }
61
  ],
62
  "max_steps": 3750,
 
10
  {
11
  "epoch": 0.4,
12
  "learning_rate": 4.3333333333333334e-05,
13
+ "loss": 0.4395,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.8,
18
  "learning_rate": 3.6666666666666666e-05,
19
+ "loss": 0.4735,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.2,
24
  "learning_rate": 3e-05,
25
+ "loss": 0.3482,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 1.6,
30
  "learning_rate": 2.3333333333333336e-05,
31
+ "loss": 0.2636,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 2.0,
36
  "learning_rate": 1.6666666666666667e-05,
37
+ "loss": 0.2316,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 2.4,
42
  "learning_rate": 1e-05,
43
+ "loss": 0.1222,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 2.8,
48
  "learning_rate": 3.3333333333333333e-06,
49
+ "loss": 0.1307,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 3.0,
54
  "step": 3750,
55
  "total_flos": 2.40331104e+16,
56
+ "train_loss": 0.27894207763671874,
57
+ "train_runtime": 4884.0553,
58
+ "train_samples_per_second": 36.855,
59
+ "train_steps_per_second": 0.768
60
  }
61
  ],
62
  "max_steps": 3750,