peterbeamish committed on
Commit
9a423c5
·
1 Parent(s): c7896d3

End of training

Browse files
README.md CHANGED
@@ -14,8 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # trained_model
15
 
16
  This model is a fine-tuned version of [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) on an unknown dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 0.9780
19
 
20
  ## Model description
21
 
@@ -34,23 +32,21 @@ More information needed
34
  ### Training hyperparameters
35
 
36
  The following hyperparameters were used during training:
37
- - learning_rate: 0.0005811623641719214
38
- - train_batch_size: 8
39
  - eval_batch_size: 8
40
  - seed: 42
41
- - gradient_accumulation_steps: 7
42
- - total_train_batch_size: 56
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
- - num_epochs: 2
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | No log | 0.52 | 100 | 1.0140 |
52
- | No log | 1.05 | 200 | 0.9792 |
53
- | No log | 1.57 | 300 | 0.9780 |
54
 
55
 
56
  ### Framework versions
 
14
  # trained_model
15
 
16
  This model is a fine-tuned version of [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) on an unknown dataset.
 
 
17
 
18
  ## Model description
19
 
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - learning_rate: 0.0006741100367675095
36
+ - train_batch_size: 52
37
  - eval_batch_size: 8
38
  - seed: 42
39
+ - gradient_accumulation_steps: 5
40
+ - total_train_batch_size: 260
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 4
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
+ | No log | 2.43 | 100 | 0.9711 |
 
 
50
 
51
 
52
  ### Framework versions
adapter_config.json CHANGED
@@ -16,8 +16,8 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "q",
20
- "v"
21
  ],
22
  "task_type": "SEQ_2_SEQ_LM"
23
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "v",
20
+ "q"
21
  ],
22
  "task_type": "SEQ_2_SEQ_LM"
23
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d2554237ab2c5c5fcb57ac013a6288d5504accf79ebc68d638e29ad54e2bad2
3
  size 18915328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d1b2057d531f2a5429fcc79d54a8884c543e3f0e7359a624ad94a407a665485
3
  size 18915328
runs/Nov06_02-21-32_288b75d43cd4/events.out.tfevents.1699237305.288b75d43cd4.1909.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b037751356eaea6c8c967fa225a104b9acef35af6d7a41205921ca7dc6b37409
3
+ size 5394
trainer_state.json CHANGED
@@ -1,52 +1,36 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9970126960418222,
5
  "eval_steps": 100,
6
- "global_step": 382,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.52,
13
- "eval_loss": 1.0140411853790283,
14
- "eval_runtime": 1465.4694,
15
- "eval_samples_per_second": 7.31,
16
- "eval_steps_per_second": 0.914,
17
  "step": 100
18
  },
19
  {
20
- "epoch": 1.05,
21
- "eval_loss": 0.9791701436042786,
22
- "eval_runtime": 1462.2287,
23
- "eval_samples_per_second": 7.326,
24
- "eval_steps_per_second": 0.916,
25
- "step": 200
26
- },
27
- {
28
- "epoch": 1.57,
29
- "eval_loss": 0.9779573678970337,
30
- "eval_runtime": 1465.9156,
31
- "eval_samples_per_second": 7.308,
32
- "eval_steps_per_second": 0.914,
33
- "step": 300
34
- },
35
- {
36
- "epoch": 2.0,
37
- "step": 382,
38
- "total_flos": 4.961368405337702e+16,
39
- "train_loss": 0.8977117887966296,
40
- "train_runtime": 14787.7789,
41
- "train_samples_per_second": 1.449,
42
- "train_steps_per_second": 0.026
43
  }
44
  ],
45
  "logging_steps": 500,
46
- "max_steps": 382,
47
- "num_train_epochs": 2,
48
  "save_steps": 500,
49
- "total_flos": 4.961368405337702e+16,
50
  "trial_name": null,
51
  "trial_params": null
52
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.9805825242718447,
5
  "eval_steps": 100,
6
+ "global_step": 164,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 2.43,
13
+ "eval_loss": 0.9710711240768433,
14
+ "eval_runtime": 1487.3043,
15
+ "eval_samples_per_second": 7.203,
16
+ "eval_steps_per_second": 0.901,
17
  "step": 100
18
  },
19
  {
20
+ "epoch": 3.98,
21
+ "step": 164,
22
+ "total_flos": 9.889339416772608e+16,
23
+ "train_loss": 1.1787047967678164,
24
+ "train_runtime": 21689.2536,
25
+ "train_samples_per_second": 1.976,
26
+ "train_steps_per_second": 0.008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  ],
29
  "logging_steps": 500,
30
+ "max_steps": 164,
31
+ "num_train_epochs": 4,
32
  "save_steps": 500,
33
+ "total_flos": 9.889339416772608e+16,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78c9fe5062705c374b5b532489d2b6c07ba7a6c5eec5526fda5268bc5595c0a7
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3afdb64a91e6b71d89a1d36421804cef1fa3e521341ea328400dcf79b3449517
3
  size 4536