TweedleDeepLearnings committed on
Commit
95190da
·
verified ·
1 Parent(s): 01ee1e1

Training in progress, step 4200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58c906a26353ae99b743ab79f39ef725d8d528e21eb6f076312c620de73b09fe
3
  size 661507488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7366348163c99efbb02e5f01b2f61b07546f6102646b714e00ff1bd3f8d00e90
3
  size 661507488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63e4a7a2aa1e35d79ced510269f15008ac4f78e885198ea4c0cde962b6dd3150
3
  size 1304683322
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26b9405225c8ee9d4e508062d1d41005bfdbaab56152c534bf3aaddb846161fd
3
  size 1304683322
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:121f8b5e30b0918b00c74bb785e42917fabdb7ffd4ed90261735119342cdd43a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2685d0dd924a34fc4ec9a9f3842eb7519a236c210aba111e993c2b054e96d853
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f4b5fa4a528e2de46c6bbf01aa3d6b42175f89e8d5cd03589a1cc600d94a92c
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308c9a4f60ecce2fc7b3db5fed8a2c00f3ca1deb19bfe053249bef8b7cd57ffb
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.348792552947998,
3
- "best_model_checkpoint": "./output/checkpoint-4050",
4
- "epoch": 0.1084279288926965,
5
  "eval_steps": 150,
6
- "global_step": 4050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3058,6 +3058,119 @@
3058
  "eval_samples_per_second": 6.508,
3059
  "eval_steps_per_second": 6.508,
3060
  "step": 4050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3061
  }
3062
  ],
3063
  "logging_steps": 10,
@@ -3077,7 +3190,7 @@
3077
  "attributes": {}
3078
  }
3079
  },
3080
- "total_flos": 9.180712254618778e+17,
3081
  "train_batch_size": 4,
3082
  "trial_name": null,
3083
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.3470451831817627,
3
+ "best_model_checkpoint": "./output/checkpoint-4200",
4
+ "epoch": 0.11244377811094453,
5
  "eval_steps": 150,
6
+ "global_step": 4200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3058
  "eval_samples_per_second": 6.508,
3059
  "eval_steps_per_second": 6.508,
3060
  "step": 4050
3061
+ },
3062
+ {
3063
+ "epoch": 0.10869565217391304,
3064
+ "grad_norm": 6.412740707397461,
3065
+ "learning_rate": 3.875884122974123e-06,
3066
+ "loss": 1.3756,
3067
+ "step": 4060
3068
+ },
3069
+ {
3070
+ "epoch": 0.10896337545512957,
3071
+ "grad_norm": 6.571822643280029,
3072
+ "learning_rate": 3.7963028584976805e-06,
3073
+ "loss": 1.3773,
3074
+ "step": 4070
3075
+ },
3076
+ {
3077
+ "epoch": 0.10923109873634611,
3078
+ "grad_norm": 6.47897481918335,
3079
+ "learning_rate": 3.717469876711713e-06,
3080
+ "loss": 1.3746,
3081
+ "step": 4080
3082
+ },
3083
+ {
3084
+ "epoch": 0.10949882201756264,
3085
+ "grad_norm": 6.563449382781982,
3086
+ "learning_rate": 3.6393884181313417e-06,
3087
+ "loss": 1.382,
3088
+ "step": 4090
3089
+ },
3090
+ {
3091
+ "epoch": 0.10976654529877918,
3092
+ "grad_norm": 6.455676078796387,
3093
+ "learning_rate": 3.562061692379507e-06,
3094
+ "loss": 1.3519,
3095
+ "step": 4100
3096
+ },
3097
+ {
3098
+ "epoch": 0.11003426857999572,
3099
+ "grad_norm": 5.957856178283691,
3100
+ "learning_rate": 3.4854928780550306e-06,
3101
+ "loss": 1.3711,
3102
+ "step": 4110
3103
+ },
3104
+ {
3105
+ "epoch": 0.11030199186121226,
3106
+ "grad_norm": 6.082734107971191,
3107
+ "learning_rate": 3.409685122601979e-06,
3108
+ "loss": 1.3038,
3109
+ "step": 4120
3110
+ },
3111
+ {
3112
+ "epoch": 0.11056971514242879,
3113
+ "grad_norm": 5.809603691101074,
3114
+ "learning_rate": 3.3346415421802494e-06,
3115
+ "loss": 1.3587,
3116
+ "step": 4130
3117
+ },
3118
+ {
3119
+ "epoch": 0.11083743842364532,
3120
+ "grad_norm": 6.081882476806641,
3121
+ "learning_rate": 3.26036522153751e-06,
3122
+ "loss": 1.3672,
3123
+ "step": 4140
3124
+ },
3125
+ {
3126
+ "epoch": 0.11110516170486186,
3127
+ "grad_norm": 5.788993835449219,
3128
+ "learning_rate": 3.186859213882386e-06,
3129
+ "loss": 1.3615,
3130
+ "step": 4150
3131
+ },
3132
+ {
3133
+ "epoch": 0.11137288498607839,
3134
+ "grad_norm": 5.722326755523682,
3135
+ "learning_rate": 3.114126540758946e-06,
3136
+ "loss": 1.2914,
3137
+ "step": 4160
3138
+ },
3139
+ {
3140
+ "epoch": 0.11164060826729492,
3141
+ "grad_norm": 6.233955383300781,
3142
+ "learning_rate": 3.042170191922509e-06,
3143
+ "loss": 1.3286,
3144
+ "step": 4170
3145
+ },
3146
+ {
3147
+ "epoch": 0.11190833154851146,
3148
+ "grad_norm": 6.276589393615723,
3149
+ "learning_rate": 2.9709931252167426e-06,
3150
+ "loss": 1.3943,
3151
+ "step": 4180
3152
+ },
3153
+ {
3154
+ "epoch": 0.11217605482972799,
3155
+ "grad_norm": 6.818645000457764,
3156
+ "learning_rate": 2.9005982664520734e-06,
3157
+ "loss": 1.3535,
3158
+ "step": 4190
3159
+ },
3160
+ {
3161
+ "epoch": 0.11244377811094453,
3162
+ "grad_norm": 6.53585147857666,
3163
+ "learning_rate": 2.830988509285433e-06,
3164
+ "loss": 1.3412,
3165
+ "step": 4200
3166
+ },
3167
+ {
3168
+ "epoch": 0.11244377811094453,
3169
+ "eval_loss": 1.3470451831817627,
3170
+ "eval_runtime": 76.7654,
3171
+ "eval_samples_per_second": 6.513,
3172
+ "eval_steps_per_second": 6.513,
3173
+ "step": 4200
3174
  }
3175
  ],
3176
  "logging_steps": 10,
 
3190
  "attributes": {}
3191
  }
3192
  },
3193
+ "total_flos": 9.521351998649088e+17,
3194
  "train_batch_size": 4,
3195
  "trial_name": null,
3196
  "trial_params": null