mrferr3t committed
Commit 401b217 · verified · 1 Parent(s): 4eeadd5

End of training

Files changed (3)
  1. README.md +19 -10
  2. adapter_model.bin +1 -1
  3. adapter_model.safetensors +1 -1
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: peft
 license: apache-2.0
 base_model: Artples/L-MChat-7b
 tags:
+- axolotl
 - generated_from_trainer
 model-index:
-- name: miner_id_24
+- name: f356bf41-5032-425b-8da4-284285a6d4b3
 results: []
 ---
 
@@ -40,10 +41,10 @@ datasets:
 system_prompt: ''
 debug: null
 deepspeed: null
-early_stopping_patience:
+early_stopping_patience: 2
 early_stopping_threshold: 1.0e-05
 eval_max_new_tokens: 128
-eval_steps:
+eval_steps: 138
 eval_strategy: null
 flash_attention: true
 fp16: null
@@ -52,7 +53,7 @@ fsdp_config: null
 gradient_accumulation_steps: 4
 gradient_checkpointing: false
 group_by_length: false
-hub_model_id:
+hub_model_id: mrferr3t/f356bf41-5032-425b-8da4-284285a6d4b3
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
@@ -60,7 +61,7 @@ learning_rate: 0.0004
 load_in_4bit: false
 load_in_8bit: false
 local_rank: null
-logging_steps:
+logging_steps: 138
 lora_alpha: 16
 lora_dropout: 0.05
 lora_fan_in_fan_out: null
@@ -68,7 +69,7 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps: 50
+max_steps: null
 micro_batch_size: 9
 mlflow_experiment_name: /tmp/159279c5560d1ca0_train_data.json
 model_type: AutoModelForCausalLM
@@ -79,7 +80,7 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-save_steps:
+save_steps: 138
 saves_per_epoch: 0
 sequence_len: 512
 special_tokens:
@@ -89,7 +90,7 @@ tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.0
+val_set_size: .05000000
 wandb_entity: null
 wandb_mode: disabled
 wandb_name: 0da85ac8-626a-4340-8d64-a50f9017e723
@@ -104,9 +105,11 @@ xformers_attention: null
 
 </details><br>
 
-# miner_id_24
+# f356bf41-5032-425b-8da4-284285a6d4b3
 
 This model is a fine-tuned version of [Artples/L-MChat-7b](https://huggingface.co/Artples/L-MChat-7b) on the None dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.8026
 
 ## Model description
 
@@ -134,10 +137,16 @@ The following hyperparameters were used during training:
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
-- training_steps: 50
+- num_epochs: 100
 
 ### Training results
 
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0015 | 1    | 1.1178          |
+| 3.1374        | 0.2108 | 138  | 0.7750          |
+| 3.0967        | 0.4215 | 276  | 0.7873          |
+| 3.1177        | 0.6323 | 414  | 0.8026          |
 
 
 ### Framework versions
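Why the run ended at step 414: the updated config drops the fixed `max_steps: 50` in favor of `num_epochs: 100` with early stopping (`early_stopping_patience: 2`, `early_stopping_threshold: 1.0e-05`, evaluation every 138 steps). A minimal sketch of that patience rule, assuming standard counter semantics rather than Axolotl's exact implementation:

```python
def should_stop(eval_losses, patience=2, threshold=1e-5):
    """Stop once eval loss fails to beat the best value by more than
    `threshold` for `patience` consecutive evaluations."""
    best = float("inf")
    bad_evals = 0
    for loss in eval_losses:
        if loss < best - threshold:  # meaningful improvement resets the counter
            best = loss
            bad_evals = 0
        else:
            bad_evals += 1
            if bad_evals >= patience:
                return True
    return False

# Validation losses from the table above (steps 1, 138, 276, 414).
# Best loss is at step 138; the next two evals are worse, so training
# stops after step 414 even though num_epochs is 100.
print(should_stop([1.1178, 0.7750, 0.7873, 0.8026]))  # True
```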
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2d1bfd31bcbb7c6ede8105802f3019ef9d52d6a57b5b200b45f6b1d6baa82ba
+oid sha256:9990e0e744a6b31e416b1cb206f26ebc6d1304f61fa4266e1ac7e6a9a9a9ee0f
 size 84047370
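Both adapter files are Git LFS pointers: the diff records only the new sha256 digest and byte size, while the weights themselves live in LFS storage. A minimal sketch of verifying a downloaded copy against the new pointer, using only the standard library (the local path is an assumption; the same check applies to adapter_model.safetensors below):

```python
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    """Hash a file in 1 MiB chunks so large weights never load fully into memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected digest from the new adapter_model.bin pointer above.
expected = "9990e0e744a6b31e416b1cb206f26ebc6d1304f61fa4266e1ac7e6a9a9a9ee0f"
assert sha256_of("adapter_model.bin") == expected, "LFS object does not match pointer"
```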
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59c37ad0118d4ba73506a07d36cc16d598775c6f4156fcf098513e676eae46d7
+oid sha256:cc34008e5a29a99fc60ab96d896178d2b4f20a21f213d20b84d7205e7d1e51f4
 size 83945296
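To consume the updated weights, the adapter can be applied on top of the base model with PEFT. A minimal sketch, assuming the adapter is published under the `hub_model_id` from the config above and that default dtypes and devices are acceptable:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "Artples/L-MChat-7b"
ADAPTER = "mrferr3t/f356bf41-5032-425b-8da4-284285a6d4b3"  # hub_model_id from the config

tokenizer = AutoTokenizer.from_pretrained(BASE)
base_model = AutoModelForCausalLM.from_pretrained(BASE)
model = PeftModel.from_pretrained(base_model, ADAPTER)  # attaches the LoRA adapter

inputs = tokenizer("Hello, how are you?", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```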