mrferr3t committed (verified)
Commit 0426838 · 1 Parent(s): 401b217

Training in progress, step 483
README.md CHANGED
@@ -3,10 +3,9 @@ library_name: peft
  license: apache-2.0
  base_model: Artples/L-MChat-7b
  tags:
- - axolotl
  - generated_from_trainer
  model-index:
- - name: f356bf41-5032-425b-8da4-284285a6d4b3
+ - name: miner_id_24
    results: []
  ---

@@ -41,10 +40,10 @@ datasets:
  system_prompt: ''
  debug: null
  deepspeed: null
- early_stopping_patience: 2
+ early_stopping_patience:
  early_stopping_threshold: 1.0e-05
  eval_max_new_tokens: 128
- eval_steps: 138
+ eval_steps:
  eval_strategy: null
  flash_attention: true
  fp16: null
@@ -53,7 +52,7 @@ fsdp_config: null
  gradient_accumulation_steps: 4
  gradient_checkpointing: false
  group_by_length: false
- hub_model_id: mrferr3t/f356bf41-5032-425b-8da4-284285a6d4b3
+ hub_model_id:
  hub_repo: null
  hub_strategy: checkpoint
  hub_token: null
@@ -61,7 +60,7 @@ learning_rate: 0.0004
  load_in_4bit: false
  load_in_8bit: false
  local_rank: null
- logging_steps: 138
+ logging_steps:
  lora_alpha: 16
  lora_dropout: 0.05
  lora_fan_in_fan_out: null
@@ -69,7 +68,7 @@ lora_model_dir: null
  lora_r: 8
  lora_target_linear: true
  lr_scheduler: cosine
- max_steps: null
+ max_steps: 50
  micro_batch_size: 9
  mlflow_experiment_name: /tmp/159279c5560d1ca0_train_data.json
  model_type: AutoModelForCausalLM
@@ -80,7 +79,7 @@ pad_to_sequence_len: true
  resume_from_checkpoint: null
  s2_attention: null
  sample_packing: false
- save_steps: 138
+ save_steps:
  saves_per_epoch: 0
  sequence_len: 512
  special_tokens:
@@ -90,7 +89,7 @@ tf32: false
  tokenizer_type: AutoTokenizer
  train_on_inputs: false
  trust_remote_code: true
- val_set_size: .05000000
+ val_set_size: 0.0
  wandb_entity: null
  wandb_mode: disabled
  wandb_name: 0da85ac8-626a-4340-8d64-a50f9017e723
@@ -105,11 +104,9 @@ xformers_attention: null

  </details><br>

- # f356bf41-5032-425b-8da4-284285a6d4b3
+ # miner_id_24

  This model is a fine-tuned version of [Artples/L-MChat-7b](https://huggingface.co/Artples/L-MChat-7b) on the None dataset.
- It achieves the following results on the evaluation set:
- - Loss: 0.8026

  ## Model description

@@ -137,16 +134,10 @@ The following hyperparameters were used during training:
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 100
- - num_epochs: 100
+ - training_steps: 50

  ### Training results

- | Training Loss | Epoch | Step | Validation Loss |
- |:-------------:|:------:|:----:|:---------------:|
- | No log | 0.0015 | 1 | 1.1178 |
- | 3.1374 | 0.2108 | 138 | 0.7750 |
- | 3.0967 | 0.4215 | 276 | 0.7873 |
- | 3.1177 | 0.6323 | 414 | 0.8026 |


  ### Framework versions
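In short, this commit blanks the eval/checkpoint cadence fields (early_stopping_patience, eval_steps, logging_steps, save_steps, hub_model_id), drops the held-out split (val_set_size: 0.0), and caps the run at max_steps: 50 instead of 100 epochs. With micro_batch_size: 9 and gradient_accumulation_steps: 4, the effective batch size is 9 × 4 = 36 per optimizer step. For anyone who wants to try the adapter this run produces, here is a minimal loading sketch using transformers + peft; the adapter repo id is an assumption taken from the hub_model_id this diff removes, and the dtype is illustrative.

```python
# Minimal loading sketch (not from this repo); assumptions are marked.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "Artples/L-MChat-7b"  # base_model from the config above
# Assumed adapter location: the hub_model_id removed in this commit.
ADAPTER_ID = "mrferr3t/f356bf41-5032-425b-8da4-284285a6d4b3"

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(
    BASE_ID,
    torch_dtype=torch.float16,  # illustrative; fp16/tf32 are null/false in the config
)
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # attach the LoRA weights

inputs = tokenizer("Hello, how are you?", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=128)  # eval_max_new_tokens: 128
print(tokenizer.decode(out[0], skip_special_tokens=True))
```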
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
-     "k_proj",
-     "o_proj",
+     "v_proj",
+     "gate_proj",
      "up_proj",
      "q_proj",
      "down_proj",
-     "v_proj",
-     "gate_proj"
+     "o_proj",
+     "k_proj"
    ],
    "task_type": "CAUSAL_LM",
    "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9990e0e744a6b31e416b1cb206f26ebc6d1304f61fa4266e1ac7e6a9a9a9ee0f
+ oid sha256:a270d95a390cb4737825983ffdea47ca6fb7c14f85cfde4db6219c4c0b80bd76
  size 84047370
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc34008e5a29a99fc60ab96d896178d2b4f20a21f213d20b84d7205e7d1e51f4
+ oid sha256:825b5fcf901f4a8bf145e82e49be7ae3dc4d2e605a8df0750b726c39d0e0ddd7
  size 83945296
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c4912750e0525668697e67bfbdf8863ec73474c1ab1b2a41b7a08d6ec81c640
+ oid sha256:ad2e424f0d3e85ad37a3506914e5792898a4d236d25069f96df22040323b28f0
  size 6776
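The three binary files above are Git LFS pointers rather than raw weights: each records the LFS spec version, the SHA-256 oid of the stored blob, and its byte size. Here the sizes are unchanged and only the oids move, i.e. each artifact was overwritten in place by the step-483 checkpoint. A small generic sketch (not part of this repo) for verifying a downloaded blob against its pointer:

```python
# Check a downloaded artifact against its Git LFS pointer file: the pointer's
# "oid sha256:..." must equal the blob's SHA-256 and "size" its byte length.
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    # Pointer lines look like: "version <url>", "oid sha256:<hex>", "size <bytes>"
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    blob = Path(blob_path).read_bytes()
    return (hashlib.sha256(blob).hexdigest() == expected_oid
            and len(blob) == expected_size)

# e.g. verify_lfs_pointer("adapter_model.bin.pointer", "adapter_model.bin")
```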