hendrydong committed
Commit 0981bb9
1 Parent(s): 70da7f6

Upload 9 files

robin-7b/README.md CHANGED
@@ -1,20 +1,20 @@
  ---
- license: other
+ license: apache-2.0
  tags:
  - generated_from_trainer
  datasets:
  - customized
  model-index:
- - name: xl_031_lora
+ - name: h10
    results: []
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->

- # xl_031_lora
+ # h10

- This model is a fine-tuned version of [aleksickx/llama-7b-hf](https://huggingface.co/aleksickx/llama-7b-hf) on the customized dataset.
+ This model is a fine-tuned version of [pinkmanlove/llama-7b-hf](https://huggingface.co/pinkmanlove/llama-7b-hf) on the customized dataset.

  ## Model description

@@ -33,19 +33,18 @@ More information needed
  ### Training hyperparameters

  The following hyperparameters were used during training:
- - learning_rate: 0.0008
+ - learning_rate: 0.0001
  - train_batch_size: 4
  - eval_batch_size: 8
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 8
- - gradient_accumulation_steps: 4
- - total_train_batch_size: 128
- - total_eval_batch_size: 64
+ - num_devices: 4
+ - total_train_batch_size: 16
+ - total_eval_batch_size: 32
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_ratio: 0.03
- - num_epochs: 10.0
+ - num_epochs: 5.0

  ### Training results
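The updated hyperparameter list corresponds roughly to the following `transformers.TrainingArguments` setup. This is a minimal sketch assuming the standard Hugging Face `Trainer` launched on 4 GPUs (e.g. via `torchrun`); `output_dir` and anything not listed in the card (precision, logging, saving) are placeholders, not values from the original run.

```python
from transformers import TrainingArguments

# Sketch of the hyperparameters listed in the updated README.
# Per-device batch sizes times the 4 assumed devices give the
# total train/eval batch sizes of 16 and 32 reported in the card.
training_args = TrainingArguments(
    output_dir="robin-7b-lora",        # placeholder, not from the original run
    learning_rate=1e-4,
    per_device_train_batch_size=4,     # x4 GPUs -> total_train_batch_size 16
    per_device_eval_batch_size=8,      # x4 GPUs -> total_eval_batch_size 32
    num_train_epochs=5.0,
    seed=42,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```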
robin-7b/adapter_config.json CHANGED
@@ -1,5 +1,5 @@
  {
-   "base_model_name_or_path": "aleksickx/llama-7b-hf",
+   "base_model_name_or_path": "pinkmanlove/llama-7b-hf",
    "bias": "none",
    "enable_lora": null,
    "fan_in_fan_out": false,
@@ -10,10 +10,12 @@
    "merge_weights": false,
    "modules_to_save": null,
    "peft_type": "LORA",
-   "r": 32,
+   "r": 128,
    "target_modules": [
      "q_proj",
-     "v_proj"
+     "k_proj",
+     "v_proj",
+     "o_proj"
    ],
    "task_type": "CAUSAL_LM"
  }
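The adapter described by this config (LoRA rank 128 over the q/k/v/o projections) attaches to the new base model with `peft`. A minimal sketch, assuming the adapter files sit in the local `robin-7b/` directory shown in this commit:

```python
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel

# Load the base model and tokenizer referenced by base_model_name_or_path.
base_model = LlamaForCausalLM.from_pretrained("pinkmanlove/llama-7b-hf")
tokenizer = LlamaTokenizer.from_pretrained("pinkmanlove/llama-7b-hf")

# Attach the LoRA adapter; adapter_config.json (r=128, q/k/v/o_proj) and
# adapter_model.bin are read from the local "robin-7b" directory of this commit.
model = PeftModel.from_pretrained(base_model, "robin-7b")
model.eval()
```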
robin-7b/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:320829da8b0ace9f5ada5155c6cb119b8e4c03d9c507c0103ee4765fd8c47d83
- size 33574781
+ oid sha256:250ed2611b7e1071a390509be29f05e25d5ecb2c703955b97b2a640ddd6ce337
+ size 268476157
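adapter_model.bin is versioned as a Git LFS pointer, so only the sha256/size stub appears in the diff; the ~256 MB weight file itself must be fetched separately. A sketch using `huggingface_hub`, with the repo id as a placeholder for wherever this commit is hosted:

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id; replace with the repository that hosts this commit.
weights_path = hf_hub_download(
    repo_id="<namespace>/<repo>",
    filename="robin-7b/adapter_model.bin",
)
print(weights_path)  # local cache path of the resolved 268,476,157-byte file
```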
robin-7b/all_results.json CHANGED
@@ -1,8 +1,8 @@
  {
-   "epoch": 9.99,
-   "train_loss": 0.8784972819876163,
-   "train_runtime": 13393.5014,
-   "train_samples": 72257,
-   "train_samples_per_second": 53.949,
-   "train_steps_per_second": 0.421
+   "epoch": 5.0,
+   "train_loss": 0.9734652058462079,
+   "train_runtime": 41271.4588,
+   "train_samples": 142397,
+   "train_samples_per_second": 17.251,
+   "train_steps_per_second": 1.078
  }
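The new throughput figures are consistent with the README hyperparameters: 142,397 samples over 5 epochs in about 41,271 s gives roughly 17.25 samples/s, and dividing by the total train batch size of 16 gives roughly 1.08 steps/s. A quick check, assuming no dropped partial batches:

```python
# Consistency check of all_results.json against the README hyperparameters.
train_samples = 142_397
num_epochs = 5.0
train_runtime_s = 41_271.4588
total_train_batch_size = 16          # from the updated README

samples_per_second = train_samples * num_epochs / train_runtime_s
steps_per_second = samples_per_second / total_train_batch_size

print(f"{samples_per_second:.3f}")   # ~17.251, matches train_samples_per_second
print(f"{steps_per_second:.3f}")     # ~1.078, matches train_steps_per_second
```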
robin-7b/tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
    "clean_up_tokenization_spaces": false,
    "eos_token": "",
    "model_max_length": 1000000000000000019884624838656,
-   "special_tokens_map_file": "/home/xiangliu/.cache/huggingface/hub/models--aleksickx--llama-7b-hf/snapshots/d7d132438caf5e95800f35dfc46cf82c2be9b365/special_tokens_map.json",
+   "special_tokens_map_file": "/root/data/.cache/hub/models--pinkmanlove--llama-7b-hf/snapshots/b3cde76468bad3c085ead29707ee7481121a4ca0/special_tokens_map.json",
    "tokenizer_class": "LlamaTokenizer",
    "unk_token": ""
  }
robin-7b/train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
-   "epoch": 9.99,
-   "train_loss": 0.8784972819876163,
-   "train_runtime": 13393.5014,
-   "train_samples": 72257,
-   "train_samples_per_second": 53.949,
-   "train_steps_per_second": 0.421
+   "epoch": 5.0,
+   "train_loss": 0.9734652058462079,
+   "train_runtime": 41271.4588,
+   "train_samples": 142397,
+   "train_samples_per_second": 17.251,
+   "train_steps_per_second": 1.078
  }
robin-7b/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff