CodeIsAbstract committed on
Commit
3c4b1e5
·
verified ·
1 Parent(s): cb993ff

Upload fine-tuned model

Browse files
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": 128001,
9
+ "head_dim": 64,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 8192,
14
+ "max_position_embeddings": 131072,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 8,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": {
23
+ "factor": 32.0,
24
+ "high_freq_factor": 4.0,
25
+ "low_freq_factor": 1.0,
26
+ "original_max_position_embeddings": 8192,
27
+ "rope_type": "llama3"
28
+ },
29
+ "rope_theta": 500000.0,
30
+ "tie_word_embeddings": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.51.3",
33
+ "use_cache": true,
34
+ "vocab_size": 128256
35
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "temperature": 0.6,
7
+ "top_p": 0.9,
8
+ "transformers_version": "4.51.3"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e7083f46f247845da972853910d8680ca649ab8c902c99ae0aef8b98f13b94
3
+ size 4943274328
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d79a6afb2552fb1cf64796402d4e175b61bc5b99be45450ff790ca57d1cdf3c8
3
+ size 2510808826
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
+ size 14244
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7ab928fb6fc03d23ed0a52a122112f8e7b9f1b5afe619387db540b707cec3ec
3
+ size 988
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b81ef4084acb220d4aa4aaf816f556a4e6f57487225003cd5fc278dd5e90c942
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.191904047976012,
6
+ "eval_steps": 25,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.13193403298350825,
14
+ "grad_norm": 119.62747955322266,
15
+ "learning_rate": 1.6000000000000003e-05,
16
+ "loss": 60.0685,
17
+ "step": 11
18
+ },
19
+ {
20
+ "epoch": 0.2638680659670165,
21
+ "grad_norm": 76.251220703125,
22
+ "learning_rate": 1.983619906947144e-05,
23
+ "loss": 53.8545,
24
+ "step": 22
25
+ },
26
+ {
27
+ "epoch": 0.29985007496251875,
28
+ "eval_loss": 3.3249454498291016,
29
+ "eval_runtime": 89.9708,
30
+ "eval_samples_per_second": 5.557,
31
+ "eval_steps_per_second": 1.856,
32
+ "step": 25
33
+ },
34
+ {
35
+ "epoch": 0.39580209895052476,
36
+ "grad_norm": 67.91344451904297,
37
+ "learning_rate": 1.9199794436588244e-05,
38
+ "loss": 52.8293,
39
+ "step": 33
40
+ },
41
+ {
42
+ "epoch": 0.527736131934033,
43
+ "grad_norm": 75.1031265258789,
44
+ "learning_rate": 1.811377838556573e-05,
45
+ "loss": 52.3377,
46
+ "step": 44
47
+ },
48
+ {
49
+ "epoch": 0.5997001499250375,
50
+ "eval_loss": 3.2520751953125,
51
+ "eval_runtime": 89.7398,
52
+ "eval_samples_per_second": 5.572,
53
+ "eval_steps_per_second": 1.861,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 0.6596701649175413,
58
+ "grad_norm": 70.66634368896484,
59
+ "learning_rate": 1.6631226582407954e-05,
60
+ "loss": 52.2969,
61
+ "step": 55
62
+ },
63
+ {
64
+ "epoch": 0.7916041979010495,
65
+ "grad_norm": 74.07559204101562,
66
+ "learning_rate": 1.4824594148071936e-05,
67
+ "loss": 51.8169,
68
+ "step": 66
69
+ },
70
+ {
71
+ "epoch": 0.8995502248875562,
72
+ "eval_loss": 3.2324743270874023,
73
+ "eval_runtime": 90.1521,
74
+ "eval_samples_per_second": 5.546,
75
+ "eval_steps_per_second": 1.852,
76
+ "step": 75
77
+ },
78
+ {
79
+ "epoch": 0.9235382308845578,
80
+ "grad_norm": 72.83753967285156,
81
+ "learning_rate": 1.2782174639164528e-05,
82
+ "loss": 51.5118,
83
+ "step": 77
84
+ },
85
+ {
86
+ "epoch": 1.047976011994003,
87
+ "grad_norm": 79.93111419677734,
88
+ "learning_rate": 1.0603784974222862e-05,
89
+ "loss": 48.0834,
90
+ "step": 88
91
+ },
92
+ {
93
+ "epoch": 1.1799100449775113,
94
+ "grad_norm": 89.3459243774414,
95
+ "learning_rate": 8.395887191422397e-06,
96
+ "loss": 50.378,
97
+ "step": 99
98
+ },
99
+ {
100
+ "epoch": 1.191904047976012,
101
+ "eval_loss": 3.2021567821502686,
102
+ "eval_runtime": 90.5661,
103
+ "eval_samples_per_second": 5.521,
104
+ "eval_steps_per_second": 1.844,
105
+ "step": 100
106
+ }
107
+ ],
108
+ "logging_steps": 11,
109
+ "max_steps": 166,
110
+ "num_input_tokens_seen": 0,
111
+ "num_train_epochs": 2,
112
+ "save_steps": 50,
113
+ "stateful_callbacks": {
114
+ "TrainerControl": {
115
+ "args": {
116
+ "should_epoch_stop": false,
117
+ "should_evaluate": false,
118
+ "should_log": false,
119
+ "should_save": true,
120
+ "should_training_stop": false
121
+ },
122
+ "attributes": {}
123
+ }
124
+ },
125
+ "total_flos": 1.4253959153713152e+17,
126
+ "train_batch_size": 3,
127
+ "trial_name": null,
128
+ "trial_params": null
129
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d296347945cb67bfa6c7229aa45967a046d5ff07369d05446b90a909c9721c
3
+ size 5368