shorecode committed on
Commit
88ffb3b
·
1 Parent(s): a7d3079
Files changed (2) hide show
  1. adapter_config.json +0 -35
  2. training_params.json +0 -37
adapter_config.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "alpha_pattern": {},
3
- "auto_mapping": {
4
- "base_model_class": "T5ForConditionalGeneration",
5
- "parent_library": "transformers.models.t5.modeling_t5"
6
- },
7
- "base_model_name_or_path": "google/t5-efficient-tiny-nh8",
8
- "bias": "none",
9
- "eva_config": null,
10
- "exclude_modules": null,
11
- "fan_in_fan_out": false,
12
- "inference_mode": true,
13
- "init_lora_weights": true,
14
- "layer_replication": null,
15
- "layers_pattern": null,
16
- "layers_to_transform": null,
17
- "loftq_config": {},
18
- "lora_alpha": 8,
19
- "lora_bias": false,
20
- "lora_dropout": 0.1,
21
- "megatron_config": null,
22
- "megatron_core": "megatron.core",
23
- "modules_to_save": null,
24
- "peft_type": "LORA",
25
- "r": 4,
26
- "rank_pattern": {},
27
- "revision": null,
28
- "target_modules": [
29
- "v",
30
- "q"
31
- ],
32
- "task_type": null,
33
- "use_dora": false,
34
- "use_rslora": false
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_params.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "data_path": "gizemgg/wiki-eng-summary-trial-gen0-transformed-instruction",
3
- "model": "google/t5-efficient-tiny-nh8",
4
- "username": "shorecode",
5
- "seed": 42,
6
- "train_split": "train",
7
- "valid_split": "test",
8
- "project_name": "autotrain-x906d-mvlef",
9
- "push_to_hub": true,
10
- "text_column": "text",
11
- "target_column": "summ",
12
- "lr": 0.0001,
13
- "epochs": 3,
14
- "max_seq_length": 128,
15
- "max_target_length": 128,
16
- "batch_size": 2,
17
- "warmup_ratio": 0.1,
18
- "gradient_accumulation": 1,
19
- "optimizer": "adamw_torch",
20
- "scheduler": "linear",
21
- "weight_decay": 0.0,
22
- "max_grad_norm": 1.0,
23
- "logging_steps": -1,
24
- "eval_strategy": "epoch",
25
- "auto_find_batch_size": false,
26
- "mixed_precision": "fp16",
27
- "save_total_limit": 1,
28
- "peft": false,
29
- "quantization": "int8",
30
- "lora_r": 16,
31
- "lora_alpha": 32,
32
- "lora_dropout": 0.05,
33
- "target_modules": "all-linear",
34
- "log": "tensorboard",
35
- "early_stopping_patience": 5,
36
- "early_stopping_threshold": 0.01
37
- }