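# GRPO training configuration (a PyYAML dump of the run's argument dataclasses;
# note the python/object/apply enum tags further below).
# Base model: Qwen/Qwen2.5-Coder-7B-Instruct in bf16 with flash_attention_2,
# trained with full fine-tuning (use_peft: false, so the lora_* fields are inert).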
model_args:
  attn_implementation: flash_attention_2
  bnb_4bit_quant_type: nf4
  load_in_4bit: false
  load_in_8bit: false
  lora_alpha: 32
  lora_dropout: 0.05
  lora_modules_to_save: null
  lora_r: 16
  lora_target_modules: null
  lora_task_type: CAUSAL_LM
  model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
  model_revision: main
  torch_dtype: bfloat16
  trust_remote_code: false
  use_bnb_nested_quant: false
  use_dora: false
  use_peft: false
  use_rslora: false
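# Dataset and reward configuration: text-to-SQL training on BIRD
# (simone-papicchio/bird) with three reward functions; their relative
# weights are set by training_args.reward_weights below.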
script_args:
  cosine_max_len: 1000
  cosine_max_value_correct: 1.0
  cosine_max_value_wrong: -0.5
  cosine_min_value_correct: 0.5
  cosine_min_value_wrong: 0.0
  dataset_config: null
  dataset_name: simone-papicchio/bird
  dataset_test_split: test
  dataset_train_split: train
  gradient_checkpointing_use_reentrant: false
  ignore_bias_buffers: false
  reward_funcs:
  - qatch_metrics
  - format
  - tag_count
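# Trainer hyperparameters (transformers TrainingArguments plus TRL GRPO fields).
# Per-device batch 8 with 16 gradient-accumulation steps; 16 completions
# (num_generations) are sampled per prompt, with generation served by vLLM.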
training_args:
  _n_gpu: 1
  accelerator_config:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_configured_state: false
    use_seedable_sampler: true
  adafactor: false
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  add_system_prompt: true
  add_validation: false
  auto_find_batch_size: false
  average_tokens_across_devices: false
  base_db_path: data/bird_train/train_databases
  batch_eval_metrics: false
  benchmarks: []
  beta: 0.04
  bf16: true
  bf16_full_eval: false
  cache_implementation: null
  cached_file_path: /workspaces/deep_thinking/cache_target_sql2execution_BIRD_train.pkl
  callbacks: {}
  chat_template: null
  data_seed: null
  dataloader_drop_last: false
  dataloader_num_workers: 0
  dataloader_persistent_workers: false
  dataloader_pin_memory: true
  dataloader_prefetch_factor: null
  dataset_test_split_name: validation
  ddp_backend: null
  ddp_broadcast_buffers: null
  ddp_bucket_cap_mb: null
  ddp_find_unused_parameters: null
  ddp_timeout: 1800
  debug: []
  deepspeed: null
  disable_tqdm: false
  do_eval: false
  do_predict: false
  do_train: false
  ds3_gather_for_generation: true
  epsilon: 0.2
  epsilon_high: null
  eval_accumulation_steps: null
  eval_delay: 0
  eval_do_concat_batches: true
  eval_on_start: false
  eval_steps: null
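  # The '!!python/object/apply' tags here and below are PyYAML serializations of
  # transformers enum values; loading this file verbatim requires yaml.unsafe_load
  # (plain yaml.safe_load will reject them).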
  eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - 'no'
  eval_use_gather_object: false
  fp16: false
  fp16_backend: auto
  fp16_full_eval: false
  fp16_opt_level: O1
  fsdp: []
  fsdp_config:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
  fsdp_min_num_params: 0
  fsdp_transformer_layer_cls_to_wrap: null
  full_determinism: false
  gradient_accumulation_steps: 16
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  greater_is_better: false
  group_by_length: false
  half_precision_backend: auto
  hub_always_push: false
  hub_model_id: Qwen2.5-1.5B-Open-R1-GRPO
  hub_model_revision: main
  hub_private_repo: null
  hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
  - every_save
  hub_token: null
  ignore_data_skip: false
  include_for_metrics: []
  include_inputs_for_metrics: false
  include_num_input_tokens_seen: false
  include_tokens_per_second: false
  jit_mode_eval: false
  label_names: null
  label_smoothing_factor: 0.0
  learning_rate: 1.0e-06
  length_column_name: length
  load_best_model_at_end: false
  local_rank: 0
  log_completions: true
  log_level: info
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: ./.tensorboard_logging/f5655cd2/
  logging_first_step: true
  logging_nan_inf_filter: true
  logging_steps: 5
  logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  lr_scheduler_kwargs: {}
  lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - constant_with_warmup
  max_completion_length: 4096
  max_grad_norm: 0.2
  max_prompt_length: 2048
  max_steps: -1
  metric_for_best_model: loss
  min_p: null
  model_init_kwargs: '{''revision'': ''main'', ''trust_remote_code'': False, ''attn_implementation'':
    ''flash_attention_2'', ''torch_dtype'': torch.bfloat16, ''use_cache'': False}'
  mp_parameters: ''
  neftune_noise_alpha: null
  no_cuda: false
  num_completions_to_print: 1
  num_generations: 16
  num_iterations: 1
  num_train_epochs: 1.0
  optim: !!python/object/apply:transformers.training_args.OptimizerNames
  - adamw_8bit
  optim_args: null
  optim_target_modules: null
  output_dir: base_models/grpo/Qwen/Qwen2.5-Coder-7B-Instruct/bs_256_ml_4096_gen_16_f5655cd2_RL
  overwrite_hub_revision: false
  overwrite_output_dir: false
  past_index: -1
  per_device_eval_batch_size: 8
  per_device_train_batch_size: 8
  per_gpu_eval_batch_size: null
  per_gpu_train_batch_size: null
  prediction_loss_only: false
  prompt_name: text2sql_model_grpo
  push_to_hub: false
  push_to_hub_model_id: null
  push_to_hub_organization: null
  push_to_hub_revision: false
  push_to_hub_token: null
  ray_scope: last
  ref_model_mixup_alpha: 0.6
  ref_model_sync_steps: 512
  remove_unused_columns: false
  repetition_penalty: 1.0
  report_to:
  - tensorboard
  - wandb
  restore_callback_states_from_checkpoint: false
  resume_from_checkpoint: 'True'
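  # Aligned positionally with script_args.reward_funcs:
  # qatch_metrics -> 0.85, format -> 0.1, tag_count -> 0.05.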
  reward_weights:
  - 0.85
  - 0.1
  - 0.05
  run_name: exp-9-7B-QATCH
  save_on_each_node: false
  save_only_model: false
  save_safetensors: true
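  # A fractional save_steps is interpreted as a ratio of total training steps
  # (checkpoint every 10% of the run), keeping at most save_total_limit checkpoints.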
  save_steps: 0.1
  save_strategy: !!python/object/apply:transformers.trainer_utils.SaveStrategy
  - steps
  save_total_limit: 3
  scale_rewards: true
  seed: 42
  skip_memory_metrics: true
  stratified_by_complexity: false
  sync_ref_model: false
  temperature: 0.7
  tf32: null
  top_k: 50
  top_p: 1.0
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  torch_empty_cache_steps: null
  torchdynamo: null
  tp_size: 0
  tpu_metrics_debug: false
  tpu_num_cores: null
  use_cpu: false
  use_ipex: false
  use_legacy_prediction_loop: false
  use_liger_kernel: false
  use_liger_loss: false
  use_mps_device: false
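  # Rollout generation is offloaded to a vLLM server (host/port below),
  # running in bf16 with 70% GPU memory utilization.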
  use_vllm: true
  validation_split: 0.2
  vllm_device: auto
  vllm_dtype: bfloat16
  vllm_enable_prefix_caching: null
  vllm_gpu_memory_utilization: 0.7
  vllm_guided_decoding_regex: null
  vllm_max_model_len: null
  vllm_server_host: 127.0.0.1
  vllm_server_port: 24879
  vllm_server_timeout: 120.0
  wandb_log_unique_prompts: true
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.0