vwxyzjn committed
Commit 60d5466 · verified · Parent: c4874ac

Update README.md

Files changed (1): README.md (+71, -1)
README.md CHANGED
@@ -197,7 +197,77 @@ Below are the other eval scores over steps for Llama-3.1-Tulu-3.1-8B:
  ![](Llama-3.1-Tulu-3.1-8B-other-evals-overtime.png)
 
 
-
+ ## Reproduction command
+
+
+
+ ```bash
+ # clone the repository and check out the exact commit
+ git clone https://github.com/allenai/open-instruct.git
+ cd open-instruct
+ git checkout 3f37c29ddc97d2c108a7658692d2d2c3708ef182
+
+ # run my exact command for launching the experiments
+ for learning_rate in 5e-7; do
+ for beta in 0.01; do
+ for nspp in 16; do
+ for m in half-m; do
+ for kl_estimator in kl3; do
+ local_rollout_batch_size=8
+ # `half-m` is the same as setting number of mini-batches to be 2.
+ if [ $m == "half-m" ]; then
+ local_mini_batch_size=$(($local_rollout_batch_size * $nspp / 2))
+ else
+ local_mini_batch_size=$(($local_rollout_batch_size * $nspp))
+ fi
+ exp_name="0204_lr_scan_grpo_math_lr_${learning_rate}_${kl_estimator}_${beta}_${nspp}_${m}_${RANDOM}"
+ echo $exp_name:
+ echo --- local_mini_batch_size=$local_mini_batch_size
+ echo --- num_gradient_updates=$(($local_rollout_batch_size * $nspp / $local_mini_batch_size))
+ python open_instruct/grpo_vllm_thread_ray_gtrl.py \
+     --exp_name $exp_name \
+     --beta $beta \
+     --local_mini_batch_size $local_mini_batch_size \
+     --number_samples_per_prompt $nspp \
+     --output_dir output/$exp_name \
+     --local_rollout_batch_size $local_rollout_batch_size \
+     --kl_estimator $kl_estimator \
+     --learning_rate $learning_rate \
+     --dataset_mixer_list allenai/RLVR-GSM-MATH-IF-Mixed-Constraints 1.0 \
+     --dataset_mixer_list_splits train \
+     --dataset_mixer_eval_list allenai/RLVR-GSM-MATH-IF-Mixed-Constraints 16 \
+     --dataset_mixer_eval_list_splits train \
+     --max_token_length 2048 \
+     --max_prompt_token_length 2048 \
+     --response_length 2048 \
+     --model_name_or_path allenai/Llama-3.1-Tulu-3-8B-DPO \
+     --non_stop_penalty \
+     --stop_token eos \
+     --temperature 1.0 \
+     --ground_truths_key ground_truth \
+     --chat_template_name tulu \
+     --sft_messages_key messages \
+     --total_episodes 10000000 \
+     --penalty_reward_value 0.0 \
+     --deepspeed_stage 2 \
+     --per_device_train_batch_size 2 \
+     --local_rollout_forward_batch_size 2 \
+     --actor_num_gpus_per_node 6 \
+     --num_epochs 1 \
+     --vllm_tensor_parallel_size 2 \
+     --lr_scheduler_type constant \
+     --apply_verifiable_reward true \
+     --seed 1 \
+     --num_evals 30 \
+     --save_freq 40 \
+     --reward_model_multiplier 0.0 \
+     --gradient_checkpointing \
+     --with_tracking
+ done
+ done
+ done
+ done
+ done
+ ```
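For reference, here is a minimal standalone sketch of the mini-batch arithmetic performed by the launch loop above, assuming the values used in this run (`local_rollout_batch_size=8`, `nspp=16`, `m=half-m`); it is illustrative only and not part of the committed README:

```bash
# Standalone sketch: reproduces only the batch-size arithmetic from the
# launch loop above, with the values used in this run.
local_rollout_batch_size=8   # value set in the loop above
nspp=16                      # passed as --number_samples_per_prompt
m=half-m                     # "half-m" splits each rollout batch into 2 mini-batches

if [ "$m" == "half-m" ]; then
    local_mini_batch_size=$(($local_rollout_batch_size * $nspp / 2))   # 8 * 16 / 2 = 64
else
    local_mini_batch_size=$(($local_rollout_batch_size * $nspp))       # 8 * 16 = 128
fi

echo "local_mini_batch_size=$local_mini_batch_size"
echo "num_gradient_updates=$(($local_rollout_batch_size * $nspp / $local_mini_batch_size))"
```

With these values it prints `local_mini_batch_size=64` and `num_gradient_updates=2`, i.e. each rollout batch of 8 * 16 = 128 samples is split into two mini-batches of 64, matching the `echo` lines in the launch script.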
 
  ## License and use