refactor toward model size
Browse files- run.sh → run-1b.sh +12 -10
- run-300M.sh +38 -0
run.sh → run-1b.sh
RENAMED
@@ -1,25 +1,26 @@
|
|
1 |
#!/bin/sh
|
2 |
|
3 |
-
export
|
|
|
4 |
|
5 |
python src/run_speech_recognition_ctc_bnb.py \
|
6 |
--dataset_name="mozilla-foundation/common_voice_7_0" \
|
7 |
--model_name_or_path="facebook/wav2vec2-xls-r-1b" \
|
8 |
--dataset_config_name="et" \
|
9 |
-
--output_dir="./" \
|
10 |
--overwrite_output_dir \
|
11 |
--num_train_epochs=100 \
|
12 |
-
--per_device_train_batch_size=
|
13 |
-
--per_device_eval_batch_size=
|
14 |
--gradient_accumulation_steps=2 \
|
15 |
--learning_rate=3e-4 \
|
16 |
-
--save_total_limit=
|
17 |
--warmup_steps=500 \
|
18 |
--evaluation_strategy=steps \
|
19 |
--text_column_name=sentence \
|
20 |
--length_column_name=input_length \
|
21 |
-
--save_steps=
|
22 |
-
--eval_steps=
|
23 |
--logging_steps=100 \
|
24 |
--layerdrop=0.0 \
|
25 |
--freeze_feature_encoder \
|
@@ -33,6 +34,7 @@ python src/run_speech_recognition_ctc_bnb.py \
|
|
33 |
--mask_time_length=10 \
|
34 |
--report_to=wandb \
|
35 |
--run_name="cosine+drop_proj+low_specaugment-1b" \
|
36 |
-
--do_train --do_eval
|
37 |
-
|
38 |
-
|
|
|
|
1 |
#!/bin/sh
|
2 |
|
3 |
+
export WANDB_PROJECT="xls-r-estonian"
|
4 |
+
export CUDA_VISIBLE_DEVICES=1
|
5 |
|
6 |
python src/run_speech_recognition_ctc_bnb.py \
|
7 |
--dataset_name="mozilla-foundation/common_voice_7_0" \
|
8 |
--model_name_or_path="facebook/wav2vec2-xls-r-1b" \
|
9 |
--dataset_config_name="et" \
|
10 |
+
--output_dir="./1B" \
|
11 |
--overwrite_output_dir \
|
12 |
--num_train_epochs=100 \
|
13 |
+
--per_device_train_batch_size=48 \
|
14 |
+
--per_device_eval_batch_size=48 \
|
15 |
--gradient_accumulation_steps=2 \
|
16 |
--learning_rate=3e-4 \
|
17 |
+
--save_total_limit=1 \
|
18 |
--warmup_steps=500 \
|
19 |
--evaluation_strategy=steps \
|
20 |
--text_column_name=sentence \
|
21 |
--length_column_name=input_length \
|
22 |
+
--save_steps=1000 \
|
23 |
+
--eval_steps=250 \
|
24 |
--logging_steps=100 \
|
25 |
--layerdrop=0.0 \
|
26 |
--freeze_feature_encoder \
|
|
|
34 |
--mask_time_length=10 \
|
35 |
--report_to=wandb \
|
36 |
--run_name="cosine+drop_proj+low_specaugment-1b" \
|
37 |
+
--do_train --do_eval
|
38 |
+
|
39 |
+
#--use_auth_token
|
40 |
+
#--push_to_hub
|
run-300M.sh
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/sh
# Fine-tune facebook/wav2vec2-xls-r-300m for Estonian ASR on Common Voice 7.0.
# Companion to run-1b.sh (same recipe, smaller model, larger per-device batch).
# Requires: src/run_speech_recognition_ctc_bnb.py, a wandb login, and a
# HuggingFace auth token (for --use_auth_token / --push_to_hub).

# Fail fast on any command error or use of an unset variable.
# (pipefail is not portable under /bin/sh, so only -e and -u are set.)
set -eu

export WANDB_PROJECT="xls-r-estonian"
# Pin to GPU 2 so this run can share the machine with the 1B run (GPU 1).
export CUDA_VISIBLE_DEVICES=2

# NOTE(review): --output_dir="./" writes checkpoints into the repo root and
# --push_to_hub will push the CWD; the 1B script uses a subdirectory ("./1B").
# Presumably this should be a dedicated dir too — confirm before long runs.
python src/run_speech_recognition_ctc_bnb.py \
	--dataset_name="mozilla-foundation/common_voice_7_0" \
	--model_name_or_path="facebook/wav2vec2-xls-r-300m" \
	--dataset_config_name="et" \
	--output_dir="./" \
	--overwrite_output_dir \
	--num_train_epochs=100 \
	--per_device_train_batch_size=80 \
	--per_device_eval_batch_size=80 \
	--gradient_accumulation_steps=2 \
	--learning_rate=3e-4 \
	--save_total_limit=1 \
	--warmup_steps=500 \
	--evaluation_strategy=steps \
	--text_column_name=sentence \
	--length_column_name=input_length \
	--save_steps=1000 \
	--eval_steps=250 \
	--logging_steps=100 \
	--layerdrop=0.0 \
	--freeze_feature_encoder \
	--feat_proj_dropout=0.1 \
	--chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
	--gradient_checkpointing \
	--lr_scheduler_type=cosine \
	--fp16 \
	--group_by_length \
	--mask_time_prob=0.1 \
	--mask_time_length=10 \
	--report_to=wandb \
	--run_name="cosine+drop_proj+low_specaugment-300M" \
	--do_train --do_eval \
	--use_auth_token --push_to_hub