# en-ro/ro-en: 38.5/38.5 -> CUDA_VISIBLE_DEVICES=0 python run_translation.py \ --model_name_or_path facebook/mbart-large-en-ro \ --do_train \ --do_eval \ --dataset_name wmt16 \ --dataset_config_name ro-en \ --source_lang en_XX \ --target_lang ro_RO \ --output_dir ./baseline \ --per_device_train_batch_size=4 \ --per_device_eval_batch_size=4 \ --overwrite_output_dir \ --predict_with_generate \ --apply-trp --trp-depths 1 --trp-p 0.1 --trp-lambdas 0.4 0.2 0.1