import os import torch from transformers import Trainer, TrainingArguments from datasets import load_dataset import subprocess # Install required packages subprocess.run("pip install git+https://github.com/canopyai/Orpheus-TTS.git", shell=True) subprocess.run("pip install orpheus-speech vllm==0.7.3", shell=True) # Load the dataset dataset = load_dataset("Emotional_Speech_Dataset_(ESD)") # Get the model from transformers import AutoModelForCausalLM, AutoTokenizer model = AutoModelForCausalLM.from_pretrained("canopylabs/orpheus-3b-0.1-pretrained") tokenizer = AutoTokenizer.from_pretrained("canopylabs/orpheus-3b-0.1-pretrained") # Setup training arguments training_args = TrainingArguments( output_dir="./orpheus-finetuned", per_device_train_batch_size=2, gradient_accumulation_steps=4, learning_rate=5e-5, num_train_epochs=3, save_strategy="steps", save_steps=500, ) # Start training trainer = Trainer( model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer, ) trainer.train() # Save the model model.save_pretrained("./orpheus-finetuned-model") tokenizer.save_pretrained("./orpheus-finetuned-model")