Hiber-Voice-Unmasking-CUDA-V1 / training_config.py
hibernatesai's picture
Upload 11 files
45e1a77 verified
from dataclasses import dataclass, field
from typing import Optional, List
import os
import json
from transformers import TrainingArguments, Trainer
import torch
@dataclass
class AudioTrainingConfig:
# Model configuration
model_name: str = "wav2vec2"
hidden_size: int = 1024
num_attention_heads: int = 16
num_hidden_layers: int = 24
# Training parameters
output_dir: str = field(default="./results")
num_train_epochs: int = 5
per_device_train_batch_size: int = 8
per_device_eval_batch_size: int = 8
gradient_accumulation_steps: int = 4
learning_rate: float = 3e-5
warmup_ratio: float = 0.1
# Optimization
fp16: bool = True
bf16: bool = False
gradient_checkpointing: bool = True
optim: str = "adamw_torch"
weight_decay: float = 0.01
max_grad_norm: float = 1.0
# Logging & Evaluation
logging_dir: str = field(default="./logs")
logging_steps: int = 100
eval_steps: int = 500
save_steps: int = 500
save_strategy: str = "epoch"
evaluation_strategy: str = "epoch"
# Performance
dataloader_num_workers: int = 4
group_by_length: bool = True
remove_unused_columns: bool = True
label_smoothing_factor: float = 0.1
# Advanced features
use_mps_device: bool = field(
default=False,
metadata={"help": "Whether to use Apple M1/M2 GPU acceleration"}
)
mixed_precision: str = field(
default="fp16",
metadata={"help": "Mixed precision mode: 'no', 'fp16', 'bf16'"}
)
def __post_init__(self):
# Create output directories if they don't exist
os.makedirs(self.output_dir, exist_ok=True)
os.makedirs(self.logging_dir, exist_ok=True)
# Adjust settings based on hardware
if torch.cuda.is_available():
self.device = "cuda"
self.n_gpu = torch.cuda.device_count()
elif torch.backends.mps.is_available() and self.use_mps_device:
self.device = "mps"
self.n_gpu = 1
else:
self.device = "cpu"
self.n_gpu = 0
self.fp16 = False
self.bf16 = False
def get_training_args(self) -> TrainingArguments:
return TrainingArguments(
output_dir=self.output_dir,
num_train_epochs=self.num_train_epochs,
per_device_train_batch_size=self.per_device_train_batch_size,
per_device_eval_batch_size=self.per_device_eval_batch_size,
gradient_accumulation_steps=self.gradient_accumulation_steps,
learning_rate=self.learning_rate,
warmup_ratio=self.warmup_ratio,
logging_dir=self.logging_dir,
logging_steps=self.logging_steps,
save_strategy=self.save_strategy,
evaluation_strategy=self.evaluation_strategy,
eval_steps=self.eval_steps,
save_steps=self.save_steps,
load_best_model_at_end=True,
metric_for_best_model="accuracy",
greater_is_better=True,
fp16=self.fp16 and self.mixed_precision == "fp16",
bf16=self.bf16 and self.mixed_precision == "bf16",
dataloader_num_workers=self.dataloader_num_workers,
group_by_length=self.group_by_length,
remove_unused_columns=self.remove_unused_columns,
label_smoothing_factor=self.label_smoothing_factor,
gradient_checkpointing=self.gradient_checkpointing,
optim=self.optim,
weight_decay=self.weight_decay,
max_grad_norm=self.max_grad_norm,
)
def save_config(self, filepath: str = "training_config.json"):
"""Save configuration to JSON file"""
config_dict = {k: v for k, v in self.__dict__.items() if not k.startswith('_')}
with open(filepath, 'w') as f:
json.dump(config_dict, f, indent=2)
@classmethod
def load_config(cls, filepath: str = "training_config.json") -> 'AudioTrainingConfig':
"""Load configuration from JSON file"""
with open(filepath, 'r') as f:
config_dict = json.load(f)
return cls(**config_dict)
def main():
# Initialize configuration
config = AudioTrainingConfig()
# Save both formats
config.save_config("training_config.json")
training_args = config.get_training_args()
training_args.save_to_json("training_args.bin")
print(f"Training will use device: {config.device} with {config.n_gpu} GPUs")
print(f"Mixed precision: {config.mixed_precision}")
print(f"Configuration saved to: training_config.json and training_args.bin")
if __name__ == "__main__":
main()