Spaces:

facebook
/

MelodyFlow

Running

MelodyFlow / config / solver / watermark / default.yaml

Gael Le Lan

Initial commit

9d0d223 12 months ago

3.99 kB

	# @package __global__

	# Hydra defaults list. `_self_` is listed last, so values set in this file
	# are applied after the configs named above it.
	defaults:
	  - /solver/default
	  - /augmentations/default
	  - override /dset: audio/example
	  - _self_

	# standard name to load the solver using builders
	solver: watermarking
	# `???` is the OmegaConf marker for a mandatory value: both keys below
	# have no default here and must be supplied by another config or override.
	sample_rate: ???
	channels: ???

	# all the defaults from compression
	# One numeric value per loss term (presumably the weight of each term;
	# confirm against the solver). Several of these names are also top-level
	# keys in this file, so the entries must stay nested under `losses`.
	losses:
	  adv: 4.0
	  feat: 4.0
	  l1: 0.1
	  mel: 0.0
	  msspec: 2.0
	  sisnr: 0.0
	  wm_detection: 1.0  # loss for first 2 bits cannot be 0
	  wm_mb: 1.0  # loss for the rest of the bits (wm message)
	  tf_loudnessratio: 10.0

	# Options grouped under `balancer` (the component reading them is not
	# visible in this file).
	balancer:
	  balance_grads: true
	  ema_decay: 0.999
	  per_batch_item: true
	  total_norm: 1.0

	# Crop options. Per the note on `pad_prob`, the three probabilities must
	# sum to at most 1 (here 0.4 + 0.2 + 0.2 = 0.8).
	crop:
	  prob: 0.4
	  shuffle_prob: 0.2
	  pad_prob: 0.2  # shuffle_prob + pad_prob + prob <= 1
	  size: 0.5
	  max_n_windows: 5

	# Adversarial training options. `adversaries` names `msstftd`, which is
	# presumably configured by the top-level `msstftd` section of this file.
	adversarial:
	  every: 1
	  adversaries: [msstftd]
	  adv_loss: hinge
	  feat_loss: l1

	# Parameters grouped under `tf_loudnessratio` (presumably for the loss term
	# of the same name listed under `losses`; confirm against the solver).
	tf_loudnessratio:
	  sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
	  segment: 0.5
	  overlap: 0.5
	  n_bands: 16
	  temperature: 1.0

	# watermarking: audioseal

	# losses hyperparameters
	# Empty mappings: no parameters are set for `l1` / `l2` in this file.
	l1: {}
	l2: {}

	# Parameters grouped under `wm_detection`.
	# NOTE(review): p_weight / n_weight look like positive / negative class
	# weights; confirm against the loss implementation.
	wm_detection:
	  p_weight: 1
	  n_weight: 1

	# Parameters grouped under `wm_mb`. Its `temperature` is separate from the
	# `temperature` under `tf_loudnessratio`, so both must stay nested.
	wm_mb:
	  loss_type: bce  # loss between decoded and original
	  temperature: 0.1  # decoded is divided by temperature before loss computation

	# Parameters grouped under `spec_range`: FFT size and a frequency range
	# (units not stated here; presumably Hz).
	spec_range:
	  n_fft: 2048
	  min_frequency: 300.0
	  max_frequency: 15000.0
	  sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
	# Parameters grouped under `spec_entropy_range`; the values match those
	# given for `spec_range`.
	spec_entropy_range:
	  n_fft: 2048
	  min_frequency: 300.0
	  max_frequency: 15000.0
	  sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
	# Parameters grouped under `mrstft`. Floats are written with a leading zero
	# (`0.5`, not `.5`) for readability; the parsed values are unchanged.
	mrstft:
	  factor_sc: 0.5
	  factor_mag: 0.5
	  normalized: false
	# Parameters grouped under `mel` (presumably for the `mel` term listed
	# under `losses`; confirm against the solver).
	mel:
	  sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
	  n_fft: 1024
	  hop_length: 256
	  win_length: 1024
	  n_mels: 64
	  f_min: 64
	  f_max: null
	  normalized: false
	  # Written with a decimal point so YAML 1.1 resolvers, which read a bare
	  # `1e-5` as a string, also parse it as a float.
	  floor_level: 1.0e-5
	# Parameters grouped under `sisnr` (presumably for the `sisnr` term listed
	# under `losses`; confirm against the solver).
	sisnr:
	  sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
	  segment: 5.0
	# Parameters grouped under `msspec` (presumably for the `msspec` term
	# listed under `losses`; confirm against the solver).
	msspec:
	  sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
	  range_start: 6
	  range_end: 11
	  n_mels: 64
	  f_min: 64
	  f_max: null
	  normalized: true
	  alphas: false
	  # Written with a decimal point so YAML 1.1 resolvers, which read a bare
	  # `1e-5` as a string, also parse it as a float.
	  floor_level: 1.0e-5

	# metrics
	# Metric settings. Only `visqol` is configured here; `bin` is null, i.e. no
	# install path is set in this file.
	metrics:
	  visqol:
	    mode: audio
	    bin: null  # path to visqol install
	    model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model  # visqol v3

	# adversaries hyperparameters
	# Parameters for `msstftd`, the adversary named in `adversarial.adversaries`.
	# The three per-scale lists each have five entries, one per scale.
	msstftd:
	  in_channels: 1
	  out_channels: 1
	  filters: 32
	  norm: weight_norm
	  n_ffts: [1024, 2048, 512, 256, 128]
	  hop_lengths: [256, 512, 128, 64, 32]
	  win_lengths: [1024, 2048, 512, 256, 128]
	  activation: LeakyReLU
	  activation_params: { negative_slope: 0.3 }
	# Parameters grouped under `msd`. It is not listed in
	# `adversarial.adversaries` in this file.
	msd:
	  in_channels: 1
	  out_channels: 1
	  scale_norms: [spectral_norm, weight_norm, weight_norm]
	  kernel_sizes: [5, 3]
	  filters: 16
	  max_filters: 1024
	  downsample_scales: [4, 4, 4, 4]
	  inner_kernel_sizes: null
	  groups: [4, 4, 4, 4]
	  strides: null
	  paddings: null
	  activation: LeakyReLU
	  activation_params: { negative_slope: 0.3 }
	# Parameters grouped under `mpd`. It is not listed in
	# `adversarial.adversaries` in this file.
	mpd:
	  in_channels: 1
	  out_channels: 1
	  periods: [2, 3, 5, 7, 11]
	  n_layers: 5
	  kernel_size: 5
	  stride: 3
	  filters: 8
	  filter_scales: 4
	  max_filters: 1024
	  activation: LeakyReLU
	  activation_params: { negative_slope: 0.3 }
	  norm: weight_norm

	# data hyperparameters
	# Data loading options. The nested `evaluate` / `generate` groups are
	# distinct from the top-level `evaluate` / `generate` keys later in this file.
	# NOTE(review): the per-split groups presumably override the shared values
	# above them; confirm against the dataset builder.
	dataset:
	  batch_size: 16
	  num_workers: 10
	  segment_duration: 1
	  train:
	    num_samples: 500000
	  valid:
	    num_samples: 10000
	  evaluate:
	    batch_size: 16
	    num_samples: 10000
	    segment_duration: 10
	  generate:
	    batch_size: 16
	    num_samples: 50
	    segment_duration: 30

	# solver hyperparameters
	# Evaluation stage options. The nested `metrics` map holds on/off flags and
	# is separate from the top-level `metrics` section.
	evaluate:
	  every: 10
	  num_workers: 5
	  metrics:
	    visqol: false
	    sisnr: true
	# Generation stage options.
	# NOTE(review): `audio` is placed under `generate` because it directly
	# follows that group's keys; confirm against the solver.
	generate:
	  every: 10
	  num_workers: 5
	  audio:
	    sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`

	# checkpointing schedule
	# Checkpoint options. `keep_every_states` is null, i.e. not set in this file.
	checkpoint:
	  save_last: true
	  save_every: 25
	  keep_last: 10
	  keep_every_states: null



	# optimization hyperparameters
	# Optimization options. `optimizer` names `adam`, whose parameters are in
	# the nested `adam` group.
	# NOTE(review): `ema` is placed under `optim` because it directly follows
	# the `adam` group; confirm against the solver.
	optim:
	  epochs: 300
	  updates_per_epoch: 2000
	  # Written with a decimal point so YAML 1.1 resolvers, which read a bare
	  # `5e-5` as a string, also parse it as a float.
	  lr: 5.0e-5
	  max_norm: 3.0
	  optimizer: adam
	  adam:
	    betas: [0.5, 0.9]
	    weight_decay: 0.0
	  ema:
	    use: true  # whether to use EMA or not
	    updates: 1  # update at every step
	    # `device` is not defined in the visible part of this file, so the
	    # interpolation must resolve against another config.
	    device: ${device}  # device for EMA, can be put on GPU if more frequent updates
	    decay: 0.99  # EMA decay value, if null, no EMA is used


	# Learning-rate schedule. `lr_scheduler` names `cosine`, whose parameters
	# are in the nested `cosine` group.
	schedule:
	  lr_scheduler: cosine
	  cosine:
	    warmup: 4000
	    lr_min_ratio: 0.0
	    cycle_length: 1.0