hishamcse
/

mortal-kombat-3-ppo-diambra

Reinforcement Learning

ultimate-mortal-kombat-3

deep-reinforcement-learning

stable-baseline3

Model card Files Files and versions Community

mortal-kombat-3-ppo-diambra / config.yaml

hishamcse's picture

Upload 3 files

cf3c783 verified 7 months ago

1.68 kB

	folders:
	parent_dir: "./results/"
	model_name: "sr6_128x4_das_nc"

	settings:
	game_id: "umk3"
	step_ratio: 6
	frame_shape: !!python/tuple [128, 128, 1]
	continue_game: 0.0
	action_space: "discrete"
	characters: "Skorpion"
	difficulty: 5

	wrappers_settings:
	normalize_reward: true
	no_attack_buttons_combinations: true
	stack_frames: 4
	dilation: 1
	add_last_action: true
	stack_actions: 12
	scale: true
	exclude_image_scaling: true
	role_relative: true
	flatten: true
	filter_keys: ["action", "own_health", "opp_health", "own_side", "opp_side", "opp_character", "stage", "timer"]

	# optuna results
	# Best hyperparameters: {'gamma': 0.05944028113410932, 'max_grad_norm': 3.5407661656818026,
	# 'exponent_n_steps': 5, 'n_epochs': 14, 'batch_size': 512, 'lr': 0.014638860976621421,
	# 'ent_coef': 2.361611947920214e-06, 'clip_range': 0.3, 'gae_lambda': 0.9520674913500098,
	# 'vf_coef': 0.6420316461542878, 'net_arch': 'medium', 'activation_fn': 'leaky_relu'}

	policy_kwargs:
	#net_arch: [{ pi: [64, 64], vf: [32, 32] }]
	net_arch: [256, 256]
	activation_fn: "leaky_relu"

	ppo_settings:
	gamma: 0.94
	model_checkpoint: "660000" # 0: No checkpoint, else: Load checkpoint (if previously trained)
	learning_rate: [1.0e-3, 2.5e-6] # To start
	clip_range: [0.3, 0.015] # To start
	batch_size: 512 #8 #nminibatches gave different batch size depending on the number of environments: batch_size = (n_steps * n_envs) // nminibatches
	n_epochs: 14
	n_steps: 512
	gae_lambda: 0.9520674913500098
	ent_coef: 2.361611947920214e-06
	vf_coef: 0.6420316461542878
	autosave_freq: 50000
	time_steps: 1000000