metadata
tags:
- reinforcement-learning
- atari-alien
- atari-amidar
- atari-assault
- atari-asterix
- atari-asteroids
- atari-atlantis
- atari-bankheist
- atari-battlezone
- atari-beamrider
- atari-berzerk
- atari-bowling
- atari-boxing
- atari-breakout
- atari-centipede
- atari-choppercommand
- atari-crazyclimber
- atari-defender
- atari-demonattack
- atari-doubledunk
- atari-enduro
- atari-fishingderby
- atari-freeway
- atari-frostbite
- atari-gopher
- atari-gravitar
- atari-hero
- atari-icehockey
- atari-jamesbond
- atari-kangaroo
- atari-krull
- atari-kungfumaster
- atari-montezumarevenge
- atari-mspacman
- atari-namethisgame
- atari-phoenix
- atari-pitfall
- atari-pong
- atari-privateeye
- atari-qbert
- atari-riverraid
- atari-roadrunner
- atari-robotank
- atari-seaquest
- atari-skiing
- atari-solaris
- atari-spaceinvaders
- atari-stargunner
- atari-surround
- atari-tennis
- atari-timepilot
- atari-tutankham
- atari-upndown
- atari-venture
- atari-videopinball
- atari-wizardofwor
- atari-yarsrevenge
- atari-zaxxon
- babyai-action-obj-door
- babyai-blocked-unlock-pickup
- babyai-boss-level-no-unlock
- babyai-boss-level
- babyai-find-obj-s5
- babyai-go-to-door
- babyai-go-to-imp-unlock
- babyai-go-to-local
- babyai-go-to-obj-door
- babyai-go-to-obj
- babyai-go-to-red-ball-grey
- babyai-go-to-red-ball-no-dists
- babyai-go-to-red-ball
- babyai-go-to-red-blue-ball
- babyai-go-to-seq
- babyai-go-to
- babyai-key-corridor
- babyai-mini-boss-level
- babyai-move-two-across-s8n9
- babyai-one-room-s8
- babyai-open-door
- babyai-open-doors-order-n4
- babyai-open-red-door
- babyai-open-two-doors
- babyai-open
- babyai-pickup-above
- babyai-pickup-dist
- babyai-pickup-loc
- babyai-pickup
- babyai-put-next-local
- babyai-put-next
- babyai-synth-loc
- babyai-synth-seq
- babyai-synth
- babyai-unblock-pickup
- babyai-unlock-local
- babyai-unlock-pickup
- babyai-unlock-to-unlock
- babyai-unlock
- metaworld-assembly
- metaworld-basketball
- metaworld-bin-picking
- metaworld-box-close
- metaworld-button-press-topdown-wall
- metaworld-button-press-topdown
- metaworld-button-press-wall
- metaworld-button-press
- metaworld-coffee-button
- metaworld-coffee-pull
- metaworld-coffee-push
- metaworld-dial-turn
- metaworld-disassemble
- metaworld-door-close
- metaworld-door-lock
- metaworld-door-open
- metaworld-door-unlock
- metaworld-drawer-close
- metaworld-drawer-open
- metaworld-faucet-close
- metaworld-faucet-open
- metaworld-hammer
- metaworld-hand-insert
- metaworld-handle-press-side
- metaworld-handle-press
- metaworld-handle-pull-side
- metaworld-handle-pull
- metaworld-lever-pull
- metaworld-peg-insert-side
- metaworld-peg-unplug-side
- metaworld-pick-out-of-hole
- metaworld-pick-place-wall
- metaworld-pick-place
- metaworld-plate-slide-back-side
- metaworld-plate-slide-back
- metaworld-plate-slide-side
- metaworld-plate-slide
- metaworld-push-back
- metaworld-push-wall
- metaworld-push
- metaworld-reach-wall
- metaworld-reach
- metaworld-shelf-place
- metaworld-soccer
- metaworld-stick-pull
- metaworld-stick-push
- metaworld-sweep-into
- metaworld-sweep
- metaworld-window-close
- metaworld-window-open
- mujoco-ant
- mujoco-doublependulum
- mujoco-halfcheetah
- mujoco-hopper
- mujoco-humanoid
- mujoco-pendulum
- mujoco-pusher
- mujoco-reacher
- mujoco-standup
- mujoco-swimmer
- mujoco-walker
datasets: jat-project/jat-dataset
pipeline_tag: reinforcement-learning
model-index:
- name: jat-project/jat
results:
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Atari 57
type: atari
metrics:
- type: iqm_expert_normalized_total_reward
value: 0.14 [0.14, 0.15]
name: IQM expert normalized total reward
- type: iqm_human_normalized_total_reward
value: 0.38 [0.37, 0.38]
name: IQM human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: BabyAI
type: babyai
metrics:
- type: iqm_expert_normalized_total_reward
value: 0.99 [0.99, 0.99]
name: IQM expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: MetaWorld
type: metaworld
metrics:
- type: iqm_expert_normalized_total_reward
value: 0.68 [0.67, 0.69]
name: IQM expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: MuJoCo
type: mujoco
metrics:
- type: iqm_expert_normalized_total_reward
value: 0.81 [0.80, 0.82]
name: IQM expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Alien
type: atari-alien
metrics:
- type: total_reward
value: 1474.90 +/- 588.75
name: Total reward
- type: expert_normalized_total_reward
value: 0.07 +/- 0.04
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.18 +/- 0.09
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Amidar
type: atari-amidar
metrics:
- type: total_reward
value: 104.89 +/- 103.52
name: Total reward
- type: expert_normalized_total_reward
value: 0.05 +/- 0.05
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.06 +/- 0.06
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Assault
type: atari-assault
metrics:
- type: total_reward
value: 1650.07 +/- 820.99
name: Total reward
- type: expert_normalized_total_reward
value: 0.09 +/- 0.05
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 2.75 +/- 1.58
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Asterix
type: atari-asterix
metrics:
- type: total_reward
value: 800.00 +/- 584.85
name: Total reward
- type: expert_normalized_total_reward
value: 0.17 +/- 0.17
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.07 +/- 0.07
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Asteroids
type: atari-asteroids
metrics:
- type: total_reward
value: 1385.30 +/- 507.53
name: Total reward
- type: expert_normalized_total_reward
value: 0.00 +/- 0.00
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.01 +/- 0.01
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Atlantis
type: atari-atlantis
metrics:
- type: total_reward
value: 66980.00 +/- 158449.73
name: Total reward
- type: expert_normalized_total_reward
value: 0.18 +/- 0.51
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 3.35 +/- 9.79
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Bank Heist
type: atari-bankheist
metrics:
- type: total_reward
value: 948.30 +/- 199.86
name: Total reward
- type: expert_normalized_total_reward
value: 0.71 +/- 0.15
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 1.26 +/- 0.27
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Battle Zone
type: atari-battlezone
metrics:
- type: total_reward
value: 17420.00 +/- 6071.54
name: Total reward
- type: expert_normalized_total_reward
value: 0.06 +/- 0.02
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.47 +/- 0.16
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Beam Rider
type: atari-beamrider
metrics:
- type: total_reward
value: 797.32 +/- 328.31
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.03 +/- 0.02
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Berzerk
type: atari-berzerk
metrics:
- type: total_reward
value: 687.30 +/- 331.91
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.22 +/- 0.13
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Bowling
type: atari-bowling
metrics:
- type: total_reward
value: 22.41 +/- 5.57
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.00
name: Expert normalized total reward
- type: human_normalized_total_reward
value: '-0.01 +/- 0.04'
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Boxing
type: atari-boxing
metrics:
- type: total_reward
value: 90.10 +/- 23.05
name: Total reward
- type: expert_normalized_total_reward
value: 0.92 +/- 0.24
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 7.50 +/- 1.92
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Breakout
type: atari-breakout
metrics:
- type: total_reward
value: 8.82 +/- 5.63
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.25 +/- 0.20
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Centipede
type: atari-centipede
metrics:
- type: total_reward
value: 5589.92 +/- 2567.26
name: Total reward
- type: expert_normalized_total_reward
value: 0.37 +/- 0.27
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.35 +/- 0.26
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Chopper Command
type: atari-choppercommand
metrics:
- type: total_reward
value: 2417.00 +/- 1489.90
name: Total reward
- type: expert_normalized_total_reward
value: 0.02 +/- 0.02
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.24 +/- 0.23
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Crazy Climber
type: atari-crazyclimber
metrics:
- type: total_reward
value: 97639.00 +/- 26184.68
name: Total reward
- type: expert_normalized_total_reward
value: 0.52 +/- 0.16
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 3.47 +/- 1.05
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Defender
type: atari-defender
metrics:
- type: total_reward
value: 39323.50 +/- 15202.98
name: Total reward
- type: expert_normalized_total_reward
value: 0.10 +/- 0.04
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 2.30 +/- 0.96
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Demon Attack
type: atari-demonattack
metrics:
- type: total_reward
value: 815.30 +/- 989.67
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.36 +/- 0.54
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Double Dunk
type: atari-doubledunk
metrics:
- type: total_reward
value: 14.42 +/- 9.97
name: Total reward
- type: expert_normalized_total_reward
value: 0.84 +/- 0.25
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.94 +/- 0.28
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Enduro
type: atari-enduro
metrics:
- type: total_reward
value: 108.52 +/- 42.73
name: Total reward
- type: expert_normalized_total_reward
value: 0.05 +/- 0.02
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.13 +/- 0.05
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Fishing Derby
type: atari-fishingderby
metrics:
- type: total_reward
value: '-30.35 +/- 24.37'
name: Total reward
- type: expert_normalized_total_reward
value: 0.62 +/- 0.25
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.47 +/- 0.19
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Freeway
type: atari-freeway
metrics:
- type: total_reward
value: 27.49 +/- 1.63
name: Total reward
- type: expert_normalized_total_reward
value: 0.81 +/- 0.05
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.93 +/- 0.06
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Frostbite
type: atari-frostbite
metrics:
- type: total_reward
value: 2769.60 +/- 1445.61
name: Total reward
- type: expert_normalized_total_reward
value: 0.21 +/- 0.11
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.63 +/- 0.34
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Gopher
type: atari-gopher
metrics:
- type: total_reward
value: 5340.60 +/- 2547.07
name: Total reward
- type: expert_normalized_total_reward
value: 0.06 +/- 0.03
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 2.36 +/- 1.18
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Gravitar
type: atari-gravitar
metrics:
- type: total_reward
value: 1269.50 +/- 902.99
name: Total reward
- type: expert_normalized_total_reward
value: 0.29 +/- 0.24
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.34 +/- 0.28
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: H.E.R.O.
type: atari-hero
metrics:
- type: total_reward
value: 11709.65 +/- 3233.53
name: Total reward
- type: expert_normalized_total_reward
value: 0.24 +/- 0.07
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.36 +/- 0.11
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Ice Hockey
type: atari-icehockey
metrics:
- type: total_reward
value: 7.48 +/- 5.60
name: Total reward
- type: expert_normalized_total_reward
value: 0.51 +/- 0.15
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 1.54 +/- 0.46
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: James Bond
type: atari-jamesbond
metrics:
- type: total_reward
value: 327.50 +/- 123.16
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.00
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 1.09 +/- 0.45
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Kangaroo
type: atari-kangaroo
metrics:
- type: total_reward
value: 378.00 +/- 343.97
name: Total reward
- type: expert_normalized_total_reward
value: 0.62 +/- 0.66
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.11 +/- 0.12
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Krull
type: atari-krull
metrics:
- type: total_reward
value: 10720.50 +/- 1284.13
name: Total reward
- type: expert_normalized_total_reward
value: 0.93 +/- 0.13
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 8.55 +/- 1.20
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Kung-Fu Master
type: atari-kungfumaster
metrics:
- type: total_reward
value: 288.00 +/- 255.06
name: Total reward
- type: expert_normalized_total_reward
value: 0.00 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.00 +/- 0.01
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Montezuma's Revenge
type: atari-montezumarevenge
metrics:
- type: total_reward
value: 0.00 +/- 0.00
name: Total reward
- type: expert_normalized_total_reward
value: 0.00 +/- 0.00
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.00 +/- 0.00
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Ms. Pacman
type: atari-mspacman
metrics:
- type: total_reward
value: 1573.10 +/- 483.96
name: Total reward
- type: expert_normalized_total_reward
value: 0.19 +/- 0.07
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.19 +/- 0.07
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Name This Game
type: atari-namethisgame
metrics:
- type: total_reward
value: 7523.30 +/- 2471.38
name: Total reward
- type: expert_normalized_total_reward
value: 0.25 +/- 0.12
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.91 +/- 0.43
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Phoenix
type: atari-phoenix
metrics:
- type: total_reward
value: 2197.90 +/- 1795.38
name: Total reward
- type: expert_normalized_total_reward
value: 0.00 +/- 0.00
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.22 +/- 0.28
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: PitFall
type: atari-pitfall
metrics:
- type: total_reward
value: '-6.68 +/- 19.05'
name: Total reward
- type: expert_normalized_total_reward
value: 0.98 +/- 0.08
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.03 +/- 0.00
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pong
type: atari-pong
metrics:
- type: total_reward
value: 13.69 +/- 13.35
name: Total reward
- type: expert_normalized_total_reward
value: 0.82 +/- 0.32
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.97 +/- 0.38
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Private Eye
type: atari-privateeye
metrics:
- type: total_reward
value: 44.00 +/- 49.64
name: Total reward
- type: expert_normalized_total_reward
value: 0.25 +/- 0.66
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.00 +/- 0.00
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Q*Bert
type: atari-qbert
metrics:
- type: total_reward
value: 1951.50 +/- 2577.24
name: Total reward
- type: expert_normalized_total_reward
value: 0.04 +/- 0.06
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.13 +/- 0.19
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: River Raid
type: atari-riverraid
metrics:
- type: total_reward
value: 3758.50 +/- 1536.66
name: Total reward
- type: expert_normalized_total_reward
value: 0.18 +/- 0.11
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.15 +/- 0.10
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Road Runner
type: atari-roadrunner
metrics:
- type: total_reward
value: 6407.00 +/- 4847.36
name: Total reward
- type: expert_normalized_total_reward
value: 0.08 +/- 0.06
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.82 +/- 0.62
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Robotank
type: atari-robotank
metrics:
- type: total_reward
value: 11.34 +/- 5.52
name: Total reward
- type: expert_normalized_total_reward
value: 0.12 +/- 0.07
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.94 +/- 0.57
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Seaquest
type: atari-seaquest
metrics:
- type: total_reward
value: 804.00 +/- 403.33
name: Total reward
- type: expert_normalized_total_reward
value: 0.29 +/- 0.16
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.02 +/- 0.01
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Skiing
type: atari-skiing
metrics:
- type: total_reward
value: '-16231.54 +/- 6060.48'
name: Total reward
- type: expert_normalized_total_reward
value: 0.14 +/- 0.95
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.07 +/- 0.47
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Solaris
type: atari-solaris
metrics:
- type: total_reward
value: 1286.60 +/- 446.70
name: Total reward
- type: expert_normalized_total_reward
value: 0.43 +/- 3.81
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.00 +/- 0.04
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Space Invaders
type: atari-spaceinvaders
metrics:
- type: total_reward
value: 325.45 +/- 163.36
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.12 +/- 0.11
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Star Gunner
type: atari-stargunner
metrics:
- type: total_reward
value: 4379.00 +/- 3027.22
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.01
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.39 +/- 0.32
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Surround
type: atari-surround
metrics:
- type: total_reward
value: 2.67 +/- 4.74
name: Total reward
- type: expert_normalized_total_reward
value: 0.65 +/- 0.24
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.77 +/- 0.29
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Tennis
type: atari-tennis
metrics:
- type: total_reward
value: '-13.46 +/- 3.80'
name: Total reward
- type: expert_normalized_total_reward
value: 0.30 +/- 0.11
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.32 +/- 0.12
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Time Pilot
type: atari-timepilot
metrics:
- type: total_reward
value: 13028.00 +/- 5222.57
name: Total reward
- type: expert_normalized_total_reward
value: 0.14 +/- 0.08
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 5.69 +/- 3.14
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Tutankham
type: atari-tutankham
metrics:
- type: total_reward
value: 85.66 +/- 61.77
name: Total reward
- type: expert_normalized_total_reward
value: 0.27 +/- 0.22
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.48 +/- 0.40
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Up and Down
type: atari-upndown
metrics:
- type: total_reward
value: 17768.70 +/- 10321.95
name: Total reward
- type: expert_normalized_total_reward
value: 0.04 +/- 0.02
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 1.54 +/- 0.92
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Venture
type: atari-venture
metrics:
- type: total_reward
value: 0.00 +/- 0.00
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.00
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.00 +/- 0.00
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Video Pinball
type: atari-videopinball
metrics:
- type: total_reward
value: 11917.43 +/- 8204.28
name: Total reward
- type: expert_normalized_total_reward
value: 0.03 +/- 0.02
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.67 +/- 0.46
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Wizard of Wor
type: atari-wizardofwor
metrics:
- type: total_reward
value: 2544.00 +/- 2902.42
name: Total reward
- type: expert_normalized_total_reward
value: 0.04 +/- 0.06
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.47 +/- 0.69
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Yars Revenge
type: atari-yarsrevenge
metrics:
- type: total_reward
value: 12532.70 +/- 8062.85
name: Total reward
- type: expert_normalized_total_reward
value: 0.04 +/- 0.03
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.18 +/- 0.16
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Zaxxon
type: atari-zaxxon
metrics:
- type: total_reward
value: 6902.00 +/- 3206.09
name: Total reward
- type: expert_normalized_total_reward
value: 0.09 +/- 0.04
name: Expert normalized total reward
- type: human_normalized_total_reward
value: 0.75 +/- 0.35
name: Human normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Action Obj Door
type: babyai-action-obj-door
metrics:
- type: total_reward
value: 0.95 +/- 0.13
name: Total reward
- type: expert_normalized_total_reward
value: 0.94 +/- 0.22
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Blocked Unlock Pickup
type: babyai-blocked-unlock-pickup
metrics:
- type: total_reward
value: 0.95 +/- 0.01
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.01
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Boss Level No Unlock
type: babyai-boss-level-no-unlock
metrics:
- type: total_reward
value: 0.44 +/- 0.45
name: Total reward
- type: expert_normalized_total_reward
value: 0.43 +/- 0.51
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Boss Level
type: babyai-boss-level
metrics:
- type: total_reward
value: 0.48 +/- 0.45
name: Total reward
- type: expert_normalized_total_reward
value: 0.48 +/- 0.51
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Find Obj S5
type: babyai-find-obj-s5
metrics:
- type: total_reward
value: 0.95 +/- 0.03
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Door
type: babyai-go-to-door
metrics:
- type: total_reward
value: 0.99 +/- 0.01
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.01
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Imp Unlock
type: babyai-go-to-imp-unlock
metrics:
- type: total_reward
value: 0.50 +/- 0.44
name: Total reward
- type: expert_normalized_total_reward
value: 0.56 +/- 0.59
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Local
type: babyai-go-to-local
metrics:
- type: total_reward
value: 0.88 +/- 0.14
name: Total reward
- type: expert_normalized_total_reward
value: 0.94 +/- 0.18
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Obj Door
type: babyai-go-to-obj-door
metrics:
- type: total_reward
value: 0.98 +/- 0.04
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.08
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Obj
type: babyai-go-to-obj
metrics:
- type: total_reward
value: 0.93 +/- 0.04
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.05
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Red Ball Grey
type: babyai-go-to-red-ball-grey
metrics:
- type: total_reward
value: 0.91 +/- 0.06
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.08
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Red Ball No Dists
type: babyai-go-to-red-ball-no-dists
metrics:
- type: total_reward
value: 0.93 +/- 0.03
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Red Ball
type: babyai-go-to-red-ball
metrics:
- type: total_reward
value: 0.91 +/- 0.08
name: Total reward
- type: expert_normalized_total_reward
value: 0.98 +/- 0.11
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Red Blue Ball
type: babyai-go-to-red-blue-ball
metrics:
- type: total_reward
value: 0.88 +/- 0.11
name: Total reward
- type: expert_normalized_total_reward
value: 0.96 +/- 0.13
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To Seq
type: babyai-go-to-seq
metrics:
- type: total_reward
value: 0.73 +/- 0.34
name: Total reward
- type: expert_normalized_total_reward
value: 0.75 +/- 0.40
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Go To
type: babyai-go-to
metrics:
- type: total_reward
value: 0.80 +/- 0.27
name: Total reward
- type: expert_normalized_total_reward
value: 0.85 +/- 0.35
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Key Corridor
type: babyai-key-corridor
metrics:
- type: total_reward
value: 0.88 +/- 0.10
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.11
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Mini Boss Level
type: babyai-mini-boss-level
metrics:
- type: total_reward
value: 0.69 +/- 0.35
name: Total reward
- type: expert_normalized_total_reward
value: 0.76 +/- 0.43
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Move Two Across S8N9
type: babyai-move-two-across-s8n9
metrics:
- type: total_reward
value: 0.03 +/- 0.15
name: Total reward
- type: expert_normalized_total_reward
value: 0.03 +/- 0.16
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: One Room S8
type: babyai-one-room-s8
metrics:
- type: total_reward
value: 0.92 +/- 0.03
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Open Door
type: babyai-open-door
metrics:
- type: total_reward
value: 0.99 +/- 0.00
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.01
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Open Doors Order N4
type: babyai-open-doors-order-n4
metrics:
- type: total_reward
value: 0.96 +/- 0.11
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.13
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Open Red Door
type: babyai-open-red-door
metrics:
- type: total_reward
value: 0.92 +/- 0.02
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.03
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Open Two Doors
type: babyai-open-two-doors
metrics:
- type: total_reward
value: 0.98 +/- 0.00
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.00
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Open
type: babyai-open
metrics:
- type: total_reward
value: 0.93 +/- 0.11
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.13
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pickup Above
type: babyai-pickup-above
metrics:
- type: total_reward
value: 0.92 +/- 0.06
name: Total reward
- type: expert_normalized_total_reward
value: 1.01 +/- 0.07
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pickup Dist
type: babyai-pickup-dist
metrics:
- type: total_reward
value: 0.88 +/- 0.13
name: Total reward
- type: expert_normalized_total_reward
value: 1.03 +/- 0.18
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pickup Loc
type: babyai-pickup-loc
metrics:
- type: total_reward
value: 0.84 +/- 0.20
name: Total reward
- type: expert_normalized_total_reward
value: 0.91 +/- 0.24
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pickup
type: babyai-pickup
metrics:
- type: total_reward
value: 0.72 +/- 0.34
name: Total reward
- type: expert_normalized_total_reward
value: 0.77 +/- 0.40
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Put Next Local
type: babyai-put-next-local
metrics:
- type: total_reward
value: 0.60 +/- 0.36
name: Total reward
- type: expert_normalized_total_reward
value: 0.65 +/- 0.39
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Put Next S7N4
type: babyai-put-next
metrics:
- type: total_reward
value: 0.82 +/- 0.26
name: Total reward
- type: expert_normalized_total_reward
value: 0.86 +/- 0.27
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Synth Loc
type: babyai-synth-loc
metrics:
- type: total_reward
value: 0.82 +/- 0.31
name: Total reward
- type: expert_normalized_total_reward
value: 0.85 +/- 0.38
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Synth Seq
type: babyai-synth-seq
metrics:
- type: total_reward
value: 0.57 +/- 0.44
name: Total reward
- type: expert_normalized_total_reward
value: 0.57 +/- 0.50
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Synth
type: babyai-synth
metrics:
- type: total_reward
value: 0.68 +/- 0.39
name: Total reward
- type: expert_normalized_total_reward
value: 0.69 +/- 0.47
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Unblock Pickup
type: babyai-unblock-pickup
metrics:
- type: total_reward
value: 0.76 +/- 0.33
name: Total reward
- type: expert_normalized_total_reward
value: 0.82 +/- 0.39
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Unlock Local
type: babyai-unlock-local
metrics:
- type: total_reward
value: 0.98 +/- 0.01
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.01
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Unlock Pickup
type: babyai-unlock-pickup
metrics:
- type: total_reward
value: 0.76 +/- 0.03
name: Total reward
- type: expert_normalized_total_reward
value: 1.01 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Unlock To Unlock
type: babyai-unlock-to-unlock
metrics:
- type: total_reward
value: 0.86 +/- 0.29
name: Total reward
- type: expert_normalized_total_reward
value: 0.89 +/- 0.30
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Unlock
type: babyai-unlock
metrics:
- type: total_reward
value: 0.55 +/- 0.42
name: Total reward
- type: expert_normalized_total_reward
value: 0.63 +/- 0.50
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Assembly
type: metaworld-assembly
metrics:
- type: total_reward
value: 238.32 +/- 32.98
name: Total reward
- type: expert_normalized_total_reward
value: 0.96 +/- 0.16
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Basketball
type: metaworld-basketball
metrics:
- type: total_reward
value: 1.59 +/- 0.43
name: Total reward
- type: expert_normalized_total_reward
value: '-0.00 +/- 0.00'
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: BinPicking
type: metaworld-bin-picking
metrics:
- type: total_reward
value: 374.18 +/- 168.23
name: Total reward
- type: expert_normalized_total_reward
value: 0.88 +/- 0.40
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Box Close
type: metaworld-box-close
metrics:
- type: total_reward
value: 510.10 +/- 117.47
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.27
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Button Press Topdown Wall
type: metaworld-button-press-topdown-wall
metrics:
- type: total_reward
value: 260.07 +/- 67.75
name: Total reward
- type: expert_normalized_total_reward
value: 0.49 +/- 0.14
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Button Press Topdown
type: metaworld-button-press-topdown
metrics:
- type: total_reward
value: 265.16 +/- 77.93
name: Total reward
- type: expert_normalized_total_reward
value: 0.51 +/- 0.17
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Button Press Wall
type: metaworld-button-press-wall
metrics:
- type: total_reward
value: 621.75 +/- 137.13
name: Total reward
- type: expert_normalized_total_reward
value: 0.92 +/- 0.21
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Button Press
type: metaworld-button-press
metrics:
- type: total_reward
value: 556.75 +/- 198.85
name: Total reward
- type: expert_normalized_total_reward
value: 0.86 +/- 0.33
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Coffee Button
type: metaworld-coffee-button
metrics:
- type: total_reward
value: 250.50 +/- 266.92
name: Total reward
- type: expert_normalized_total_reward
value: 0.31 +/- 0.38
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Coffee Pull
type: metaworld-coffee-pull
metrics:
- type: total_reward
value: 55.13 +/- 96.96
name: Total reward
- type: expert_normalized_total_reward
value: 0.20 +/- 0.38
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Coffee Push
type: metaworld-coffee-push
metrics:
- type: total_reward
value: 269.17 +/- 237.82
name: Total reward
- type: expert_normalized_total_reward
value: 0.54 +/- 0.48
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Dial Turn
type: metaworld-dial-turn
metrics:
- type: total_reward
value: 738.22 +/- 168.43
name: Total reward
- type: expert_normalized_total_reward
value: 0.93 +/- 0.22
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Disassemble
type: metaworld-disassemble
metrics:
- type: total_reward
value: 39.14 +/- 11.85
name: Total reward
- type: expert_normalized_total_reward
value: '-0.47 +/- 4.70'
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Door Close
type: metaworld-door-close
metrics:
- type: total_reward
value: 528.17 +/- 29.90
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.06
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Door Lock
type: metaworld-door-lock
metrics:
- type: total_reward
value: 676.51 +/- 192.68
name: Total reward
- type: expert_normalized_total_reward
value: 0.81 +/- 0.28
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Door Open
type: metaworld-door-open
metrics:
- type: total_reward
value: 572.76 +/- 57.53
name: Total reward
- type: expert_normalized_total_reward
value: 0.98 +/- 0.11
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Door Unlock
type: metaworld-door-unlock
metrics:
- type: total_reward
value: 654.94 +/- 260.64
name: Total reward
- type: expert_normalized_total_reward
value: 0.79 +/- 0.37
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Drawer Close
type: metaworld-drawer-close
metrics:
- type: total_reward
value: 663.02 +/- 214.51
name: Total reward
- type: expert_normalized_total_reward
value: 0.73 +/- 0.29
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Drawer Open
type: metaworld-drawer-open
metrics:
- type: total_reward
value: 489.07 +/- 21.28
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.06
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Faucet Close
type: metaworld-faucet-close
metrics:
- type: total_reward
value: 361.32 +/- 72.28
name: Total reward
- type: expert_normalized_total_reward
value: 0.22 +/- 0.14
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Faucet Open
type: metaworld-faucet-open
metrics:
- type: total_reward
value: 637.86 +/- 134.50
name: Total reward
- type: expert_normalized_total_reward
value: 0.85 +/- 0.29
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Hammer
type: metaworld-hammer
metrics:
- type: total_reward
value: 691.72 +/- 25.25
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Hand Insert
type: metaworld-hand-insert
metrics:
- type: total_reward
value: 719.57 +/- 99.26
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.13
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Handle Press Side
type: metaworld-handle-press-side
metrics:
- type: total_reward
value: 84.25 +/- 113.34
name: Total reward
- type: expert_normalized_total_reward
value: 0.03 +/- 0.14
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Handle Press
type: metaworld-handle-press
metrics:
- type: total_reward
value: 731.94 +/- 261.90
name: Total reward
- type: expert_normalized_total_reward
value: 0.84 +/- 0.34
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Handle Pull Side
type: metaworld-handle-pull-side
metrics:
- type: total_reward
value: 233.11 +/- 199.49
name: Total reward
- type: expert_normalized_total_reward
value: 0.60 +/- 0.52
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Handle Pull
type: metaworld-handle-pull
metrics:
- type: total_reward
value: 501.29 +/- 209.45
name: Total reward
- type: expert_normalized_total_reward
value: 0.74 +/- 0.32
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Lever Pull
type: metaworld-lever-pull
metrics:
- type: total_reward
value: 250.18 +/- 228.59
name: Total reward
- type: expert_normalized_total_reward
value: 0.34 +/- 0.41
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Peg Insert Side
type: metaworld-peg-insert-side
metrics:
- type: total_reward
value: 288.02 +/- 157.87
name: Total reward
- type: expert_normalized_total_reward
value: 0.91 +/- 0.50
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Peg Unplug Side
type: metaworld-peg-unplug-side
metrics:
- type: total_reward
value: 68.48 +/- 125.34
name: Total reward
- type: expert_normalized_total_reward
value: 0.14 +/- 0.28
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pick Out Of Hole
type: metaworld-pick-out-of-hole
metrics:
- type: total_reward
value: 2.08 +/- 0.05
name: Total reward
- type: expert_normalized_total_reward
value: 0.00 +/- 0.00
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pick Place Wall
type: metaworld-pick-place-wall
metrics:
- type: total_reward
value: 6.87 +/- 44.99
name: Total reward
- type: expert_normalized_total_reward
value: 0.02 +/- 0.10
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pick Place
type: metaworld-pick-place
metrics:
- type: total_reward
value: 264.18 +/- 195.69
name: Total reward
- type: expert_normalized_total_reward
value: 0.63 +/- 0.47
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Plate Slide Back Side
type: metaworld-plate-slide-back-side
metrics:
- type: total_reward
value: 697.54 +/- 137.79
name: Total reward
- type: expert_normalized_total_reward
value: 0.95 +/- 0.20
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Plate Slide Back
type: metaworld-plate-slide-back
metrics:
- type: total_reward
value: 196.80 +/- 1.73
name: Total reward
- type: expert_normalized_total_reward
value: 0.24 +/- 0.00
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Plate Slide Side
type: metaworld-plate-slide-side
metrics:
- type: total_reward
value: 122.61 +/- 24.52
name: Total reward
- type: expert_normalized_total_reward
value: 0.16 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Plate Slide
type: metaworld-plate-slide
metrics:
- type: total_reward
value: 497.42 +/- 168.74
name: Total reward
- type: expert_normalized_total_reward
value: 0.93 +/- 0.37
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Push Back
type: metaworld-push-back
metrics:
- type: total_reward
value: 91.41 +/- 115.05
name: Total reward
- type: expert_normalized_total_reward
value: 1.08 +/- 1.37
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Push Wall
type: metaworld-push-wall
metrics:
- type: total_reward
value: 116.49 +/- 208.05
name: Total reward
- type: expert_normalized_total_reward
value: 0.15 +/- 0.28
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Push
type: metaworld-push
metrics:
- type: total_reward
value: 604.25 +/- 261.90
name: Total reward
- type: expert_normalized_total_reward
value: 0.80 +/- 0.35
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Reach Wall
type: metaworld-reach-wall
metrics:
- type: total_reward
value: 634.57 +/- 231.40
name: Total reward
- type: expert_normalized_total_reward
value: 0.81 +/- 0.38
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Reach
type: metaworld-reach
metrics:
- type: total_reward
value: 325.27 +/- 159.21
name: Total reward
- type: expert_normalized_total_reward
value: 0.33 +/- 0.30
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Shelf Place
type: metaworld-shelf-place
metrics:
- type: total_reward
value: 124.60 +/- 112.83
name: Total reward
- type: expert_normalized_total_reward
value: 0.52 +/- 0.47
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Soccer
type: metaworld-soccer
metrics:
- type: total_reward
value: 364.50 +/- 175.45
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.47
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Stick Pull
type: metaworld-stick-pull
metrics:
- type: total_reward
value: 398.64 +/- 205.60
name: Total reward
- type: expert_normalized_total_reward
value: 0.76 +/- 0.39
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Stick Push
type: metaworld-stick-push
metrics:
- type: total_reward
value: 158.29 +/- 264.59
name: Total reward
- type: expert_normalized_total_reward
value: 0.25 +/- 0.42
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Sweep Into
type: metaworld-sweep-into
metrics:
- type: total_reward
value: 775.30 +/- 119.00
name: Total reward
- type: expert_normalized_total_reward
value: 0.97 +/- 0.15
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Sweep
type: metaworld-sweep
metrics:
- type: total_reward
value: 15.64 +/- 9.29
name: Total reward
- type: expert_normalized_total_reward
value: 0.01 +/- 0.02
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Window Close
type: metaworld-window-close
metrics:
- type: total_reward
value: 423.33 +/- 203.92
name: Total reward
- type: expert_normalized_total_reward
value: 0.69 +/- 0.38
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Window Open
type: metaworld-window-open
metrics:
- type: total_reward
value: 593.10 +/- 54.83
name: Total reward
- type: expert_normalized_total_reward
value: 1.00 +/- 0.10
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Ant
type: mujoco-ant
metrics:
- type: total_reward
value: 5268.02 +/- 1495.39
name: Total reward
- type: expert_normalized_total_reward
value: 0.90 +/- 0.25
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Inverted Double Pendulum
type: mujoco-doublependulum
metrics:
- type: total_reward
value: 4750.14 +/- 931.20
name: Total reward
- type: expert_normalized_total_reward
value: 0.51 +/- 0.10
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Half Cheetah
type: mujoco-halfcheetah
metrics:
- type: total_reward
value: 6659.69 +/- 409.71
name: Total reward
- type: expert_normalized_total_reward
value: 0.90 +/- 0.05
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Hopper
type: mujoco-hopper
metrics:
- type: total_reward
value: 1835.93 +/- 532.21
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.29
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Humanoid
type: mujoco-humanoid
metrics:
- type: total_reward
value: 697.44 +/- 108.06
name: Total reward
- type: expert_normalized_total_reward
value: 0.09 +/- 0.02
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Inverted Pendulum
type: mujoco-pendulum
metrics:
- type: total_reward
value: 116.34 +/- 20.19
name: Total reward
- type: expert_normalized_total_reward
value: 0.23 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Pusher
type: mujoco-pusher
metrics:
- type: total_reward
value: '-26.33 +/- 6.32'
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.05
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Reacher
type: mujoco-reacher
metrics:
- type: total_reward
value: '-6.06 +/- 2.64'
name: Total reward
- type: expert_normalized_total_reward
value: 0.99 +/- 0.07
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Humanoid Standup
type: mujoco-standup
metrics:
- type: total_reward
value: 118125.15 +/- 24880.28
name: Total reward
- type: expert_normalized_total_reward
value: 0.35 +/- 0.10
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Swimmer
type: mujoco-swimmer
metrics:
- type: total_reward
value: 93.26 +/- 3.78
name: Total reward
- type: expert_normalized_total_reward
value: 1.01 +/- 0.04
name: Expert normalized total reward
- task:
type: reinforcement-learning
name: Reinforcement Learning
dataset:
name: Walker 2d
type: mujoco-walker
metrics:
- type: total_reward
value: 4662.43 +/- 762.67
name: Total reward
- type: expert_normalized_total_reward
value: 1.01 +/- 0.16
name: Expert normalized total reward
Model Card for JAT
This is a multi-modal and multi-task model.
Model Details
Model Description
- Developed by: The JAT Team
- License: Apache 2.0
Model Sources
- Repository: https://github.com/huggingface/jat
- Paper: https://huggingface.co/papers/2402.09844
- Demo: Coming soon
Training
The model was trained on the following tasks:
- Alien - Amidar - Assault - Asterix - Asteroids - Atlantis - Bank Heist - Battle Zone - Beam Rider - Berzerk - Bowling - Boxing - Breakout - Centipede - Chopper Command - Crazy Climber - Defender - Demon Attack - Double Dunk - Enduro - Fishing Derby - Freeway - Frostbite - Gopher - Gravitar - H.E.R.O. - Ice Hockey - James Bond - Kangaroo - Krull - Kung-Fu Master - Montezuma's Revenge - Ms. Pacman - Name This Game - Phoenix - PitFall - Pong - Private Eye - Q*Bert - River Raid - Road Runner - Robotank - Seaquest - Skiing - Solaris - Space Invaders - Star Gunner - Surround - Tennis - Time Pilot - Tutankham - Up and Down - Venture - Video Pinball - Wizard of Wor - Yars Revenge - Zaxxon - Action Obj Door - Blocked Unlock Pickup - Boss Level No Unlock - Boss Level - Find Obj S5 - Go To Door - Go To Imp Unlock - Go To Local - Go To Obj Door - Go To Obj - Go To Red Ball Grey - Go To Red Ball No Dists - Go To Red Ball - Go To Red Blue Ball - Go To Seq - Go To - Key Corridor - Mini Boss Level - Move Two Across S8N9 - One Room S8 - Open Door - Open Doors Order N4 - Open Red Door - Open Two Doors - Open - Pickup Above - Pickup Dist - Pickup Loc - Pickup - Put Next Local - Put Next S7N4 - Synth Loc - Synth Seq - Synth - Unblock Pickup - Unlock Local - Unlock Pickup - Unlock To Unlock - Unlock - Assembly - Basketball - BinPicking - Box Close - Button Press Topdown Wall - Button Press Topdown - Button Press Wall - Button Press - Coffee Button - Coffee Pull - Coffee Push - Dial Turn - Disassemble - Door Close - Door Lock - Door Open - Door Unlock - Drawer Close - Drawer Open - Faucet Close - Faucet Open - Hammer - Hand Insert - Handle Press Side - Handle Press - Handle Pull Side - Handle Pull - Lever Pull - Peg Insert Side - Peg Unplug Side - Pick Out Of Hole - Pick Place Wall - Pick Place - Plate Slide Back Side - Plate Slide Back - Plate Slide Side - Plate Slide - Push Back - Push Wall - Push - Reach Wall - Reach - Shelf Place - Soccer - Stick Pull - Stick Push - Sweep Into - 
Sweep - Window Close - Window Open - Ant - Inverted Double Pendulum - Half Cheetah - Hopper - Humanoid - Inverted Pendulum - Pusher - Reacher - Humanoid Standup - Swimmer - Walker 2d

How to Get Started with the Model
Use the code below to get started with the model.
```python
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
```