Train an Agent using Adversarial Inverse Reinforcement Learning
As usual, we first need an expert. Again, we download one from the HuggingFace model hub for convenience.
Note that we now use a variant of the CartPole environment from the seals package, which has fixed episode durations. Read more about why we do this in the imitation documentation's discussion of variable-horizon environments.
import numpy as np
from imitation.policies.serialize import load_policy
from imitation.util.util import make_vec_env
from imitation.data.wrappers import RolloutInfoWrapper
# Seed reused below for the environment RNG, the rollout RNG, the PPO learner
# and both evaluation runs.
SEED = 42

# FAST picks the short training budget; set it to False for the long run.
FAST = True

# Number of steps handed to `airl_trainer.train`.
# Written as a single expression so the assignment does not depend on the
# indentation of an if/else body.
N_RL_TRAIN_STEPS = 100_000 if FAST else 2_000_000
def _attach_rollout_info(env, _env_index):
    """Wrap `env` in a RolloutInfoWrapper; the second argument is unused."""
    return RolloutInfoWrapper(env)


# Vectorized seals CartPole environment made of 8 sub-environments.
venv = make_vec_env(
    "seals:seals/CartPole-v0",
    n_envs=8,
    rng=np.random.default_rng(SEED),
    post_wrappers=[_attach_rollout_info],  # needed for computing rollouts later
)

# Expert policy loaded via the "ppo-huggingface" loader from the
# HumanCompatibleAI organization, bound to the same vectorized environment.
expert = load_policy(
    "ppo-huggingface",
    venv=venv,
    env_name="seals/CartPole-v0",
    organization="HumanCompatibleAI",
)
We generate some expert trajectories that the discriminator needs to distinguish from the learner’s trajectories.
from imitation.data import rollout
# Collect expert demonstrations: the stopping rule asks for a minimum of
# 60 episodes and places no minimum on the number of timesteps.
rollouts = rollout.rollout(
    policy=expert,
    venv=venv,
    sample_until=rollout.make_sample_until(min_episodes=60, min_timesteps=None),
    rng=np.random.default_rng(SEED),
)
Now we are ready to set up our AIRL trainer.
Note that the reward_net is actually the network of the discriminator.
We evaluate the learner before and after training so we can see if it made any progress.
from imitation.algorithms.adversarial.airl import AIRL
from imitation.rewards.reward_nets import BasicShapedRewardNet
from imitation.util.networks import RunningNorm
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy
# PPO learner; it is passed to the AIRL trainer further down as `gen_algo`.
learner = PPO(
    policy=MlpPolicy,
    env=venv,
    seed=SEED,
    # Optimization settings.
    learning_rate=0.0005,
    batch_size=64,
    n_epochs=5,
    # Discounting, clipping and loss weighting.
    gamma=0.95,
    clip_range=0.1,
    ent_coef=0.0,
    vf_coef=0.1,
)
# Reward network given to the AIRL trainer; per the tutorial text it is also
# the discriminator's network. Inputs go through a RunningNorm layer.
reward_net = BasicShapedRewardNet(
    normalize_input_layer=RunningNorm,
    action_space=venv.action_space,
    observation_space=venv.observation_space,
)
# AIRL trainer tying together the environment, the PPO generator, the reward
# network and the expert demonstrations.
airl_trainer = AIRL(
    venv=venv,
    gen_algo=learner,
    reward_net=reward_net,
    demonstrations=rollouts,
    # Discriminator / replay-buffer settings.
    demo_batch_size=2048,
    n_disc_updates_per_round=16,
    gen_replay_buffer_capacity=512,
)
# Baseline: score the learner over 100 episodes before any AIRL training.
venv.seed(SEED)
learner_rewards_before_training, _ = evaluate_policy(
    model=learner,
    env=venv,
    n_eval_episodes=100,
    return_episode_rewards=True,
)

# Run adversarial training for the configured number of steps.
airl_trainer.train(N_RL_TRAIN_STEPS)

# Re-seed with the same value, then score the trained learner the same way.
venv.seed(SEED)
learner_rewards_after_training, _ = evaluate_policy(
    model=learner,
    env=venv,
    n_eval_episodes=100,
    return_episode_rewards=True,
)
------------------------------------------
| raw/ | |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 33.1 |
| gen/time/fps | 3535 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 16384 |
------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.581 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.162 |
| disc/disc_entropy | 0.664 |
| disc/disc_loss | 0.676 |
| disc/disc_proportion_expert_pred | 0.919 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.586 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.172 |
| disc/disc_entropy | 0.664 |
| disc/disc_loss | 0.673 |
| disc/disc_proportion_expert_pred | 0.914 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.593 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.186 |
| disc/disc_entropy | 0.665 |
| disc/disc_loss | 0.669 |
| disc/disc_proportion_expert_pred | 0.907 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.591 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.182 |
| disc/disc_entropy | 0.666 |
| disc/disc_loss | 0.672 |
| disc/disc_proportion_expert_pred | 0.909 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.598 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.197 |
| disc/disc_entropy | 0.666 |
| disc/disc_loss | 0.665 |
| disc/disc_proportion_expert_pred | 0.902 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.606 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.211 |
| disc/disc_entropy | 0.666 |
| disc/disc_loss | 0.662 |
| disc/disc_proportion_expert_pred | 0.894 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.605 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.21 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.659 |
| disc/disc_proportion_expert_pred | 0.895 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.598 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.196 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.66 |
| disc/disc_proportion_expert_pred | 0.902 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.613 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.226 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.654 |
| disc/disc_proportion_expert_pred | 0.887 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.623 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.246 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.65 |
| disc/disc_proportion_expert_pred | 0.877 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.617 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.235 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.651 |
| disc/disc_proportion_expert_pred | 0.883 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.632 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.264 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.645 |
| disc/disc_proportion_expert_pred | 0.868 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.629 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.258 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.644 |
| disc/disc_proportion_expert_pred | 0.871 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.643 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.286 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.641 |
| disc/disc_proportion_expert_pred | 0.857 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.646 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.292 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.637 |
| disc/disc_proportion_expert_pred | 0.854 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.653 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.305 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.633 |
| disc/disc_proportion_expert_pred | 0.847 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| mean/ | |
| disc/disc_acc | 0.613 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.227 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.656 |
| disc/disc_proportion_expert_pred | 0.887 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 1 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 33.1 |
| gen/time/fps | 3.54e+03 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 1.64e+04 |
| gen/train/approx_kl | 0.00136 |
| gen/train/clip_fraction | 0.0238 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.692 |
| gen/train/explained_variance | -0.0116 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 3.17 |
| gen/train/n_updates | 5 |
| gen/train/policy_gradient_loss | 7.75e-06 |
| gen/train/value_loss | 117 |
--------------------------------------------------
-----------------------------------------------------
| raw/ | |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 34.6 |
| gen/rollout/ep_rew_wrapped_mean | -525 |
| gen/time/fps | 3538 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 32768 |
| gen/train/approx_kl | 0.0013636536 |
| gen/train/clip_fraction | 0.0238 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.692 |
| gen/train/explained_variance | -0.0116 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 3.17 |
| gen/train/n_updates | 5 |
| gen/train/policy_gradient_loss | 7.75e-06 |
| gen/train/value_loss | 117 |
-----------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.68 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.36 |
| disc/disc_entropy | 0.664 |
| disc/disc_loss | 0.618 |
| disc/disc_proportion_expert_pred | 0.82 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.687 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.375 |
| disc/disc_entropy | 0.664 |
| disc/disc_loss | 0.615 |
| disc/disc_proportion_expert_pred | 0.813 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.684 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.368 |
| disc/disc_entropy | 0.665 |
| disc/disc_loss | 0.615 |
| disc/disc_proportion_expert_pred | 0.816 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.688 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.376 |
| disc/disc_entropy | 0.666 |
| disc/disc_loss | 0.617 |
| disc/disc_proportion_expert_pred | 0.812 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.687 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.373 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.616 |
| disc/disc_proportion_expert_pred | 0.813 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.684 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.368 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.619 |
| disc/disc_proportion_expert_pred | 0.816 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.677 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.353 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.62 |
| disc/disc_proportion_expert_pred | 0.823 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.683 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.366 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.619 |
| disc/disc_proportion_expert_pred | 0.817 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.69 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.38 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.614 |
| disc/disc_proportion_expert_pred | 0.81 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.69 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.381 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.615 |
| disc/disc_proportion_expert_pred | 0.81 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.688 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.377 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.617 |
| disc/disc_proportion_expert_pred | 0.812 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.706 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.412 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.61 |
| disc/disc_proportion_expert_pred | 0.794 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.697 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.395 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.613 |
| disc/disc_proportion_expert_pred | 0.803 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.706 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.412 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.608 |
| disc/disc_proportion_expert_pred | 0.794 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.713 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.426 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.607 |
| disc/disc_proportion_expert_pred | 0.787 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.705 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.409 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.605 |
| disc/disc_proportion_expert_pred | 0.795 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
---------------------------------------------------
| mean/ | |
| disc/disc_acc | 0.692 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.383 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.614 |
| disc/disc_proportion_expert_pred | 0.808 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 2 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 34.6 |
| gen/rollout/ep_rew_wrapped_mean | -525 |
| gen/time/fps | 3.54e+03 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 3.28e+04 |
| gen/train/approx_kl | 0.0011 |
| gen/train/clip_fraction | 0.00289 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.691 |
| gen/train/explained_variance | 0.178 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 171 |
| gen/train/n_updates | 10 |
| gen/train/policy_gradient_loss | -7.06e-06 |
| gen/train/value_loss | 4.82e+03 |
---------------------------------------------------
-----------------------------------------------------
| raw/ | |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 35.4 |
| gen/rollout/ep_rew_wrapped_mean | -1.47e+03 |
| gen/time/fps | 3526 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 49152 |
| gen/train/approx_kl | 0.0010964434 |
| gen/train/clip_fraction | 0.00289 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.691 |
| gen/train/explained_variance | 0.178 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 171 |
| gen/train/n_updates | 10 |
| gen/train/policy_gradient_loss | -7.06e-06 |
| gen/train/value_loss | 4.82e+03 |
-----------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.707 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.413 |
| disc/disc_entropy | 0.673 |
| disc/disc_loss | 0.633 |
| disc/disc_proportion_expert_pred | 0.793 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.711 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.422 |
| disc/disc_entropy | 0.673 |
| disc/disc_loss | 0.631 |
| disc/disc_proportion_expert_pred | 0.789 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.721 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.442 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.63 |
| disc/disc_proportion_expert_pred | 0.779 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.719 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.438 |
| disc/disc_entropy | 0.673 |
| disc/disc_loss | 0.629 |
| disc/disc_proportion_expert_pred | 0.781 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.726 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.451 |
| disc/disc_entropy | 0.673 |
| disc/disc_loss | 0.626 |
| disc/disc_proportion_expert_pred | 0.774 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.729 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.458 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.623 |
| disc/disc_proportion_expert_pred | 0.771 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.737 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.474 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.62 |
| disc/disc_proportion_expert_pred | 0.763 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.749 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.497 |
| disc/disc_entropy | 0.675 |
| disc/disc_loss | 0.615 |
| disc/disc_proportion_expert_pred | 0.751 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.743 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.485 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.618 |
| disc/disc_proportion_expert_pred | 0.757 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.74 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.479 |
| disc/disc_entropy | 0.675 |
| disc/disc_loss | 0.617 |
| disc/disc_proportion_expert_pred | 0.76 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.752 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.504 |
| disc/disc_entropy | 0.675 |
| disc/disc_loss | 0.611 |
| disc/disc_proportion_expert_pred | 0.748 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.764 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.528 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.609 |
| disc/disc_proportion_expert_pred | 0.736 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.758 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.516 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.61 |
| disc/disc_proportion_expert_pred | 0.742 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.759 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.519 |
| disc/disc_entropy | 0.675 |
| disc/disc_loss | 0.609 |
| disc/disc_proportion_expert_pred | 0.741 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.769 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.537 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.604 |
| disc/disc_proportion_expert_pred | 0.731 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.778 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.555 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.599 |
| disc/disc_proportion_expert_pred | 0.722 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
---------------------------------------------------
| mean/ | |
| disc/disc_acc | 0.741 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.482 |
| disc/disc_entropy | 0.674 |
| disc/disc_loss | 0.618 |
| disc/disc_proportion_expert_pred | 0.759 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 3 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 35.4 |
| gen/rollout/ep_rew_wrapped_mean | -1.47e+03 |
| gen/time/fps | 3.53e+03 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 4.92e+04 |
| gen/train/approx_kl | 0.00162 |
| gen/train/clip_fraction | 0.0488 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.691 |
| gen/train/explained_variance | 0.66 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 89.1 |
| gen/train/n_updates | 15 |
| gen/train/policy_gradient_loss | -0.00034 |
| gen/train/value_loss | 1.37e+03 |
---------------------------------------------------
-----------------------------------------------------
| raw/ | |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 38.2 |
| gen/rollout/ep_rew_wrapped_mean | -1.52e+03 |
| gen/time/fps | 3447 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 65536 |
| gen/train/approx_kl | 0.0016218722 |
| gen/train/clip_fraction | 0.0488 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.691 |
| gen/train/explained_variance | 0.66 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 89.1 |
| gen/train/n_updates | 15 |
| gen/train/policy_gradient_loss | -0.00034 |
| gen/train/value_loss | 1.37e+03 |
-----------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.782 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.564 |
| disc/disc_entropy | 0.666 |
| disc/disc_loss | 0.62 |
| disc/disc_proportion_expert_pred | 0.718 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.799 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.599 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.614 |
| disc/disc_proportion_expert_pred | 0.701 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.787 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.574 |
| disc/disc_entropy | 0.667 |
| disc/disc_loss | 0.616 |
| disc/disc_proportion_expert_pred | 0.713 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.789 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.577 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.616 |
| disc/disc_proportion_expert_pred | 0.711 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.79 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.58 |
| disc/disc_entropy | 0.668 |
| disc/disc_loss | 0.612 |
| disc/disc_proportion_expert_pred | 0.71 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.812 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.623 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.604 |
| disc/disc_proportion_expert_pred | 0.688 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.806 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.612 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.6 |
| disc/disc_proportion_expert_pred | 0.694 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.798 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.597 |
| disc/disc_entropy | 0.671 |
| disc/disc_loss | 0.605 |
| disc/disc_proportion_expert_pred | 0.702 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.804 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.609 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.6 |
| disc/disc_proportion_expert_pred | 0.696 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.818 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.636 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.593 |
| disc/disc_proportion_expert_pred | 0.682 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.813 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.626 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.593 |
| disc/disc_proportion_expert_pred | 0.687 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.826 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.651 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.588 |
| disc/disc_proportion_expert_pred | 0.674 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.824 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.647 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.585 |
| disc/disc_proportion_expert_pred | 0.676 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.83 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.66 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.581 |
| disc/disc_proportion_expert_pred | 0.67 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.829 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.658 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.583 |
| disc/disc_proportion_expert_pred | 0.671 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.834 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.668 |
| disc/disc_entropy | 0.67 |
| disc/disc_loss | 0.579 |
| disc/disc_proportion_expert_pred | 0.666 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
---------------------------------------------------
| mean/ | |
| disc/disc_acc | 0.809 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.618 |
| disc/disc_entropy | 0.669 |
| disc/disc_loss | 0.599 |
| disc/disc_proportion_expert_pred | 0.691 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 4 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 38.2 |
| gen/rollout/ep_rew_wrapped_mean | -1.52e+03 |
| gen/time/fps | 3.45e+03 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 6.55e+04 |
| gen/train/approx_kl | 0.00297 |
| gen/train/clip_fraction | 0.146 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.687 |
| gen/train/explained_variance | 0.877 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 4.76 |
| gen/train/n_updates | 20 |
| gen/train/policy_gradient_loss | -0.00277 |
| gen/train/value_loss | 266 |
---------------------------------------------------
-----------------------------------------------------
| raw/ | |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 40.5 |
| gen/rollout/ep_rew_wrapped_mean | -1.69e+03 |
| gen/time/fps | 3531 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 81920 |
| gen/train/approx_kl | 0.0029702676 |
| gen/train/clip_fraction | 0.146 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.687 |
| gen/train/explained_variance | 0.877 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 4.76 |
| gen/train/n_updates | 20 |
| gen/train/policy_gradient_loss | -0.00277 |
| gen/train/value_loss | 266 |
-----------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.712 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.426 |
| disc/disc_entropy | 0.682 |
| disc/disc_loss | 0.646 |
| disc/disc_proportion_expert_pred | 0.786 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.716 |
| disc/disc_acc_expert | 0.996 |
| disc/disc_acc_gen | 0.435 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.647 |
| disc/disc_proportion_expert_pred | 0.781 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.728 |
| disc/disc_acc_expert | 0.999 |
| disc/disc_acc_gen | 0.457 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.644 |
| disc/disc_proportion_expert_pred | 0.771 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.749 |
| disc/disc_acc_expert | 1 |
| disc/disc_acc_gen | 0.498 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.643 |
| disc/disc_proportion_expert_pred | 0.751 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.767 |
| disc/disc_acc_expert | 0.997 |
| disc/disc_acc_gen | 0.537 |
| disc/disc_entropy | 0.684 |
| disc/disc_loss | 0.637 |
| disc/disc_proportion_expert_pred | 0.73 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.797 |
| disc/disc_acc_expert | 0.997 |
| disc/disc_acc_gen | 0.597 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.63 |
| disc/disc_proportion_expert_pred | 0.7 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.807 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.617 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.63 |
| disc/disc_proportion_expert_pred | 0.691 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.844 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.69 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.622 |
| disc/disc_proportion_expert_pred | 0.654 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.845 |
| disc/disc_acc_expert | 0.999 |
| disc/disc_acc_gen | 0.692 |
| disc/disc_entropy | 0.683 |
| disc/disc_loss | 0.619 |
| disc/disc_proportion_expert_pred | 0.653 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.863 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.729 |
| disc/disc_entropy | 0.682 |
| disc/disc_loss | 0.614 |
| disc/disc_proportion_expert_pred | 0.635 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.866 |
| disc/disc_acc_expert | 0.999 |
| disc/disc_acc_gen | 0.733 |
| disc/disc_entropy | 0.682 |
| disc/disc_loss | 0.611 |
| disc/disc_proportion_expert_pred | 0.633 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.866 |
| disc/disc_acc_expert | 0.999 |
| disc/disc_acc_gen | 0.733 |
| disc/disc_entropy | 0.682 |
| disc/disc_loss | 0.609 |
| disc/disc_proportion_expert_pred | 0.633 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.876 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.754 |
| disc/disc_entropy | 0.681 |
| disc/disc_loss | 0.605 |
| disc/disc_proportion_expert_pred | 0.622 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.891 |
| disc/disc_acc_expert | 0.999 |
| disc/disc_acc_gen | 0.784 |
| disc/disc_entropy | 0.68 |
| disc/disc_loss | 0.599 |
| disc/disc_proportion_expert_pred | 0.608 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.893 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.788 |
| disc/disc_entropy | 0.68 |
| disc/disc_loss | 0.598 |
| disc/disc_proportion_expert_pred | 0.605 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.893 |
| disc/disc_acc_expert | 0.997 |
| disc/disc_acc_gen | 0.788 |
| disc/disc_entropy | 0.68 |
| disc/disc_loss | 0.597 |
| disc/disc_proportion_expert_pred | 0.604 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
---------------------------------------------------
| mean/ | |
| disc/disc_acc | 0.82 |
| disc/disc_acc_expert | 0.998 |
| disc/disc_acc_gen | 0.641 |
| disc/disc_entropy | 0.682 |
| disc/disc_loss | 0.622 |
| disc/disc_proportion_expert_pred | 0.678 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 5 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 40.5 |
| gen/rollout/ep_rew_wrapped_mean | -1.69e+03 |
| gen/time/fps | 3.53e+03 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 8.19e+04 |
| gen/train/approx_kl | 0.00237 |
| gen/train/clip_fraction | 0.136 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.686 |
| gen/train/explained_variance | 0.799 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 12.1 |
| gen/train/n_updates | 25 |
| gen/train/policy_gradient_loss | -0.00326 |
| gen/train/value_loss | 37.5 |
---------------------------------------------------
---------------------------------------------------
| raw/ | |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 43.8 |
| gen/rollout/ep_rew_wrapped_mean | -1.38e+03 |
| gen/time/fps | 3527 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 98304 |
| gen/train/approx_kl | 0.00236941 |
| gen/train/clip_fraction | 0.136 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.686 |
| gen/train/explained_variance | 0.799 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 12.1 |
| gen/train/n_updates | 25 |
| gen/train/policy_gradient_loss | -0.00326 |
| gen/train/value_loss | 37.5 |
---------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.955 |
| disc/disc_acc_expert | 0.94 |
| disc/disc_acc_gen | 0.971 |
| disc/disc_entropy | 0.646 |
| disc/disc_loss | 0.515 |
| disc/disc_proportion_expert_pred | 0.485 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.952 |
| disc/disc_acc_expert | 0.935 |
| disc/disc_acc_gen | 0.97 |
| disc/disc_entropy | 0.645 |
| disc/disc_loss | 0.514 |
| disc/disc_proportion_expert_pred | 0.483 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.942 |
| disc/disc_acc_expert | 0.917 |
| disc/disc_acc_gen | 0.967 |
| disc/disc_entropy | 0.646 |
| disc/disc_loss | 0.515 |
| disc/disc_proportion_expert_pred | 0.475 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.936 |
| disc/disc_acc_expert | 0.908 |
| disc/disc_acc_gen | 0.964 |
| disc/disc_entropy | 0.644 |
| disc/disc_loss | 0.513 |
| disc/disc_proportion_expert_pred | 0.472 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.935 |
| disc/disc_acc_expert | 0.906 |
| disc/disc_acc_gen | 0.965 |
| disc/disc_entropy | 0.645 |
| disc/disc_loss | 0.513 |
| disc/disc_proportion_expert_pred | 0.47 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.954 |
| disc/disc_acc_expert | 0.936 |
| disc/disc_acc_gen | 0.971 |
| disc/disc_entropy | 0.643 |
| disc/disc_loss | 0.509 |
| disc/disc_proportion_expert_pred | 0.482 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.948 |
| disc/disc_acc_expert | 0.93 |
| disc/disc_acc_gen | 0.966 |
| disc/disc_entropy | 0.642 |
| disc/disc_loss | 0.508 |
| disc/disc_proportion_expert_pred | 0.482 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.956 |
| disc/disc_acc_expert | 0.944 |
| disc/disc_acc_gen | 0.968 |
| disc/disc_entropy | 0.642 |
| disc/disc_loss | 0.506 |
| disc/disc_proportion_expert_pred | 0.488 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.958 |
| disc/disc_acc_expert | 0.957 |
| disc/disc_acc_gen | 0.958 |
| disc/disc_entropy | 0.639 |
| disc/disc_loss | 0.503 |
| disc/disc_proportion_expert_pred | 0.499 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.963 |
| disc/disc_acc_expert | 0.959 |
| disc/disc_acc_gen | 0.967 |
| disc/disc_entropy | 0.639 |
| disc/disc_loss | 0.501 |
| disc/disc_proportion_expert_pred | 0.496 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.963 |
| disc/disc_acc_expert | 0.965 |
| disc/disc_acc_gen | 0.961 |
| disc/disc_entropy | 0.639 |
| disc/disc_loss | 0.5 |
| disc/disc_proportion_expert_pred | 0.502 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.97 |
| disc/disc_acc_expert | 0.973 |
| disc/disc_acc_gen | 0.968 |
| disc/disc_entropy | 0.636 |
| disc/disc_loss | 0.495 |
| disc/disc_proportion_expert_pred | 0.502 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.973 |
| disc/disc_acc_expert | 0.979 |
| disc/disc_acc_gen | 0.966 |
| disc/disc_entropy | 0.637 |
| disc/disc_loss | 0.497 |
| disc/disc_proportion_expert_pred | 0.507 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.97 |
| disc/disc_acc_expert | 0.971 |
| disc/disc_acc_gen | 0.969 |
| disc/disc_entropy | 0.634 |
| disc/disc_loss | 0.493 |
| disc/disc_proportion_expert_pred | 0.501 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.978 |
| disc/disc_acc_expert | 0.988 |
| disc/disc_acc_gen | 0.968 |
| disc/disc_entropy | 0.635 |
| disc/disc_loss | 0.494 |
| disc/disc_proportion_expert_pred | 0.51 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
--------------------------------------------------
| raw/ | |
| disc/disc_acc | 0.976 |
| disc/disc_acc_expert | 0.984 |
| disc/disc_acc_gen | 0.967 |
| disc/disc_entropy | 0.634 |
| disc/disc_loss | 0.49 |
| disc/disc_proportion_expert_pred | 0.509 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
--------------------------------------------------
---------------------------------------------------
| mean/ | |
| disc/disc_acc | 0.958 |
| disc/disc_acc_expert | 0.95 |
| disc/disc_acc_gen | 0.967 |
| disc/disc_entropy | 0.64 |
| disc/disc_loss | 0.504 |
| disc/disc_proportion_expert_pred | 0.491 |
| disc/disc_proportion_expert_true | 0.5 |
| disc/global_step | 6 |
| disc/n_expert | 2.05e+03 |
| disc/n_generated | 2.05e+03 |
| gen/rollout/ep_len_mean | 500 |
| gen/rollout/ep_rew_mean | 43.8 |
| gen/rollout/ep_rew_wrapped_mean | -1.38e+03 |
| gen/time/fps | 3.53e+03 |
| gen/time/iterations | 1 |
| gen/time/time_elapsed | 4 |
| gen/time/total_timesteps | 9.83e+04 |
| gen/train/approx_kl | 0.002 |
| gen/train/clip_fraction | 0.0806 |
| gen/train/clip_range | 0.1 |
| gen/train/entropy_loss | -0.684 |
| gen/train/explained_variance | 0.47 |
| gen/train/learning_rate | 0.0005 |
| gen/train/loss | 2.66 |
| gen/train/n_updates | 30 |
| gen/train/policy_gradient_loss | -0.00117 |
| gen/train/value_loss | 94.9 |
---------------------------------------------------
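We can see that an untrained policy performs poorly, while AIRL brings an improvement. Note that with FAST set to True the learner is trained for only 100,000 steps, so an individual run may show little or no gain (the sample output below is such a run). To make it match the expert performance (500), set the flag FAST to False in the first cell.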
# Report mean and standard deviation of the learner's episode rewards,
# first for the evaluation done before training, then for the one done after.
for label, rewards in (
    ("Rewards before training:", learner_rewards_before_training),
    ("Rewards after training:", learner_rewards_after_training),
):
    print(label, np.mean(rewards), "+/-", np.std(rewards))
Rewards before training: 102.6 +/- 24.11514047232568
Rewards after training: 43.02 +/- 3.4379645140693347