diff --git "a/sf_log.txt" "b/sf_log.txt"
--- "a/sf_log.txt"
+++ "b/sf_log.txt"
@@ -6454,3 +6454,1482 @@ main_loop: 33.2571
 [2024-11-07 14:56:52,732][04584] Avg episode rewards: #0: 4.588, true rewards: #0: 4.088
 [2024-11-07 14:56:52,736][04584] Avg episode reward: 4.588, avg true_objective: 4.088
 [2024-11-07 14:57:02,444][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+[2024-11-07 14:57:10,868][04584] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
+[2024-11-07 14:59:42,796][04584] Environment doom_basic already registered, overwriting...
+[2024-11-07 14:59:42,798][04584] Environment doom_two_colors_easy already registered, overwriting...
+[2024-11-07 14:59:42,800][04584] Environment doom_two_colors_hard already registered, overwriting...
+[2024-11-07 14:59:42,802][04584] Environment doom_dm already registered, overwriting...
+[2024-11-07 14:59:42,803][04584] Environment doom_dwango5 already registered, overwriting...
+[2024-11-07 14:59:42,804][04584] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2024-11-07 14:59:42,805][04584] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2024-11-07 14:59:42,806][04584] Environment doom_my_way_home already registered, overwriting...
+[2024-11-07 14:59:42,808][04584] Environment doom_deadly_corridor already registered, overwriting...
+[2024-11-07 14:59:42,809][04584] Environment doom_defend_the_center already registered, overwriting...
+[2024-11-07 14:59:42,813][04584] Environment doom_defend_the_line already registered, overwriting...
+[2024-11-07 14:59:42,814][04584] Environment doom_health_gathering already registered, overwriting...
+[2024-11-07 14:59:42,815][04584] Environment doom_health_gathering_supreme already registered, overwriting...
+[2024-11-07 14:59:42,817][04584] Environment doom_battle already registered, overwriting...
+[2024-11-07 14:59:42,820][04584] Environment doom_battle2 already registered, overwriting...
+[2024-11-07 14:59:42,822][04584] Environment doom_duel_bots already registered, overwriting...
+[2024-11-07 14:59:42,825][04584] Environment doom_deathmatch_bots already registered, overwriting...
+[2024-11-07 14:59:42,828][04584] Environment doom_duel already registered, overwriting...
+[2024-11-07 14:59:42,829][04584] Environment doom_deathmatch_full already registered, overwriting...
+[2024-11-07 14:59:42,831][04584] Environment doom_benchmark already registered, overwriting...
+[2024-11-07 14:59:42,833][04584] register_encoder_factory: 
+[2024-11-07 15:01:10,944][04584] Environment doom_basic already registered, overwriting...
+[2024-11-07 15:01:10,947][04584] Environment doom_two_colors_easy already registered, overwriting...
+[2024-11-07 15:01:10,949][04584] Environment doom_two_colors_hard already registered, overwriting...
+[2024-11-07 15:01:10,950][04584] Environment doom_dm already registered, overwriting...
+[2024-11-07 15:01:10,951][04584] Environment doom_dwango5 already registered, overwriting...
+[2024-11-07 15:01:10,953][04584] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2024-11-07 15:01:10,954][04584] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2024-11-07 15:01:10,956][04584] Environment doom_my_way_home already registered, overwriting...
+[2024-11-07 15:01:10,958][04584] Environment doom_deadly_corridor already registered, overwriting...
+[2024-11-07 15:01:10,960][04584] Environment doom_defend_the_center already registered, overwriting...
+[2024-11-07 15:01:10,962][04584] Environment doom_defend_the_line already registered, overwriting...
+[2024-11-07 15:01:10,963][04584] Environment doom_health_gathering already registered, overwriting...
+[2024-11-07 15:01:10,965][04584] Environment doom_health_gathering_supreme already registered, overwriting...
+[2024-11-07 15:01:10,967][04584] Environment doom_battle already registered, overwriting...
+[2024-11-07 15:01:10,969][04584] Environment doom_battle2 already registered, overwriting...
+[2024-11-07 15:01:10,971][04584] Environment doom_duel_bots already registered, overwriting...
+[2024-11-07 15:01:10,974][04584] Environment doom_deathmatch_bots already registered, overwriting...
+[2024-11-07 15:01:10,975][04584] Environment doom_duel already registered, overwriting...
+[2024-11-07 15:01:10,976][04584] Environment doom_deathmatch_full already registered, overwriting...
+[2024-11-07 15:01:10,979][04584] Environment doom_benchmark already registered, overwriting...
+[2024-11-07 15:01:10,983][04584] register_encoder_factory: 
+[2024-11-07 15:01:11,005][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+[2024-11-07 15:01:11,008][04584] Overriding arg 'num_workers' with value 10 passed from command line
+[2024-11-07 15:01:11,010][04584] Overriding arg 'num_envs_per_worker' with value 6 passed from command line
+[2024-11-07 15:01:11,011][04584] Overriding arg 'train_for_env_steps' with value 16000000 passed from command line
+[2024-11-07 15:01:11,021][04584] Experiment dir /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment already exists!
+[2024-11-07 15:01:11,022][04584] Resuming existing experiment from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment...
+[2024-11-07 15:01:11,024][04584] Weights and Biases integration disabled
+[2024-11-07 15:01:11,027][04584] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2024-11-07 15:01:16,848][04584] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/root/hfRL/ml/LunarLander-v2/train_dir
+restart_behavior=resume
+device=gpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=10
+num_envs_per_worker=6
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.99
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=16000000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
+git_hash=unknown
+git_repo_name=not a git repository
+[2024-11-07 15:01:16,849][04584] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
+[2024-11-07 15:01:16,851][04584] Rollout worker 0 uses device cpu
+[2024-11-07 15:01:16,852][04584] Rollout worker 1 uses device cpu
+[2024-11-07 15:01:16,854][04584] Rollout worker 2 uses device cpu
+[2024-11-07 15:01:16,855][04584] Rollout worker 3 uses device cpu
+[2024-11-07 15:01:16,857][04584] Rollout worker 4 uses device cpu
+[2024-11-07 15:01:16,859][04584] Rollout worker 5 uses device cpu
+[2024-11-07 15:01:16,862][04584] Rollout worker 6 uses device cpu
+[2024-11-07 15:01:16,863][04584] Rollout worker 7 uses device cpu
+[2024-11-07 15:01:16,866][04584] Rollout worker 8 uses device cpu
+[2024-11-07 15:01:16,868][04584] Rollout worker 9 uses device cpu
+[2024-11-07 15:01:17,011][04584] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:01:17,012][04584] InferenceWorker_p0-w0: min num requests: 3
+[2024-11-07 15:01:17,055][04584] Starting all processes...
+[2024-11-07 15:01:17,056][04584] Starting process learner_proc0
+[2024-11-07 15:01:17,097][04584] Starting all processes...
+[2024-11-07 15:01:17,104][04584] Starting process inference_proc0-0
+[2024-11-07 15:01:17,106][04584] Starting process rollout_proc0
+[2024-11-07 15:01:17,106][04584] Starting process rollout_proc1
+[2024-11-07 15:01:17,106][04584] Starting process rollout_proc2
+[2024-11-07 15:01:17,107][04584] Starting process rollout_proc3
+[2024-11-07 15:01:17,109][04584] Starting process rollout_proc4
+[2024-11-07 15:01:17,109][04584] Starting process rollout_proc5
+[2024-11-07 15:01:17,112][04584] Starting process rollout_proc6
+[2024-11-07 15:01:17,113][04584] Starting process rollout_proc7
+[2024-11-07 15:01:17,114][04584] Starting process rollout_proc8
+[2024-11-07 15:01:17,125][04584] Starting process rollout_proc9
+[2024-11-07 15:01:25,913][09025] Worker 0 uses CPU cores [0]
+[2024-11-07 15:01:25,954][09009] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:01:25,954][09009] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2024-11-07 15:01:26,005][09009] Num visible devices: 1
+[2024-11-07 15:01:26,051][09009] Starting seed is not provided
+[2024-11-07 15:01:26,051][09009] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:01:26,051][09009] Initializing actor-critic model on device cuda:0
+[2024-11-07 15:01:26,052][09009] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:01:26,053][09009] RunningMeanStd input shape: (1,)
+[2024-11-07 15:01:26,083][09009] ConvEncoder: input_channels=3
+[2024-11-07 15:01:26,334][09028] Worker 5 uses CPU cores [5]
+[2024-11-07 15:01:26,676][09009] Conv encoder output size: 512
+[2024-11-07 15:01:26,677][09009] Policy head output size: 512
+[2024-11-07 15:01:26,705][09009] Created Actor Critic model with architecture:
+[2024-11-07 15:01:26,706][09009] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2024-11-07 15:01:26,934][09037] Worker 6 uses CPU cores [6]
+[2024-11-07 15:01:27,144][09029] Worker 1 uses CPU cores [1]
+[2024-11-07 15:01:27,256][09024] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:01:27,257][09024] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2024-11-07 15:01:27,288][09038] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
+[2024-11-07 15:01:27,300][09024] Num visible devices: 1
+[2024-11-07 15:01:27,315][09009] Using optimizer 
+[2024-11-07 15:01:27,485][09026] Worker 3 uses CPU cores [3]
+[2024-11-07 15:01:27,594][09027] Worker 2 uses CPU cores [2]
+[2024-11-07 15:01:27,608][09040] Worker 8 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
+[2024-11-07 15:01:27,695][09039] Worker 9 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
+[2024-11-07 15:01:27,770][09030] Worker 4 uses CPU cores [4]
+[2024-11-07 15:01:28,594][09009] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001957_8015872.pth...
+[2024-11-07 15:01:28,657][09009] Loading model from checkpoint
+[2024-11-07 15:01:28,659][09009] Loaded experiment state at self.train_step=1957, self.env_steps=8015872
+[2024-11-07 15:01:28,659][09009] Initialized policy 0 weights for model version 1957
+[2024-11-07 15:01:28,667][09009] LearnerWorker_p0 finished initialization!
+[2024-11-07 15:01:28,667][09009] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:01:28,872][09024] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:01:28,873][09024] RunningMeanStd input shape: (1,)
+[2024-11-07 15:01:28,885][09024] ConvEncoder: input_channels=3
+[2024-11-07 15:01:28,989][09024] Conv encoder output size: 512
+[2024-11-07 15:01:28,990][09024] Policy head output size: 512
+[2024-11-07 15:01:29,034][04584] Inference worker 0-0 is ready!
+[2024-11-07 15:01:29,035][04584] All inference workers are ready! Signal rollout workers to start!
+[2024-11-07 15:01:29,114][09030] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,123][09028] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,124][09029] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,146][09037] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,171][09027] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,179][09026] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,195][09038] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,196][09040] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,201][09039] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,234][09025] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:01:29,686][09030] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:29,772][09028] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:29,822][09026] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:29,854][09029] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:29,856][09039] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:29,892][09025] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:30,044][09037] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:30,158][09028] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:30,194][09029] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:30,250][09025] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:30,266][09026] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:30,378][09030] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:30,568][09029] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:30,574][09038] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:30,755][09037] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:30,762][09025] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:30,781][09028] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:30,817][09040] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:31,015][09038] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:31,028][04584] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 8015872. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:01:31,099][09029] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:31,165][09037] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:31,187][09039] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:31,232][09027] Decorrelating experience for 0 frames...
+[2024-11-07 15:01:31,277][09040] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:31,623][09025] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:31,645][09030] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:31,670][09027] Decorrelating experience for 32 frames...
+[2024-11-07 15:01:31,724][09038] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:31,779][09039] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:31,820][09037] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:32,124][09028] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:32,158][09040] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:32,236][09025] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:32,377][09037] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:32,378][09039] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:32,382][09027] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:32,595][09029] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:32,641][09038] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:32,642][09028] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:32,801][09025] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:32,914][09037] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:33,079][09039] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:33,111][09029] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:33,115][09040] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:33,257][09030] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:33,332][09028] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:33,679][09027] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:33,725][09038] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:34,031][09040] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:34,034][09026] Decorrelating experience for 64 frames...
+[2024-11-07 15:01:34,045][09039] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:34,590][09027] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:34,691][09038] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:34,869][09030] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:35,419][09026] Decorrelating experience for 96 frames...
+[2024-11-07 15:01:35,430][09027] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:35,434][09040] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:36,027][04584] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8015872. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:01:36,611][09026] Decorrelating experience for 128 frames...
+[2024-11-07 15:01:36,611][09030] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:37,001][04584] Heartbeat connected on Batcher_0
+[2024-11-07 15:01:37,006][04584] Heartbeat connected on LearnerWorker_p0
+[2024-11-07 15:01:37,020][04584] Heartbeat connected on RolloutWorker_w0
+[2024-11-07 15:01:37,022][04584] Heartbeat connected on RolloutWorker_w1
+[2024-11-07 15:01:37,030][04584] Heartbeat connected on RolloutWorker_w2
+[2024-11-07 15:01:37,037][04584] Heartbeat connected on RolloutWorker_w4
+[2024-11-07 15:01:37,040][04584] Heartbeat connected on RolloutWorker_w5
+[2024-11-07 15:01:37,047][04584] Heartbeat connected on RolloutWorker_w7
+[2024-11-07 15:01:37,050][04584] Heartbeat connected on RolloutWorker_w8
+[2024-11-07 15:01:37,054][04584] Heartbeat connected on RolloutWorker_w6
+[2024-11-07 15:01:37,056][04584] Heartbeat connected on RolloutWorker_w9
+[2024-11-07 15:01:37,083][04584] Heartbeat connected on InferenceWorker_p0-w0
+[2024-11-07 15:01:37,535][09026] Decorrelating experience for 160 frames...
+[2024-11-07 15:01:37,748][04584] Heartbeat connected on RolloutWorker_w3
+[2024-11-07 15:01:39,624][09009] Signal inference workers to stop experience collection...
+[2024-11-07 15:01:39,636][09024] InferenceWorker_p0-w0: stopping experience collection
+[2024-11-07 15:01:41,028][04584] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8015872. Throughput: 0: 266.1. Samples: 2661. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:01:41,030][04584] Avg episode reward: [(0, '1.997')]
+[2024-11-07 15:01:46,027][04584] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8015872. Throughput: 0: 322.8. Samples: 4842. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:01:46,029][04584] Avg episode reward: [(0, '1.997')]
+[2024-11-07 15:01:49,711][09009] Signal inference workers to resume experience collection...
+[2024-11-07 15:01:49,726][09024] InferenceWorker_p0-w0: resuming experience collection
+[2024-11-07 15:01:51,029][04584] Fps is (10 sec: 1638.4, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 8032256. Throughput: 0: 248.1. Samples: 4962. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2024-11-07 15:01:51,034][04584] Avg episode reward: [(0, '2.057')]
+[2024-11-07 15:01:56,244][04584] Fps is (10 sec: 3207.3, 60 sec: 1299.5, 300 sec: 1299.5). Total num frames: 8048640. Throughput: 0: 315.9. Samples: 7965. Policy #0 lag: (min: 0.0, avg: 1.5, max: 5.0)
+[2024-11-07 15:01:56,253][04584] Avg episode reward: [(0, '3.230')]
+[2024-11-07 15:01:58,030][09024] Updated weights for policy 0, policy_version 1967 (0.0072)
+[2024-11-07 15:02:01,029][04584] Fps is (10 sec: 3276.4, 60 sec: 1638.3, 300 sec: 1638.3). Total num frames: 8065024. Throughput: 0: 400.0. Samples: 12000. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:02:01,049][04584] Avg episode reward: [(0, '4.097')]
+[2024-11-07 15:02:06,035][04584] Fps is (10 sec: 2928.5, 60 sec: 1755.1, 300 sec: 1755.1). Total num frames: 8077312. Throughput: 0: 478.1. Samples: 16737. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
+[2024-11-07 15:02:06,038][04584] Avg episode reward: [(0, '4.522')]
+[2024-11-07 15:02:09,709][09024] Updated weights for policy 0, policy_version 1977 (0.0075)
+[2024-11-07 15:02:11,028][04584] Fps is (10 sec: 3686.6, 60 sec: 2150.4, 300 sec: 2150.4). Total num frames: 8101888. Throughput: 0: 483.9. Samples: 19356. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:02:11,034][04584] Avg episode reward: [(0, '4.531')]
+[2024-11-07 15:02:16,028][04584] Fps is (10 sec: 5328.5, 60 sec: 2548.6, 300 sec: 2548.6). Total num frames: 8130560. Throughput: 0: 634.1. Samples: 28533. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:02:16,037][04584] Avg episode reward: [(0, '4.269')]
+[2024-11-07 15:02:16,968][09024] Updated weights for policy 0, policy_version 1987 (0.0049)
+[2024-11-07 15:02:21,028][04584] Fps is (10 sec: 5325.0, 60 sec: 2785.3, 300 sec: 2785.3). Total num frames: 8155136. Throughput: 0: 791.9. Samples: 35634. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0)
+[2024-11-07 15:02:21,040][04584] Avg episode reward: [(0, '4.001')]
+[2024-11-07 15:02:24,670][09024] Updated weights for policy 0, policy_version 1997 (0.0064)
+[2024-11-07 15:02:26,032][04584] Fps is (10 sec: 5731.8, 60 sec: 3127.6, 300 sec: 3127.6). Total num frames: 8187904. Throughput: 0: 829.0. Samples: 39969. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:02:26,037][04584] Avg episode reward: [(0, '4.182')]
+[2024-11-07 15:02:31,028][04584] Fps is (10 sec: 4505.7, 60 sec: 3072.0, 300 sec: 3072.0). Total num frames: 8200192. Throughput: 0: 954.2. Samples: 47781. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:02:31,029][04584] Avg episode reward: [(0, '4.225')]
+[2024-11-07 15:02:34,783][09024] Updated weights for policy 0, policy_version 2007 (0.0084)
+[2024-11-07 15:02:36,032][04584] Fps is (10 sec: 3686.7, 60 sec: 3481.4, 300 sec: 3213.6). Total num frames: 8224768. Throughput: 0: 1066.8. Samples: 52974. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:02:36,045][04584] Avg episode reward: [(0, '4.560')]
+[2024-11-07 15:02:41,030][04584] Fps is (10 sec: 5733.4, 60 sec: 4027.6, 300 sec: 3452.2). Total num frames: 8257536. Throughput: 0: 1122.3. Samples: 58227. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:02:41,181][04584] Avg episode reward: [(0, '4.499')]
+[2024-11-07 15:02:42,375][09024] Updated weights for policy 0, policy_version 2017 (0.0076)
+[2024-11-07 15:02:46,028][04584] Fps is (10 sec: 5736.4, 60 sec: 4437.3, 300 sec: 3549.8). Total num frames: 8282112. Throughput: 0: 1179.7. Samples: 65085. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:02:46,033][04584] Avg episode reward: [(0, '4.458')]
+[2024-11-07 15:02:48,966][09024] Updated weights for policy 0, policy_version 2027 (0.0057)
+[2024-11-07 15:02:51,044][04584] Fps is (10 sec: 5317.1, 60 sec: 4640.9, 300 sec: 3685.6). Total num frames: 8310784. Throughput: 0: 1271.3. Samples: 73959. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:02:51,050][04584] Avg episode reward: [(0, '4.488')]
+[2024-11-07 15:02:55,463][09024] Updated weights for policy 0, policy_version 2037 (0.0045)
+[2024-11-07 15:02:56,027][04584] Fps is (10 sec: 6553.9, 60 sec: 5001.5, 300 sec: 3903.3). Total num frames: 8347648. Throughput: 0: 1327.8. Samples: 79104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:02:56,029][04584] Avg episode reward: [(0, '4.301')]
+[2024-11-07 15:03:01,028][04584] Fps is (10 sec: 6974.4, 60 sec: 5256.6, 300 sec: 4050.5). Total num frames: 8380416. Throughput: 0: 1384.7. Samples: 90843. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0)
+[2024-11-07 15:03:01,035][04584] Avg episode reward: [(0, '4.534')]
+[2024-11-07 15:03:01,342][09024] Updated weights for policy 0, policy_version 2047 (0.0066)
+[2024-11-07 15:03:06,028][04584] Fps is (10 sec: 4505.6, 60 sec: 5257.2, 300 sec: 3966.7). Total num frames: 8392704. Throughput: 0: 1321.1. Samples: 95085. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:03:06,035][04584] Avg episode reward: [(0, '4.484')]
+[2024-11-07 15:03:11,030][04584] Fps is (10 sec: 4095.1, 60 sec: 5324.6, 300 sec: 4054.9). Total num frames: 8421376. Throughput: 0: 1306.7. Samples: 98769. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:03:11,033][04584] Avg episode reward: [(0, '4.455')]
+[2024-11-07 15:03:11,076][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002056_8421376.pth...
+[2024-11-07 15:03:12,209][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth
+[2024-11-07 15:03:12,448][09024] Updated weights for policy 0, policy_version 2057 (0.0053)
+[2024-11-07 15:03:16,028][04584] Fps is (10 sec: 5324.7, 60 sec: 5256.5, 300 sec: 4096.0). Total num frames: 8445952. Throughput: 0: 1310.5. Samples: 106755. Policy #0 lag: (min: 0.0, avg: 1.9, max: 3.0)
+[2024-11-07 15:03:16,033][04584] Avg episode reward: [(0, '4.398')]
+[2024-11-07 15:03:18,413][09024] Updated weights for policy 0, policy_version 2067 (0.0046)
+[2024-11-07 15:03:21,028][04584] Fps is (10 sec: 6145.6, 60 sec: 5461.4, 300 sec: 4244.9). Total num frames: 8482816. Throughput: 0: 1428.1. Samples: 117234. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:03:21,032][04584] Avg episode reward: [(0, '4.426')]
+[2024-11-07 15:03:23,406][09024] Updated weights for policy 0, policy_version 2077 (0.0053)
+[2024-11-07 15:03:26,029][04584] Fps is (10 sec: 8191.4, 60 sec: 5666.5, 300 sec: 4452.1). Total num frames: 8527872. Throughput: 0: 1449.0. Samples: 123429. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:03:26,044][04584] Avg episode reward: [(0, '4.548')]
+[2024-11-07 15:03:28,644][09024] Updated weights for policy 0, policy_version 2087 (0.0052)
+[2024-11-07 15:03:31,028][04584] Fps is (10 sec: 8601.3, 60 sec: 6144.0, 300 sec: 4608.0). Total num frames: 8568832. Throughput: 0: 1560.3. Samples: 135300. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:03:31,038][04584] Avg episode reward: [(0, '4.514')]
+[2024-11-07 15:03:33,631][09024] Updated weights for policy 0, policy_version 2097 (0.0036)
+[2024-11-07 15:03:36,147][04584] Fps is (10 sec: 6882.2, 60 sec: 6200.4, 300 sec: 4648.6). Total num frames: 8597504. Throughput: 0: 1612.3. Samples: 146679. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:03:36,153][04584] Avg episode reward: [(0, '4.475')]
+[2024-11-07 15:03:41,028][04584] Fps is (10 sec: 4096.1, 60 sec: 5871.1, 300 sec: 4568.6). Total num frames: 8609792. Throughput: 0: 1535.9. Samples: 148221. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:03:41,049][04584] Avg episode reward: [(0, '4.297')]
+[2024-11-07 15:03:43,494][09024] Updated weights for policy 0, policy_version 2107 (0.0056)
+[2024-11-07 15:03:46,039][04584] Fps is (10 sec: 4554.6, 60 sec: 6006.4, 300 sec: 4641.8). Total num frames: 8642560. Throughput: 0: 1440.9. Samples: 155700. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:03:46,062][04584] Avg episode reward: [(0, '4.360')]
+[2024-11-07 15:03:50,512][09024] Updated weights for policy 0, policy_version 2117 (0.0062)
+[2024-11-07 15:03:51,029][04584] Fps is (10 sec: 6552.9, 60 sec: 6077.3, 300 sec: 4710.4). Total num frames: 8675328. Throughput: 0: 1533.7. Samples: 164103. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:03:51,034][04584] Avg episode reward: [(0, '4.391')]
+[2024-11-07 15:03:56,028][04584] Fps is (10 sec: 6560.9, 60 sec: 6007.5, 300 sec: 4774.0). Total num frames: 8708096. Throughput: 0: 1600.7. Samples: 170796. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:03:56,034][04584] Avg episode reward: [(0, '4.322')]
+[2024-11-07 15:03:56,064][09024] Updated weights for policy 0, policy_version 2127 (0.0079)
+[2024-11-07 15:04:01,028][04584] Fps is (10 sec: 6554.4, 60 sec: 6007.5, 300 sec: 4833.3). Total num frames: 8740864. Throughput: 0: 1627.9. Samples: 180012. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:04:01,029][04584] Avg episode reward: [(0, '4.624')]
+[2024-11-07 15:04:03,142][09024] Updated weights for policy 0, policy_version 2137 (0.0080)
+[2024-11-07 15:04:06,031][04584] Fps is (10 sec: 6551.5, 60 sec: 6348.5, 300 sec: 4888.7). Total num frames: 8773632. Throughput: 0: 1591.7. Samples: 188865. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:04:06,033][04584] Avg episode reward: [(0, '4.471')]
+[2024-11-07 15:04:09,383][09024] Updated weights for policy 0, policy_version 2147 (0.0045)
+[2024-11-07 15:04:11,030][04584] Fps is (10 sec: 6142.3, 60 sec: 6348.8, 300 sec: 4915.1). Total num frames: 8802304. Throughput: 0: 1567.7. Samples: 193977. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0)
+[2024-11-07 15:04:11,034][04584] Avg episode reward: [(0, '4.431')]
+[2024-11-07 15:04:16,028][04584] Fps is (10 sec: 5326.3, 60 sec: 6348.8, 300 sec: 4915.2). Total num frames: 8826880. Throughput: 0: 1439.4. Samples: 200073. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:04:16,031][04584] Avg episode reward: [(0, '4.391')]
+[2024-11-07 15:04:17,708][09024] Updated weights for policy 0, policy_version 2157 (0.0058)
+[2024-11-07 15:04:21,028][04584] Fps is (10 sec: 6145.7, 60 sec: 6348.8, 300 sec: 4987.5). Total num frames: 8863744. Throughput: 0: 1450.2. Samples: 211764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:04:21,030][04584] Avg episode reward: [(0, '4.511')]
+[2024-11-07 15:04:22,029][09024] Updated weights for policy 0, policy_version 2167 (0.0046)
+[2024-11-07 15:04:26,028][04584] Fps is (10 sec: 7373.2, 60 sec: 6212.4, 300 sec: 5055.6). Total num frames: 8900608. Throughput: 0: 1557.3. Samples: 218301. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:04:26,031][04584] Avg episode reward: [(0, '4.462')]
+[2024-11-07 15:04:28,346][09024] Updated weights for policy 0, policy_version 2177 (0.0073)
+[2024-11-07 15:04:31,028][04584] Fps is (10 sec: 7372.6, 60 sec: 6144.0, 300 sec: 5120.0). Total num frames: 8937472. Throughput: 0: 1611.6. Samples: 228204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:04:31,029][04584] Avg episode reward: [(0, '4.386')]
+[2024-11-07 15:04:33,371][09024] Updated weights for policy 0, policy_version 2187 (0.0044)
+[2024-11-07 15:04:36,034][04584] Fps is (10 sec: 8186.5, 60 sec: 6429.1, 300 sec: 5225.0). Total num frames: 8982528. Throughput: 0: 1702.5. Samples: 240723. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:04:36,038][04584] Avg episode reward: [(0, '4.766')]
+[2024-11-07 15:04:37,745][09024] Updated weights for policy 0, policy_version 2197 (0.0031)
+[2024-11-07 15:04:41,028][04584] Fps is (10 sec: 9011.4, 60 sec: 6963.2, 300 sec: 5324.8). Total num frames: 9027584. Throughput: 0: 1710.0. Samples: 247746. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:04:41,030][04584] Avg episode reward: [(0, '4.323')]
+[2024-11-07 15:04:42,365][09024] Updated weights for policy 0, policy_version 2207 (0.0034)
+[2024-11-07 15:04:47,914][04584] Fps is (10 sec: 7240.3, 60 sec: 6884.5, 300 sec: 5346.6). Total num frames: 9068544. Throughput: 0: 1731.7. Samples: 261207. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:04:47,916][04584] Avg episode reward: [(0, '4.443')]
+[2024-11-07 15:04:48,941][09024] Updated weights for policy 0, policy_version 2217 (0.0039)
+[2024-11-07 15:04:51,029][04584] Fps is (10 sec: 6143.1, 60 sec: 6894.9, 300 sec: 5365.7). Total num frames: 9089024. Throughput: 0: 1786.5. Samples: 269256. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:04:51,038][04584] Avg episode reward: [(0, '4.315')]
+[2024-11-07 15:04:55,889][09024] Updated weights for policy 0, policy_version 2227 (0.0049)
+[2024-11-07 15:04:56,028][04584] Fps is (10 sec: 6563.0, 60 sec: 6894.9, 300 sec: 5394.7). Total num frames: 9121792. Throughput: 0: 1776.3. Samples: 273906.
Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:04:56,030][04584] Avg episode reward: [(0, '4.585')] +[2024-11-07 15:05:01,028][04584] Fps is (10 sec: 6145.0, 60 sec: 6826.7, 300 sec: 5402.8). Total num frames: 9150464. Throughput: 0: 1833.8. Samples: 282591. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:05:01,030][04584] Avg episode reward: [(0, '4.626')] +[2024-11-07 15:05:02,542][09024] Updated weights for policy 0, policy_version 2237 (0.0078) +[2024-11-07 15:05:06,028][04584] Fps is (10 sec: 7372.5, 60 sec: 7031.8, 300 sec: 5486.7). Total num frames: 9195520. Throughput: 0: 1828.4. Samples: 294042. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:05:06,031][04584] Avg episode reward: [(0, '4.594')] +[2024-11-07 15:05:06,896][09024] Updated weights for policy 0, policy_version 2247 (0.0041) +[2024-11-07 15:05:11,028][04584] Fps is (10 sec: 8601.0, 60 sec: 7236.5, 300 sec: 5548.2). Total num frames: 9236480. Throughput: 0: 1833.2. Samples: 300798. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-11-07 15:05:11,030][04584] Avg episode reward: [(0, '4.237')] +[2024-11-07 15:05:11,039][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002255_9236480.pth... +[2024-11-07 15:05:11,180][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001957_8015872.pth +[2024-11-07 15:05:11,798][09024] Updated weights for policy 0, policy_version 2257 (0.0037) +[2024-11-07 15:05:16,028][04584] Fps is (10 sec: 6963.7, 60 sec: 7304.6, 300 sec: 5552.4). Total num frames: 9265152. Throughput: 0: 1843.3. Samples: 311151. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:05:16,029][04584] Avg episode reward: [(0, '4.511')] +[2024-11-07 15:05:18,230][09024] Updated weights for policy 0, policy_version 2267 (0.0036) +[2024-11-07 15:05:22,243][04584] Fps is (10 sec: 5478.3, 60 sec: 7092.5, 300 sec: 5544.8). Total num frames: 9297920. 
Throughput: 0: 1763.7. Samples: 322221. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:05:22,263][04584] Avg episode reward: [(0, '4.497')] +[2024-11-07 15:05:25,579][09024] Updated weights for policy 0, policy_version 2277 (0.0035) +[2024-11-07 15:05:26,028][04584] Fps is (10 sec: 6553.6, 60 sec: 7168.0, 300 sec: 5595.0). Total num frames: 9330688. Throughput: 0: 1691.6. Samples: 323868. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:05:26,029][04584] Avg episode reward: [(0, '4.590')] +[2024-11-07 15:05:30,294][09024] Updated weights for policy 0, policy_version 2287 (0.0041) +[2024-11-07 15:05:31,028][04584] Fps is (10 sec: 8393.2, 60 sec: 7236.3, 300 sec: 5649.1). Total num frames: 9371648. Throughput: 0: 1751.2. Samples: 336708. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:05:31,030][04584] Avg episode reward: [(0, '4.708')] +[2024-11-07 15:05:34,912][09024] Updated weights for policy 0, policy_version 2297 (0.0042) +[2024-11-07 15:05:36,027][04584] Fps is (10 sec: 9011.3, 60 sec: 7305.4, 300 sec: 5734.4). Total num frames: 9420800. Throughput: 0: 1796.1. Samples: 350079. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:05:36,029][04584] Avg episode reward: [(0, '4.454')] +[2024-11-07 15:05:39,037][09024] Updated weights for policy 0, policy_version 2307 (0.0063) +[2024-11-07 15:05:41,029][04584] Fps is (10 sec: 8600.8, 60 sec: 7167.9, 300 sec: 5767.1). Total num frames: 9457664. Throughput: 0: 1853.6. Samples: 357318. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2024-11-07 15:05:41,031][04584] Avg episode reward: [(0, '4.661')] +[2024-11-07 15:05:45,685][09024] Updated weights for policy 0, policy_version 2317 (0.0065) +[2024-11-07 15:05:46,028][04584] Fps is (10 sec: 7372.7, 60 sec: 7330.3, 300 sec: 5798.7). Total num frames: 9494528. Throughput: 0: 1890.9. Samples: 367683. 
Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:05:46,030][04584] Avg episode reward: [(0, '4.410')] +[2024-11-07 15:05:50,912][09024] Updated weights for policy 0, policy_version 2327 (0.0035) +[2024-11-07 15:05:51,028][04584] Fps is (10 sec: 7373.4, 60 sec: 7373.0, 300 sec: 5828.9). Total num frames: 9531392. Throughput: 0: 1887.5. Samples: 378978. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:05:51,031][04584] Avg episode reward: [(0, '4.357')] +[2024-11-07 15:05:56,548][04584] Fps is (10 sec: 5450.5, 60 sec: 7106.3, 300 sec: 5784.9). Total num frames: 9551872. Throughput: 0: 1821.6. Samples: 383718. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:05:56,552][04584] Avg episode reward: [(0, '4.499')] +[2024-11-07 15:05:59,046][09024] Updated weights for policy 0, policy_version 2337 (0.0071) +[2024-11-07 15:06:01,028][04584] Fps is (10 sec: 5324.8, 60 sec: 7236.3, 300 sec: 5810.3). Total num frames: 9584640. Throughput: 0: 1762.5. Samples: 390465. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-11-07 15:06:01,030][04584] Avg episode reward: [(0, '4.315')] +[2024-11-07 15:06:04,144][09024] Updated weights for policy 0, policy_version 2347 (0.0033) +[2024-11-07 15:06:06,029][04584] Fps is (10 sec: 7777.0, 60 sec: 7168.0, 300 sec: 5853.5). Total num frames: 9625600. Throughput: 0: 1835.6. Samples: 402594. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:06:06,031][04584] Avg episode reward: [(0, '4.349')] +[2024-11-07 15:06:09,038][09024] Updated weights for policy 0, policy_version 2357 (0.0025) +[2024-11-07 15:06:11,028][04584] Fps is (10 sec: 8192.0, 60 sec: 7168.1, 300 sec: 5895.3). Total num frames: 9666560. Throughput: 0: 1899.2. Samples: 409332. 
Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:06:11,033][04584] Avg episode reward: [(0, '4.490')] +[2024-11-07 15:06:14,224][09024] Updated weights for policy 0, policy_version 2367 (0.0037) +[2024-11-07 15:06:16,028][04584] Fps is (10 sec: 8192.1, 60 sec: 7372.7, 300 sec: 5935.6). Total num frames: 9707520. Throughput: 0: 1871.7. Samples: 420936. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:06:16,030][04584] Avg episode reward: [(0, '4.367')] +[2024-11-07 15:06:19,411][09024] Updated weights for policy 0, policy_version 2377 (0.0041) +[2024-11-07 15:06:21,029][04584] Fps is (10 sec: 8190.8, 60 sec: 7664.4, 300 sec: 5974.5). Total num frames: 9748480. Throughput: 0: 1848.9. Samples: 433281. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:06:21,032][04584] Avg episode reward: [(0, '4.296')] +[2024-11-07 15:06:24,504][09024] Updated weights for policy 0, policy_version 2387 (0.0053) +[2024-11-07 15:06:26,027][04584] Fps is (10 sec: 7783.2, 60 sec: 7577.6, 300 sec: 5998.2). Total num frames: 9785344. Throughput: 0: 1812.0. Samples: 438855. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:06:26,031][04584] Avg episode reward: [(0, '4.495')] +[2024-11-07 15:06:31,028][04584] Fps is (10 sec: 5735.2, 60 sec: 7236.3, 300 sec: 6067.6). Total num frames: 9805824. Throughput: 0: 1818.3. Samples: 449505. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-11-07 15:06:31,030][04584] Avg episode reward: [(0, '4.282')] +[2024-11-07 15:06:33,393][09024] Updated weights for policy 0, policy_version 2397 (0.0054) +[2024-11-07 15:06:36,028][04584] Fps is (10 sec: 4505.4, 60 sec: 6826.6, 300 sec: 6150.9). Total num frames: 9830400. Throughput: 0: 1667.3. Samples: 454008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-11-07 15:06:36,033][04584] Avg episode reward: [(0, '4.324')] +[2024-11-07 15:06:41,028][04584] Fps is (10 sec: 4505.6, 60 sec: 6553.7, 300 sec: 6220.4). Total num frames: 9850880. Throughput: 0: 1647.4. 
Samples: 456993. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:06:41,030][04584] Avg episode reward: [(0, '4.430')] +[2024-11-07 15:06:42,724][09024] Updated weights for policy 0, policy_version 2407 (0.0111) +[2024-11-07 15:06:46,037][04584] Fps is (10 sec: 4092.3, 60 sec: 6279.5, 300 sec: 6234.1). Total num frames: 9871360. Throughput: 0: 1631.1. Samples: 463878. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-11-07 15:06:46,049][04584] Avg episode reward: [(0, '4.396')] +[2024-11-07 15:06:50,217][09024] Updated weights for policy 0, policy_version 2417 (0.0070) +[2024-11-07 15:06:51,028][04584] Fps is (10 sec: 5324.8, 60 sec: 6212.3, 300 sec: 6294.4). Total num frames: 9904128. Throughput: 0: 1541.4. Samples: 471957. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:06:51,033][04584] Avg episode reward: [(0, '4.471')] +[2024-11-07 15:06:56,028][04584] Fps is (10 sec: 6559.7, 60 sec: 6473.3, 300 sec: 6345.4). Total num frames: 9936896. Throughput: 0: 1495.2. Samples: 476616. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:06:56,030][04584] Avg episode reward: [(0, '4.333')] +[2024-11-07 15:06:56,741][09024] Updated weights for policy 0, policy_version 2427 (0.0055) +[2024-11-07 15:07:01,028][04584] Fps is (10 sec: 6143.9, 60 sec: 6348.8, 300 sec: 6401.0). Total num frames: 9965568. Throughput: 0: 1452.2. Samples: 486282. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:07:01,030][04584] Avg episode reward: [(0, '4.301')] +[2024-11-07 15:07:06,028][04584] Fps is (10 sec: 4096.0, 60 sec: 5871.0, 300 sec: 6359.2). Total num frames: 9977856. Throughput: 0: 1286.4. Samples: 491169. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:07:06,029][04584] Avg episode reward: [(0, '4.232')] +[2024-11-07 15:07:06,836][09024] Updated weights for policy 0, policy_version 2437 (0.0050) +[2024-11-07 15:07:11,028][04584] Fps is (10 sec: 2867.1, 60 sec: 5461.3, 300 sec: 6317.6). Total num frames: 9994240. 
Throughput: 0: 1219.8. Samples: 493749. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:07:11,049][04584] Avg episode reward: [(0, '4.415')] +[2024-11-07 15:07:11,110][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002440_9994240.pth... +[2024-11-07 15:07:12,466][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002056_8421376.pth +[2024-11-07 15:07:16,031][04584] Fps is (10 sec: 3685.2, 60 sec: 5119.8, 300 sec: 6303.6). Total num frames: 10014720. Throughput: 0: 1105.2. Samples: 499242. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:07:16,033][04584] Avg episode reward: [(0, '4.528')] +[2024-11-07 15:07:17,973][09024] Updated weights for policy 0, policy_version 2447 (0.0103) +[2024-11-07 15:07:21,028][04584] Fps is (10 sec: 4096.0, 60 sec: 4778.8, 300 sec: 6262.1). Total num frames: 10035200. Throughput: 0: 1144.8. Samples: 505524. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:07:21,031][04584] Avg episode reward: [(0, '4.479')] +[2024-11-07 15:07:26,028][04584] Fps is (10 sec: 4097.4, 60 sec: 4505.6, 300 sec: 6289.8). Total num frames: 10055680. Throughput: 0: 1155.7. Samples: 509001. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:07:26,030][04584] Avg episode reward: [(0, '4.388')] +[2024-11-07 15:07:26,954][09024] Updated weights for policy 0, policy_version 2457 (0.0073) +[2024-11-07 15:07:31,029][04584] Fps is (10 sec: 4914.9, 60 sec: 4642.1, 300 sec: 6303.7). Total num frames: 10084352. Throughput: 0: 1154.4. Samples: 515817. Policy #0 lag: (min: 0.0, avg: 1.3, max: 4.0) +[2024-11-07 15:07:31,031][04584] Avg episode reward: [(0, '4.512')] +[2024-11-07 15:07:34,484][09024] Updated weights for policy 0, policy_version 2467 (0.0077) +[2024-11-07 15:07:36,039][04584] Fps is (10 sec: 5318.9, 60 sec: 4641.3, 300 sec: 6275.7). Total num frames: 10108928. Throughput: 0: 1161.7. Samples: 524247. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-11-07 15:07:36,041][04584] Avg episode reward: [(0, '4.336')] +[2024-11-07 15:07:41,029][04584] Fps is (10 sec: 4095.9, 60 sec: 4573.8, 300 sec: 6248.1). Total num frames: 10125312. Throughput: 0: 1109.8. Samples: 526557. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-11-07 15:07:41,033][04584] Avg episode reward: [(0, '4.377')] +[2024-11-07 15:07:44,723][09024] Updated weights for policy 0, policy_version 2477 (0.0055) +[2024-11-07 15:07:46,028][04584] Fps is (10 sec: 4510.3, 60 sec: 4711.1, 300 sec: 6248.5). Total num frames: 10153984. Throughput: 0: 1039.3. Samples: 533052. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:07:46,030][04584] Avg episode reward: [(0, '4.577')] +[2024-11-07 15:07:51,028][04584] Fps is (10 sec: 5325.4, 60 sec: 4573.9, 300 sec: 6206.5). Total num frames: 10178560. Throughput: 0: 1120.3. Samples: 541584. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:07:51,030][04584] Avg episode reward: [(0, '4.321')] +[2024-11-07 15:07:52,827][09024] Updated weights for policy 0, policy_version 2487 (0.0129) +[2024-11-07 15:07:56,028][04584] Fps is (10 sec: 4915.5, 60 sec: 4437.3, 300 sec: 6178.7). Total num frames: 10203136. Throughput: 0: 1124.5. Samples: 544350. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:07:56,030][04584] Avg episode reward: [(0, '4.500')] +[2024-11-07 15:07:59,814][09024] Updated weights for policy 0, policy_version 2497 (0.0061) +[2024-11-07 15:08:01,029][04584] Fps is (10 sec: 5324.1, 60 sec: 4437.2, 300 sec: 6234.2). Total num frames: 10231808. Throughput: 0: 1192.4. Samples: 552897. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:08:01,033][04584] Avg episode reward: [(0, '4.706')] +[2024-11-07 15:08:06,028][04584] Fps is (10 sec: 6144.0, 60 sec: 4778.7, 300 sec: 6248.2). Total num frames: 10264576. Throughput: 0: 1246.2. Samples: 561603. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:08:06,029][04584] Avg episode reward: [(0, '4.515')] +[2024-11-07 15:08:06,822][09024] Updated weights for policy 0, policy_version 2507 (0.0059) +[2024-11-07 15:08:11,035][04584] Fps is (10 sec: 6140.4, 60 sec: 4982.9, 300 sec: 6261.9). Total num frames: 10293248. Throughput: 0: 1278.5. Samples: 566541. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:08:11,038][04584] Avg episode reward: [(0, '4.344')] +[2024-11-07 15:08:15,470][09024] Updated weights for policy 0, policy_version 2517 (0.0058) +[2024-11-07 15:08:16,028][04584] Fps is (10 sec: 4915.1, 60 sec: 4983.7, 300 sec: 6206.5). Total num frames: 10313728. Throughput: 0: 1262.9. Samples: 572646. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:08:16,040][04584] Avg episode reward: [(0, '4.524')] +[2024-11-07 15:08:21,027][04584] Fps is (10 sec: 4918.9, 60 sec: 5120.0, 300 sec: 6151.0). Total num frames: 10342400. Throughput: 0: 1276.5. Samples: 581676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:08:21,030][04584] Avg episode reward: [(0, '4.594')] +[2024-11-07 15:08:22,084][09024] Updated weights for policy 0, policy_version 2527 (0.0055) +[2024-11-07 15:08:26,028][04584] Fps is (10 sec: 6143.8, 60 sec: 5324.8, 300 sec: 6123.2). Total num frames: 10375168. Throughput: 0: 1339.8. Samples: 586845. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:08:26,030][04584] Avg episode reward: [(0, '4.716')] +[2024-11-07 15:08:28,099][09024] Updated weights for policy 0, policy_version 2537 (0.0069) +[2024-11-07 15:08:31,028][04584] Fps is (10 sec: 6553.3, 60 sec: 5393.1, 300 sec: 6139.5). Total num frames: 10407936. Throughput: 0: 1419.3. Samples: 596922. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:08:31,031][04584] Avg episode reward: [(0, '4.377')] +[2024-11-07 15:08:34,340][09024] Updated weights for policy 0, policy_version 2547 (0.0045) +[2024-11-07 15:08:36,028][04584] Fps is (10 sec: 6553.8, 60 sec: 5530.6, 300 sec: 6206.5). Total num frames: 10440704. Throughput: 0: 1443.5. Samples: 606540. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:08:36,030][04584] Avg episode reward: [(0, '4.421')] +[2024-11-07 15:08:40,592][09024] Updated weights for policy 0, policy_version 2557 (0.0049) +[2024-11-07 15:08:41,028][04584] Fps is (10 sec: 6553.6, 60 sec: 5802.8, 300 sec: 6206.7). Total num frames: 10473472. Throughput: 0: 1493.3. Samples: 611547. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:08:41,034][04584] Avg episode reward: [(0, '4.527')] +[2024-11-07 15:08:48,061][04584] Fps is (10 sec: 5446.4, 60 sec: 5678.6, 300 sec: 6164.0). Total num frames: 10506240. Throughput: 0: 1458.3. Samples: 621483. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:08:48,062][04584] Avg episode reward: [(0, '4.465')] +[2024-11-07 15:08:48,629][09024] Updated weights for policy 0, policy_version 2567 (0.0053) +[2024-11-07 15:08:51,028][04584] Fps is (10 sec: 5324.6, 60 sec: 5802.6, 300 sec: 6164.8). Total num frames: 10526720. Throughput: 0: 1481.7. Samples: 628281. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:08:51,037][04584] Avg episode reward: [(0, '4.470')] +[2024-11-07 15:08:54,930][09024] Updated weights for policy 0, policy_version 2577 (0.0040) +[2024-11-07 15:08:56,028][04584] Fps is (10 sec: 6683.4, 60 sec: 5939.2, 300 sec: 6164.8). Total num frames: 10559488. Throughput: 0: 1483.7. Samples: 633297. 
Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0) +[2024-11-07 15:08:56,035][04584] Avg episode reward: [(0, '4.549')] +[2024-11-07 15:09:00,969][09024] Updated weights for policy 0, policy_version 2587 (0.0046) +[2024-11-07 15:09:01,028][04584] Fps is (10 sec: 6963.3, 60 sec: 6075.8, 300 sec: 6178.8). Total num frames: 10596352. Throughput: 0: 1563.5. Samples: 643005. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:09:01,030][04584] Avg episode reward: [(0, '4.366')] +[2024-11-07 15:09:06,030][04584] Fps is (10 sec: 6552.4, 60 sec: 6007.3, 300 sec: 6178.7). Total num frames: 10625024. Throughput: 0: 1577.3. Samples: 652659. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:09:06,049][04584] Avg episode reward: [(0, '4.493')] +[2024-11-07 15:09:08,985][09024] Updated weights for policy 0, policy_version 2597 (0.0064) +[2024-11-07 15:09:11,030][04584] Fps is (10 sec: 4914.3, 60 sec: 5871.5, 300 sec: 6164.8). Total num frames: 10645504. Throughput: 0: 1533.3. Samples: 655848. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:09:11,036][04584] Avg episode reward: [(0, '4.463')] +[2024-11-07 15:09:11,070][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002599_10645504.pth... +[2024-11-07 15:09:11,734][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002255_9236480.pth +[2024-11-07 15:09:16,028][04584] Fps is (10 sec: 4096.8, 60 sec: 5870.9, 300 sec: 6109.3). Total num frames: 10665984. Throughput: 0: 1445.5. Samples: 661971. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:09:16,032][04584] Avg episode reward: [(0, '4.449')] +[2024-11-07 15:09:18,879][09024] Updated weights for policy 0, policy_version 2607 (0.0099) +[2024-11-07 15:09:22,383][04584] Fps is (10 sec: 3247.1, 60 sec: 5541.0, 300 sec: 6012.2). Total num frames: 10682368. Throughput: 0: 1330.0. Samples: 668193. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:09:22,396][04584] Avg episode reward: [(0, '4.305')] +[2024-11-07 15:09:26,028][04584] Fps is (10 sec: 3276.8, 60 sec: 5393.1, 300 sec: 5970.4). Total num frames: 10698752. Throughput: 0: 1279.7. Samples: 669135. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:09:26,030][04584] Avg episode reward: [(0, '4.290')] +[2024-11-07 15:09:29,818][09024] Updated weights for policy 0, policy_version 2617 (0.0067) +[2024-11-07 15:09:31,028][04584] Fps is (10 sec: 4738.2, 60 sec: 5256.6, 300 sec: 5901.2). Total num frames: 10723328. Throughput: 0: 1272.1. Samples: 676140. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:09:31,030][04584] Avg episode reward: [(0, '4.222')] +[2024-11-07 15:09:36,028][04584] Fps is (10 sec: 4505.6, 60 sec: 5051.7, 300 sec: 5817.7). Total num frames: 10743808. Throughput: 0: 1216.6. Samples: 683025. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:09:36,029][04584] Avg episode reward: [(0, '4.278')] +[2024-11-07 15:09:39,151][09024] Updated weights for policy 0, policy_version 2627 (0.0095) +[2024-11-07 15:09:41,028][04584] Fps is (10 sec: 4505.4, 60 sec: 4915.2, 300 sec: 5799.3). Total num frames: 10768384. Throughput: 0: 1181.2. Samples: 686451. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2024-11-07 15:09:41,032][04584] Avg episode reward: [(0, '4.383')] +[2024-11-07 15:09:46,028][04584] Fps is (10 sec: 4505.5, 60 sec: 4875.6, 300 sec: 5762.2). Total num frames: 10788864. Throughput: 0: 1113.2. Samples: 693099. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:09:46,043][04584] Avg episode reward: [(0, '4.335')] +[2024-11-07 15:09:49,553][09024] Updated weights for policy 0, policy_version 2637 (0.0102) +[2024-11-07 15:09:51,028][04584] Fps is (10 sec: 3686.5, 60 sec: 4642.2, 300 sec: 5706.6). Total num frames: 10805248. Throughput: 0: 1009.0. Samples: 698064. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:09:51,030][04584] Avg episode reward: [(0, '4.319')] +[2024-11-07 15:09:56,739][04584] Fps is (10 sec: 2676.9, 60 sec: 4250.4, 300 sec: 5637.5). Total num frames: 10817536. Throughput: 0: 974.1. Samples: 700371. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2024-11-07 15:09:56,745][04584] Avg episode reward: [(0, '4.441')] +[2024-11-07 15:10:01,028][04584] Fps is (10 sec: 2867.2, 60 sec: 3959.5, 300 sec: 5553.9). Total num frames: 10833920. Throughput: 0: 930.9. Samples: 703860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:10:01,033][04584] Avg episode reward: [(0, '4.393')] +[2024-11-07 15:10:02,874][09024] Updated weights for policy 0, policy_version 2647 (0.0087) +[2024-11-07 15:10:06,066][04584] Fps is (10 sec: 3513.6, 60 sec: 3752.5, 300 sec: 5469.9). Total num frames: 10850304. Throughput: 0: 949.9. Samples: 709686. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:10:06,070][04584] Avg episode reward: [(0, '4.337')] +[2024-11-07 15:10:11,031][04584] Fps is (10 sec: 3685.4, 60 sec: 3754.6, 300 sec: 5442.8). Total num frames: 10870784. Throughput: 0: 963.9. Samples: 712512. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:10:11,047][04584] Avg episode reward: [(0, '4.329')] +[2024-11-07 15:10:13,800][09024] Updated weights for policy 0, policy_version 2657 (0.0062) +[2024-11-07 15:10:16,028][04584] Fps is (10 sec: 4111.2, 60 sec: 3754.7, 300 sec: 5423.5). Total num frames: 10891264. Throughput: 0: 943.7. Samples: 718608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:10:16,060][04584] Avg episode reward: [(0, '4.453')] +[2024-11-07 15:10:21,029][04584] Fps is (10 sec: 4096.4, 60 sec: 3911.2, 300 sec: 5359.5). Total num frames: 10911744. Throughput: 0: 910.6. Samples: 724002. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 4.0) +[2024-11-07 15:10:21,035][04584] Avg episode reward: [(0, '4.501')] +[2024-11-07 15:10:24,256][09024] Updated weights for policy 0, policy_version 2667 (0.0070) +[2024-11-07 15:10:26,028][04584] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 5276.2). Total num frames: 10928128. Throughput: 0: 899.3. Samples: 726918. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:10:26,030][04584] Avg episode reward: [(0, '4.423')] +[2024-11-07 15:10:31,028][04584] Fps is (10 sec: 2458.0, 60 sec: 3549.9, 300 sec: 5137.4). Total num frames: 10936320. Throughput: 0: 820.8. Samples: 730035. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-11-07 15:10:31,030][04584] Avg episode reward: [(0, '4.404')] +[2024-11-07 15:10:36,028][04584] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 5067.9). Total num frames: 10952704. Throughput: 0: 815.3. Samples: 734751. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-11-07 15:10:36,034][04584] Avg episode reward: [(0, '4.371')] +[2024-11-07 15:10:39,724][09024] Updated weights for policy 0, policy_version 2677 (0.0092) +[2024-11-07 15:10:41,028][04584] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 4998.5). Total num frames: 10969088. Throughput: 0: 831.2. Samples: 737184. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:10:41,030][04584] Avg episode reward: [(0, '4.353')] +[2024-11-07 15:10:46,030][04584] Fps is (10 sec: 3685.4, 60 sec: 3344.9, 300 sec: 4942.9). Total num frames: 10989568. Throughput: 0: 869.6. Samples: 742995. Policy #0 lag: (min: 0.0, avg: 1.0, max: 4.0) +[2024-11-07 15:10:46,045][04584] Avg episode reward: [(0, '4.321')] +[2024-11-07 15:10:49,813][09024] Updated weights for policy 0, policy_version 2687 (0.0083) +[2024-11-07 15:10:51,028][04584] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 4951.7). Total num frames: 11010048. Throughput: 0: 878.9. Samples: 749202. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:10:51,037][04584] Avg episode reward: [(0, '4.182')]
+[2024-11-07 15:10:56,028][04584] Fps is (10 sec: 4097.0, 60 sec: 3592.4, 300 sec: 4901.3). Total num frames: 11030528. Throughput: 0: 881.2. Samples: 752163. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2024-11-07 15:10:56,038][04584] Avg episode reward: [(0, '4.313')]
+[2024-11-07 15:10:59,310][09024] Updated weights for policy 0, policy_version 2697 (0.0070)
+[2024-11-07 15:11:01,028][04584] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 4831.9). Total num frames: 11051008. Throughput: 0: 893.9. Samples: 758835. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:11:01,030][04584] Avg episode reward: [(0, '4.453')]
+[2024-11-07 15:11:06,028][04584] Fps is (10 sec: 2867.3, 60 sec: 3483.8, 300 sec: 4720.8). Total num frames: 11059200. Throughput: 0: 858.9. Samples: 762651. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:11:06,050][04584] Avg episode reward: [(0, '4.518')]
+[2024-11-07 15:11:11,039][04584] Fps is (10 sec: 2864.2, 60 sec: 3481.1, 300 sec: 4651.2). Total num frames: 11079680. Throughput: 0: 850.9. Samples: 765216. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:11:11,054][04584] Avg episode reward: [(0, '4.563')]
+[2024-11-07 15:11:11,654][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002706_11083776.pth...
+[2024-11-07 15:11:12,410][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002440_9994240.pth
+[2024-11-07 15:11:12,676][09024] Updated weights for policy 0, policy_version 2707 (0.0102)
+[2024-11-07 15:11:16,028][04584] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 4595.9). Total num frames: 11104256. Throughput: 0: 909.9. Samples: 770982. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)
+[2024-11-07 15:11:16,035][04584] Avg episode reward: [(0, '4.511')]
+[2024-11-07 15:11:20,515][09024] Updated weights for policy 0, policy_version 2717 (0.0108)
+[2024-11-07 15:11:21,029][04584] Fps is (10 sec: 4919.9, 60 sec: 3618.2, 300 sec: 4554.2). Total num frames: 11128832. Throughput: 0: 974.7. Samples: 778614. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:11:21,032][04584] Avg episode reward: [(0, '4.433')]
+[2024-11-07 15:11:26,028][04584] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 4554.2). Total num frames: 11149312. Throughput: 0: 994.3. Samples: 781929. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:11:26,032][04584] Avg episode reward: [(0, '4.463')]
+[2024-11-07 15:11:30,644][09024] Updated weights for policy 0, policy_version 2727 (0.0103)
+[2024-11-07 15:11:31,036][04584] Fps is (10 sec: 4092.9, 60 sec: 3890.6, 300 sec: 4540.2). Total num frames: 11169792. Throughput: 0: 1013.8. Samples: 788622. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:11:31,051][04584] Avg episode reward: [(0, '4.541')]
+[2024-11-07 15:11:36,028][04584] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 4526.4). Total num frames: 11186176. Throughput: 0: 990.2. Samples: 793761. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0)
+[2024-11-07 15:11:36,095][04584] Avg episode reward: [(0, '4.513')]
+[2024-11-07 15:11:41,029][04584] Fps is (10 sec: 2869.4, 60 sec: 3822.9, 300 sec: 4498.8). Total num frames: 11198464. Throughput: 0: 979.4. Samples: 796236. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:11:41,032][04584] Avg episode reward: [(0, '4.443')]
+[2024-11-07 15:11:44,286][09024] Updated weights for policy 0, policy_version 2737 (0.0075)
+[2024-11-07 15:11:46,028][04584] Fps is (10 sec: 2867.2, 60 sec: 3754.8, 300 sec: 4443.1). Total num frames: 11214848. Throughput: 0: 908.7. Samples: 799725. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0)
+[2024-11-07 15:11:46,036][04584] Avg episode reward: [(0, '4.426')]
+[2024-11-07 15:11:51,028][04584] Fps is (10 sec: 4096.3, 60 sec: 3822.9, 300 sec: 4415.3). Total num frames: 11239424. Throughput: 0: 965.5. Samples: 806097. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:11:51,030][04584] Avg episode reward: [(0, '4.502')]
+[2024-11-07 15:11:53,698][09024] Updated weights for policy 0, policy_version 2747 (0.0120)
+[2024-11-07 15:11:56,031][04584] Fps is (10 sec: 4094.8, 60 sec: 3754.5, 300 sec: 4373.7). Total num frames: 11255808. Throughput: 0: 989.4. Samples: 809733. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:11:56,035][04584] Avg episode reward: [(0, '4.506')]
+[2024-11-07 15:12:01,028][04584] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 4387.6). Total num frames: 11272192. Throughput: 0: 975.5. Samples: 814878. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:01,061][04584] Avg episode reward: [(0, '4.458')]
+[2024-11-07 15:12:05,292][09024] Updated weights for policy 0, policy_version 2757 (0.0095)
+[2024-11-07 15:12:06,028][04584] Fps is (10 sec: 3687.5, 60 sec: 3891.2, 300 sec: 4401.5). Total num frames: 11292672. Throughput: 0: 924.8. Samples: 820227. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:06,030][04584] Avg episode reward: [(0, '4.342')]
+[2024-11-07 15:12:11,028][04584] Fps is (10 sec: 5324.8, 60 sec: 4096.7, 300 sec: 4443.2). Total num frames: 11325440. Throughput: 0: 946.0. Samples: 824499. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:11,035][04584] Avg episode reward: [(0, '4.363')]
+[2024-11-07 15:12:11,709][09024] Updated weights for policy 0, policy_version 2767 (0.0038)
+[2024-11-07 15:12:16,028][04584] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 4429.2). Total num frames: 11341824. Throughput: 0: 948.7. Samples: 831306. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:16,036][04584] Avg episode reward: [(0, '4.397')]
+[2024-11-07 15:12:21,030][04584] Fps is (10 sec: 4505.0, 60 sec: 4027.7, 300 sec: 4457.0). Total num frames: 11370496. Throughput: 0: 1013.9. Samples: 839388. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:21,033][04584] Avg episode reward: [(0, '4.267')]
+[2024-11-07 15:12:21,441][09024] Updated weights for policy 0, policy_version 2777 (0.0056)
+[2024-11-07 15:12:26,029][04584] Fps is (10 sec: 5733.9, 60 sec: 4164.2, 300 sec: 4457.0). Total num frames: 11399168. Throughput: 0: 1048.7. Samples: 843426. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0)
+[2024-11-07 15:12:26,040][04584] Avg episode reward: [(0, '4.437')]
+[2024-11-07 15:12:27,849][09024] Updated weights for policy 0, policy_version 2787 (0.0065)
+[2024-11-07 15:12:31,033][04584] Fps is (10 sec: 6141.7, 60 sec: 4369.3, 300 sec: 4484.9). Total num frames: 11431936. Throughput: 0: 1182.1. Samples: 852924. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:12:31,045][04584] Avg episode reward: [(0, '4.275')]
+[2024-11-07 15:12:34,138][09024] Updated weights for policy 0, policy_version 2797 (0.0074)
+[2024-11-07 15:12:36,035][04584] Fps is (10 sec: 6958.7, 60 sec: 4709.8, 300 sec: 4554.1). Total num frames: 11468800. Throughput: 0: 1266.2. Samples: 863085. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:12:36,038][04584] Avg episode reward: [(0, '4.393')]
+[2024-11-07 15:12:40,396][09024] Updated weights for policy 0, policy_version 2807 (0.0059)
+[2024-11-07 15:12:41,029][04584] Fps is (10 sec: 6556.5, 60 sec: 4983.5, 300 sec: 4554.2). Total num frames: 11497472. Throughput: 0: 1290.9. Samples: 867822. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:41,032][04584] Avg episode reward: [(0, '4.360')]
+[2024-11-07 15:12:46,028][04584] Fps is (10 sec: 6558.3, 60 sec: 5324.8, 300 sec: 4595.9). Total num frames: 11534336. Throughput: 0: 1392.6. Samples: 877545. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:46,030][04584] Avg episode reward: [(0, '4.605')]
+[2024-11-07 15:12:48,617][09024] Updated weights for policy 0, policy_version 2817 (0.0046)
+[2024-11-07 15:12:51,028][04584] Fps is (10 sec: 5325.1, 60 sec: 5188.3, 300 sec: 4568.1). Total num frames: 11550720. Throughput: 0: 1424.7. Samples: 884337. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:12:51,031][04584] Avg episode reward: [(0, '4.664')]
+[2024-11-07 15:12:54,695][09024] Updated weights for policy 0, policy_version 2827 (0.0045)
+[2024-11-07 15:12:56,028][04584] Fps is (10 sec: 5325.0, 60 sec: 5529.9, 300 sec: 4595.9). Total num frames: 11587584. Throughput: 0: 1441.0. Samples: 889341. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:12:56,033][04584] Avg episode reward: [(0, '4.448')]
+[2024-11-07 15:13:00,537][09024] Updated weights for policy 0, policy_version 2837 (0.0054)
+[2024-11-07 15:13:01,030][04584] Fps is (10 sec: 6961.8, 60 sec: 5802.5, 300 sec: 4595.8). Total num frames: 11620352. Throughput: 0: 1518.0. Samples: 899619. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:13:01,037][04584] Avg episode reward: [(0, '4.399')]
+[2024-11-07 15:13:06,034][04584] Fps is (10 sec: 6139.8, 60 sec: 5938.5, 300 sec: 4595.9). Total num frames: 11649024. Throughput: 0: 1536.9. Samples: 908556. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:13:06,038][04584] Avg episode reward: [(0, '4.576')]
+[2024-11-07 15:13:07,824][09024] Updated weights for policy 0, policy_version 2847 (0.0046)
+[2024-11-07 15:13:11,028][04584] Fps is (10 sec: 5735.9, 60 sec: 5871.0, 300 sec: 4623.6). Total num frames: 11677696. Throughput: 0: 1546.8. Samples: 913032. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:13:11,030][04584] Avg episode reward: [(0, '4.395')]
+[2024-11-07 15:13:11,245][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002852_11681792.pth...
+[2024-11-07 15:13:11,431][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002599_10645504.pth
+[2024-11-07 15:13:14,223][09024] Updated weights for policy 0, policy_version 2857 (0.0048)
+[2024-11-07 15:13:16,028][04584] Fps is (10 sec: 6148.2, 60 sec: 6144.0, 300 sec: 4637.5). Total num frames: 11710464. Throughput: 0: 1545.3. Samples: 922455. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:13:16,030][04584] Avg episode reward: [(0, '4.332')]
+[2024-11-07 15:13:20,452][09024] Updated weights for policy 0, policy_version 2867 (0.0048)
+[2024-11-07 15:13:22,886][04584] Fps is (10 sec: 5526.7, 60 sec: 6025.9, 300 sec: 4608.5). Total num frames: 11743232. Throughput: 0: 1485.1. Samples: 932661. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:13:22,894][04584] Avg episode reward: [(0, '4.160')]
+[2024-11-07 15:13:26,028][04584] Fps is (10 sec: 5734.4, 60 sec: 6144.1, 300 sec: 4609.7). Total num frames: 11767808. Throughput: 0: 1475.2. Samples: 934206. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:13:26,030][04584] Avg episode reward: [(0, '4.440')]
+[2024-11-07 15:13:28,531][09024] Updated weights for policy 0, policy_version 2877 (0.0041)
+[2024-11-07 15:13:31,027][04584] Fps is (10 sec: 7043.1, 60 sec: 6144.6, 300 sec: 4609.7). Total num frames: 11800576. Throughput: 0: 1484.0. Samples: 944325. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2024-11-07 15:13:31,030][04584] Avg episode reward: [(0, '4.327')]
+[2024-11-07 15:13:35,141][09024] Updated weights for policy 0, policy_version 2887 (0.0033)
+[2024-11-07 15:13:36,033][04584] Fps is (10 sec: 6140.8, 60 sec: 6007.7, 300 sec: 4595.8). Total num frames: 11829248. Throughput: 0: 1536.6. Samples: 953490. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:13:36,034][04584] Avg episode reward: [(0, '4.494')]
+[2024-11-07 15:13:41,035][04584] Fps is (10 sec: 6139.4, 60 sec: 6075.1, 300 sec: 4627.6). Total num frames: 11862016. Throughput: 0: 1525.2. Samples: 957987. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:13:41,037][04584] Avg episode reward: [(0, '4.347')]
+[2024-11-07 15:13:41,220][09024] Updated weights for policy 0, policy_version 2897 (0.0051)
+[2024-11-07 15:13:46,028][04584] Fps is (10 sec: 6966.9, 60 sec: 6075.8, 300 sec: 4651.4). Total num frames: 11898880. Throughput: 0: 1542.1. Samples: 969009. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:13:46,030][04584] Avg episode reward: [(0, '4.503')]
+[2024-11-07 15:13:46,718][09024] Updated weights for policy 0, policy_version 2907 (0.0032)
+[2024-11-07 15:13:51,028][04584] Fps is (10 sec: 7377.9, 60 sec: 6417.1, 300 sec: 4665.3). Total num frames: 11935744. Throughput: 0: 1589.0. Samples: 980049. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0)
+[2024-11-07 15:13:51,031][04584] Avg episode reward: [(0, '4.431')]
+[2024-11-07 15:13:52,617][09024] Updated weights for policy 0, policy_version 2917 (0.0039)
+[2024-11-07 15:13:57,206][04584] Fps is (10 sec: 5863.0, 60 sec: 6159.6, 300 sec: 4619.1). Total num frames: 11964416. Throughput: 0: 1561.5. Samples: 985140. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:13:57,212][04584] Avg episode reward: [(0, '4.592')]
+[2024-11-07 15:14:00,312][09024] Updated weights for policy 0, policy_version 2927 (0.0028)
+[2024-11-07 15:14:01,028][04584] Fps is (10 sec: 5734.7, 60 sec: 6212.5, 300 sec: 4637.5). Total num frames: 11993088. Throughput: 0: 1551.5. Samples: 992271. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:14:01,029][04584] Avg episode reward: [(0, '4.379')]
+[2024-11-07 15:14:06,028][04584] Fps is (10 sec: 6964.1, 60 sec: 6281.2, 300 sec: 4679.2). Total num frames: 12025856. Throughput: 0: 1607.6. Samples: 1002018. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:14:06,030][04584] Avg episode reward: [(0, '4.472')]
+[2024-11-07 15:14:06,718][09024] Updated weights for policy 0, policy_version 2937 (0.0058)
+[2024-11-07 15:14:11,028][04584] Fps is (10 sec: 6963.2, 60 sec: 6417.1, 300 sec: 4734.7). Total num frames: 12062720. Throughput: 0: 1629.3. Samples: 1007526. Policy #0 lag: (min: 0.0, avg: 1.3, max: 4.0)
+[2024-11-07 15:14:11,033][04584] Avg episode reward: [(0, '4.317')]
+[2024-11-07 15:14:11,886][09024] Updated weights for policy 0, policy_version 2947 (0.0051)
+[2024-11-07 15:14:16,029][04584] Fps is (10 sec: 7372.4, 60 sec: 6485.2, 300 sec: 4826.3). Total num frames: 12099584. Throughput: 0: 1657.5. Samples: 1018914. Policy #0 lag: (min: 0.0, avg: 1.3, max: 4.0)
+[2024-11-07 15:14:16,031][04584] Avg episode reward: [(0, '4.249')]
+[2024-11-07 15:14:17,578][09024] Updated weights for policy 0, policy_version 2957 (0.0039)
+[2024-11-07 15:14:21,028][04584] Fps is (10 sec: 7372.8, 60 sec: 6763.0, 300 sec: 4873.5). Total num frames: 12136448. Throughput: 0: 1692.9. Samples: 1029663. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:14:21,029][04584] Avg episode reward: [(0, '4.486')]
+[2024-11-07 15:14:23,230][09024] Updated weights for policy 0, policy_version 2967 (0.0057)
+[2024-11-07 15:14:26,028][04584] Fps is (10 sec: 6963.8, 60 sec: 6690.1, 300 sec: 4901.3). Total num frames: 12169216. Throughput: 0: 1719.2. Samples: 1035336. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:14:26,034][04584] Avg episode reward: [(0, '4.601')]
+[2024-11-07 15:14:28,687][09024] Updated weights for policy 0, policy_version 2977 (0.0045)
+[2024-11-07 15:14:31,524][04584] Fps is (10 sec: 5463.0, 60 sec: 6499.8, 300 sec: 4906.9). Total num frames: 12193792. Throughput: 0: 1696.9. Samples: 1046211. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:14:31,526][04584] Avg episode reward: [(0, '4.651')]
+[2024-11-07 15:14:36,028][04584] Fps is (10 sec: 6143.9, 60 sec: 6690.7, 300 sec: 4956.9). Total num frames: 12230656. Throughput: 0: 1634.1. Samples: 1053585. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:14:36,030][04584] Avg episode reward: [(0, '4.456')]
+[2024-11-07 15:14:36,569][09024] Updated weights for policy 0, policy_version 2987 (0.0041)
+[2024-11-07 15:14:41,028][04584] Fps is (10 sec: 6896.1, 60 sec: 6622.7, 300 sec: 4984.6). Total num frames: 12259328. Throughput: 0: 1685.2. Samples: 1058991. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:14:41,031][04584] Avg episode reward: [(0, '4.358')]
+[2024-11-07 15:14:44,208][09024] Updated weights for policy 0, policy_version 2997 (0.0079)
+[2024-11-07 15:14:46,028][04584] Fps is (10 sec: 4915.3, 60 sec: 6348.8, 300 sec: 4998.5). Total num frames: 12279808. Throughput: 0: 1637.1. Samples: 1065942. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:14:46,036][04584] Avg episode reward: [(0, '4.494')]
+[2024-11-07 15:14:51,037][04584] Fps is (10 sec: 5319.7, 60 sec: 6279.6, 300 sec: 5080.0). Total num frames: 12312576. Throughput: 0: 1610.5. Samples: 1074504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:14:51,040][04584] Avg episode reward: [(0, '4.573')]
+[2024-11-07 15:14:51,137][09024] Updated weights for policy 0, policy_version 3007 (0.0068)
+[2024-11-07 15:14:56,028][04584] Fps is (10 sec: 7372.9, 60 sec: 6615.2, 300 sec: 5151.2). Total num frames: 12353536. Throughput: 0: 1614.2. Samples: 1080165. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:14:56,030][04584] Avg episode reward: [(0, '4.318')]
+[2024-11-07 15:14:56,610][09024] Updated weights for policy 0, policy_version 3017 (0.0036)
+[2024-11-07 15:15:01,028][04584] Fps is (10 sec: 7379.8, 60 sec: 6553.6, 300 sec: 5207.4). Total num frames: 12386304. Throughput: 0: 1603.7. Samples: 1091079. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:15:01,033][04584] Avg episode reward: [(0, '4.576')]
+[2024-11-07 15:15:02,672][09024] Updated weights for policy 0, policy_version 3027 (0.0045)
+[2024-11-07 15:15:06,031][04584] Fps is (10 sec: 5323.2, 60 sec: 6348.5, 300 sec: 5206.8). Total num frames: 12406784. Throughput: 0: 1540.1. Samples: 1098972. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:15:06,033][04584] Avg episode reward: [(0, '4.521')]
+[2024-11-07 15:15:10,389][09024] Updated weights for policy 0, policy_version 3037 (0.0042)
+[2024-11-07 15:15:11,029][04584] Fps is (10 sec: 5324.3, 60 sec: 6280.4, 300 sec: 5248.4). Total num frames: 12439552. Throughput: 0: 1499.1. Samples: 1102797. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:15:11,031][04584] Avg episode reward: [(0, '4.354')]
+[2024-11-07 15:15:11,268][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003038_12443648.pth...
+[2024-11-07 15:15:11,509][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002706_11083776.pth
+[2024-11-07 15:15:16,028][04584] Fps is (10 sec: 6555.2, 60 sec: 6212.3, 300 sec: 5290.1). Total num frames: 12472320. Throughput: 0: 1503.8. Samples: 1113135. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:15:16,032][04584] Avg episode reward: [(0, '4.369')]
+[2024-11-07 15:15:16,987][09024] Updated weights for policy 0, policy_version 3047 (0.0049)
+[2024-11-07 15:15:21,028][04584] Fps is (10 sec: 6554.3, 60 sec: 6144.0, 300 sec: 5345.6). Total num frames: 12505088. Throughput: 0: 1518.1. Samples: 1121901. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:15:21,030][04584] Avg episode reward: [(0, '4.181')]
+[2024-11-07 15:15:24,057][09024] Updated weights for policy 0, policy_version 3057 (0.0030)
+[2024-11-07 15:15:26,029][04584] Fps is (10 sec: 6143.3, 60 sec: 6075.6, 300 sec: 5415.0). Total num frames: 12533760. Throughput: 0: 1497.6. Samples: 1126386. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0)
+[2024-11-07 15:15:26,031][04584] Avg episode reward: [(0, '4.241')]
+[2024-11-07 15:15:29,604][09024] Updated weights for policy 0, policy_version 3067 (0.0041)
+[2024-11-07 15:15:31,030][04584] Fps is (10 sec: 6551.9, 60 sec: 6332.7, 300 sec: 5484.4). Total num frames: 12570624. Throughput: 0: 1579.4. Samples: 1137018. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:15:31,040][04584] Avg episode reward: [(0, '4.564')]
+[2024-11-07 15:15:35,284][09024] Updated weights for policy 0, policy_version 3077 (0.0032)
+[2024-11-07 15:15:36,030][04584] Fps is (10 sec: 7372.0, 60 sec: 6280.3, 300 sec: 5553.8). Total num frames: 12607488. Throughput: 0: 1630.2. Samples: 1147851. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:15:36,032][04584] Avg episode reward: [(0, '4.505')]
+[2024-11-07 15:15:41,028][04584] Fps is (10 sec: 5735.8, 60 sec: 6144.0, 300 sec: 5553.9). Total num frames: 12627968. Throughput: 0: 1625.3. Samples: 1153302. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:15:41,030][04584] Avg episode reward: [(0, '4.570')]
+[2024-11-07 15:15:42,967][09024] Updated weights for policy 0, policy_version 3087 (0.0043)
+[2024-11-07 15:15:46,028][04584] Fps is (10 sec: 5735.9, 60 sec: 6417.0, 300 sec: 5609.4). Total num frames: 12664832. Throughput: 0: 1542.5. Samples: 1160490. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:15:46,030][04584] Avg episode reward: [(0, '4.759')]
+[2024-11-07 15:15:49,866][09024] Updated weights for policy 0, policy_version 3097 (0.0048)
+[2024-11-07 15:15:51,032][04584] Fps is (10 sec: 6141.4, 60 sec: 6281.1, 300 sec: 5623.2). Total num frames: 12689408. Throughput: 0: 1555.1. Samples: 1168953. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:15:51,035][04584] Avg episode reward: [(0, '4.603')]
+[2024-11-07 15:15:56,030][04584] Fps is (10 sec: 4914.2, 60 sec: 6007.2, 300 sec: 5637.2). Total num frames: 12713984. Throughput: 0: 1541.6. Samples: 1172169. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:15:56,034][04584] Avg episode reward: [(0, '4.540')]
+[2024-11-07 15:15:58,053][09024] Updated weights for policy 0, policy_version 3107 (0.0034)
+[2024-11-07 15:16:01,028][04584] Fps is (10 sec: 4917.4, 60 sec: 5871.0, 300 sec: 5692.7). Total num frames: 12738560. Throughput: 0: 1491.2. Samples: 1180236. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:16:01,031][04584] Avg episode reward: [(0, '4.267')]
+[2024-11-07 15:16:06,033][04584] Fps is (10 sec: 4504.1, 60 sec: 5870.7, 300 sec: 5692.8). Total num frames: 12759040. Throughput: 0: 1437.4. Samples: 1186593. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:16:06,038][04584] Avg episode reward: [(0, '4.296')]
+[2024-11-07 15:16:07,478][09024] Updated weights for policy 0, policy_version 3117 (0.0062)
+[2024-11-07 15:16:11,030][04584] Fps is (10 sec: 4095.1, 60 sec: 5666.0, 300 sec: 5678.8). Total num frames: 12779520. Throughput: 0: 1411.1. Samples: 1189884. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:16:11,032][04584] Avg episode reward: [(0, '4.350')]
+[2024-11-07 15:16:16,027][04584] Fps is (10 sec: 3278.6, 60 sec: 5324.9, 300 sec: 5637.2). Total num frames: 12791808. Throughput: 0: 1267.9. Samples: 1194069. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:16:16,039][04584] Avg episode reward: [(0, '4.416')]
+[2024-11-07 15:16:19,334][09024] Updated weights for policy 0, policy_version 3127 (0.0075)
+[2024-11-07 15:16:21,028][04584] Fps is (10 sec: 3277.4, 60 sec: 5120.0, 300 sec: 5637.2). Total num frames: 12812288. Throughput: 0: 1162.9. Samples: 1200180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:16:21,035][04584] Avg episode reward: [(0, '4.512')]
+[2024-11-07 15:16:26,028][04584] Fps is (10 sec: 4505.5, 60 sec: 5051.9, 300 sec: 5651.3). Total num frames: 12836864. Throughput: 0: 1108.3. Samples: 1203174. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:16:26,031][04584] Avg episode reward: [(0, '4.481')]
+[2024-11-07 15:16:29,455][09024] Updated weights for policy 0, policy_version 3137 (0.0076)
+[2024-11-07 15:16:31,034][04584] Fps is (10 sec: 4093.6, 60 sec: 4710.1, 300 sec: 5651.0). Total num frames: 12853248. Throughput: 0: 1089.5. Samples: 1209522. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:16:31,035][04584] Avg episode reward: [(0, '4.504')]
+[2024-11-07 15:16:36,028][04584] Fps is (10 sec: 4096.1, 60 sec: 4505.8, 300 sec: 5692.8). Total num frames: 12877824. Throughput: 0: 1049.7. Samples: 1216185. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:16:36,038][04584] Avg episode reward: [(0, '4.367')]
+[2024-11-07 15:16:37,390][09024] Updated weights for policy 0, policy_version 3147 (0.0072)
+[2024-11-07 15:16:41,027][04584] Fps is (10 sec: 6147.7, 60 sec: 4778.7, 300 sec: 5762.2). Total num frames: 12914688. Throughput: 0: 1088.7. Samples: 1221156. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:16:41,029][04584] Avg episode reward: [(0, '4.351')]
+[2024-11-07 15:16:42,957][09024] Updated weights for policy 0, policy_version 3157 (0.0072)
+[2024-11-07 15:16:46,028][04584] Fps is (10 sec: 7372.8, 60 sec: 4778.7, 300 sec: 5803.8). Total num frames: 12951552. Throughput: 0: 1153.2. Samples: 1232130. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:16:46,029][04584] Avg episode reward: [(0, '4.437')]
+[2024-11-07 15:16:50,809][09024] Updated weights for policy 0, policy_version 3167 (0.0060)
+[2024-11-07 15:16:51,031][04584] Fps is (10 sec: 5732.6, 60 sec: 4710.5, 300 sec: 5817.7). Total num frames: 12972032. Throughput: 0: 1166.0. Samples: 1239060. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0)
+[2024-11-07 15:16:51,033][04584] Avg episode reward: [(0, '4.411')]
+[2024-11-07 15:16:56,028][04584] Fps is (10 sec: 5734.1, 60 sec: 4915.3, 300 sec: 5887.1). Total num frames: 13008896. Throughput: 0: 1216.6. Samples: 1244631. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:16:56,034][04584] Avg episode reward: [(0, '4.402')]
+[2024-11-07 15:16:56,200][09024] Updated weights for policy 0, policy_version 3177 (0.0044)
+[2024-11-07 15:17:01,029][04584] Fps is (10 sec: 7783.8, 60 sec: 5188.2, 300 sec: 5956.5). Total num frames: 13049856. Throughput: 0: 1382.7. Samples: 1256292. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:17:01,031][04584] Avg episode reward: [(0, '4.430')]
+[2024-11-07 15:17:01,393][09024] Updated weights for policy 0, policy_version 3187 (0.0050)
+[2024-11-07 15:17:06,028][04584] Fps is (10 sec: 7782.9, 60 sec: 5461.8, 300 sec: 5970.5). Total num frames: 13086720. Throughput: 0: 1490.1. Samples: 1267233. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:17:06,029][04584] Avg episode reward: [(0, '4.488')]
+[2024-11-07 15:17:07,166][09024] Updated weights for policy 0, policy_version 3197 (0.0034)
+[2024-11-07 15:17:11,028][04584] Fps is (10 sec: 7373.7, 60 sec: 5734.6, 300 sec: 6039.9). Total num frames: 13123584. Throughput: 0: 1548.8. Samples: 1272870. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:17:11,029][04584] Avg episode reward: [(0, '4.295')]
+[2024-11-07 15:17:11,242][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003205_13127680.pth...
+[2024-11-07 15:17:11,341][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000002852_11681792.pth
+[2024-11-07 15:17:12,310][09024] Updated weights for policy 0, policy_version 3207 (0.0045)
+[2024-11-07 15:17:16,028][04584] Fps is (10 sec: 7372.8, 60 sec: 6144.0, 300 sec: 6067.7). Total num frames: 13160448. Throughput: 0: 1663.0. Samples: 1284345. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0)
+[2024-11-07 15:17:16,029][04584] Avg episode reward: [(0, '4.660')]
+[2024-11-07 15:17:17,924][09024] Updated weights for policy 0, policy_version 3217 (0.0044)
+[2024-11-07 15:17:21,028][04584] Fps is (10 sec: 6963.1, 60 sec: 6348.8, 300 sec: 6081.5). Total num frames: 13193216. Throughput: 0: 1750.2. Samples: 1294944. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:17:21,039][04584] Avg episode reward: [(0, '4.639')]
+[2024-11-07 15:17:26,029][04584] Fps is (10 sec: 5324.3, 60 sec: 6280.5, 300 sec: 6040.0). Total num frames: 13213696. Throughput: 0: 1676.6. Samples: 1296603. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:17:26,032][04584] Avg episode reward: [(0, '4.504')]
+[2024-11-07 15:17:26,557][09024] Updated weights for policy 0, policy_version 3227 (0.0042)
+[2024-11-07 15:17:31,028][04584] Fps is (10 sec: 5734.2, 60 sec: 6622.5, 300 sec: 6040.0). Total num frames: 13250560. Throughput: 0: 1658.7. Samples: 1306773. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:17:31,030][04584] Avg episode reward: [(0, '4.591')]
+[2024-11-07 15:17:31,592][09024] Updated weights for policy 0, policy_version 3237 (0.0043)
+[2024-11-07 15:17:36,028][04584] Fps is (10 sec: 6963.7, 60 sec: 6758.4, 300 sec: 6053.8). Total num frames: 13283328. Throughput: 0: 1743.0. Samples: 1317489. Policy #0 lag: (min: 0.0, avg: 1.1, max: 4.0)
+[2024-11-07 15:17:36,039][04584] Avg episode reward: [(0, '4.412')]
+[2024-11-07 15:17:40,291][09024] Updated weights for policy 0, policy_version 3247 (0.0078)
+[2024-11-07 15:17:41,030][04584] Fps is (10 sec: 4914.0, 60 sec: 6416.8, 300 sec: 5984.3). Total num frames: 13299712. Throughput: 0: 1683.3. Samples: 1320381. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:17:41,034][04584] Avg episode reward: [(0, '4.433')]
+[2024-11-07 15:17:46,035][04584] Fps is (10 sec: 3274.6, 60 sec: 6075.0, 300 sec: 5984.2). Total num frames: 13316096. Throughput: 0: 1530.9. Samples: 1325190. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:17:46,038][04584] Avg episode reward: [(0, '4.402')]
+[2024-11-07 15:17:51,032][04584] Fps is (10 sec: 3685.8, 60 sec: 6075.6, 300 sec: 5928.7). Total num frames: 13336576. Throughput: 0: 1412.1. Samples: 1330785. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:17:51,040][04584] Avg episode reward: [(0, '4.321')]
+[2024-11-07 15:17:51,466][09024] Updated weights for policy 0, policy_version 3257 (0.0066)
+[2024-11-07 15:17:57,446][04584] Fps is (10 sec: 2871.7, 60 sec: 5535.4, 300 sec: 5831.4). Total num frames: 13348864. Throughput: 0: 1311.9. Samples: 1333767. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:17:57,457][04584] Avg episode reward: [(0, '4.297')]
+[2024-11-07 15:18:01,037][04584] Fps is (10 sec: 2865.7, 60 sec: 5255.8, 300 sec: 5817.6). Total num frames: 13365248. Throughput: 0: 1179.2. Samples: 1337421. Policy #0 lag: (min: 0.0, avg: 0.9, max: 4.0)
+[2024-11-07 15:18:01,041][04584] Avg episode reward: [(0, '4.410')]
+[2024-11-07 15:18:06,028][04584] Fps is (10 sec: 2863.6, 60 sec: 4778.6, 300 sec: 5748.3). Total num frames: 13373440. Throughput: 0: 1028.0. Samples: 1341204. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0)
+[2024-11-07 15:18:06,035][04584] Avg episode reward: [(0, '4.401')]
+[2024-11-07 15:18:06,827][09024] Updated weights for policy 0, policy_version 3267 (0.0073)
+[2024-11-07 15:18:11,028][04584] Fps is (10 sec: 3279.8, 60 sec: 4573.8, 300 sec: 5720.5). Total num frames: 13398016. Throughput: 0: 1053.3. Samples: 1344000. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:18:11,035][04584] Avg episode reward: [(0, '4.495')]
+[2024-11-07 15:18:16,033][04584] Fps is (10 sec: 4503.3, 60 sec: 4300.4, 300 sec: 5714.8). Total num frames: 13418496. Throughput: 0: 978.6. Samples: 1350813. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:18:16,047][04584] Avg episode reward: [(0, '4.469')]
+[2024-11-07 15:18:16,227][09024] Updated weights for policy 0, policy_version 3277 (0.0046)
+[2024-11-07 15:18:21,028][04584] Fps is (10 sec: 4096.1, 60 sec: 4096.0, 300 sec: 5665.0). Total num frames: 13438976. Throughput: 0: 873.2. Samples: 1356783. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:18:21,031][04584] Avg episode reward: [(0, '4.397')]
+[2024-11-07 15:18:25,955][09024] Updated weights for policy 0, policy_version 3287 (0.0095)
+[2024-11-07 15:18:26,028][04584] Fps is (10 sec: 4507.9, 60 sec: 4164.3, 300 sec: 5637.2). Total num frames: 13463552. Throughput: 0: 880.2. Samples: 1359987. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:18:26,029][04584] Avg episode reward: [(0, '4.511')]
+[2024-11-07 15:18:31,786][04584] Fps is (10 sec: 3045.8, 60 sec: 3640.4, 300 sec: 5553.6). Total num frames: 13471744. Throughput: 0: 882.3. Samples: 1365555. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:18:31,802][04584] Avg episode reward: [(0, '4.441')]
+[2024-11-07 15:18:36,028][04584] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 5512.4). Total num frames: 13488128. Throughput: 0: 844.5. Samples: 1368783. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:18:36,046][04584] Avg episode reward: [(0, '4.248')]
+[2024-11-07 15:18:40,005][09024] Updated weights for policy 0, policy_version 3297 (0.0126)
+[2024-11-07 15:18:41,028][04584] Fps is (10 sec: 3545.7, 60 sec: 3413.5, 300 sec: 5442.8). Total num frames: 13504512. Throughput: 0: 868.6. Samples: 1371624. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:18:41,031][04584] Avg episode reward: [(0, '4.406')]
+[2024-11-07 15:18:46,028][04584] Fps is (10 sec: 4095.8, 60 sec: 3550.2, 300 sec: 5401.2). Total num frames: 13529088. Throughput: 0: 893.7. Samples: 1377630. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:18:46,041][04584] Avg episode reward: [(0, '4.452')]
+[2024-11-07 15:18:49,850][09024] Updated weights for policy 0, policy_version 3307 (0.0041)
+[2024-11-07 15:18:51,028][04584] Fps is (10 sec: 4095.8, 60 sec: 3481.8, 300 sec: 5381.0). Total num frames: 13545472. Throughput: 0: 942.9. Samples: 1383636. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:18:51,037][04584] Avg episode reward: [(0, '4.478')]
+[2024-11-07 15:18:56,031][04584] Fps is (10 sec: 4504.3, 60 sec: 3845.3, 300 sec: 5359.4). Total num frames: 13574144. Throughput: 0: 959.2. Samples: 1387167. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:18:56,040][04584] Avg episode reward: [(0, '4.499')]
+[2024-11-07 15:18:57,652][09024] Updated weights for policy 0, policy_version 3317 (0.0054)
+[2024-11-07 15:19:01,028][04584] Fps is (10 sec: 5734.7, 60 sec: 3960.1, 300 sec: 5345.6). Total num frames: 13602816. Throughput: 0: 1009.3. Samples: 1396227. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:19:01,042][04584] Avg episode reward: [(0, '4.339')]
+[2024-11-07 15:19:06,270][04584] Fps is (10 sec: 4800.8, 60 sec: 4147.6, 300 sec: 5285.8). Total num frames: 13623296. Throughput: 0: 962.7. Samples: 1400337. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0)
+[2024-11-07 15:19:06,275][04584] Avg episode reward: [(0, '4.364')]
+[2024-11-07 15:19:06,706][09024] Updated weights for policy 0, policy_version 3327 (0.0078)
+[2024-11-07 15:19:11,028][04584] Fps is (10 sec: 4915.3, 60 sec: 4232.6, 300 sec: 5262.3). Total num frames: 13651968. Throughput: 0: 1026.0. Samples: 1406157. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:19:11,031][04584] Avg episode reward: [(0, '4.456')]
+[2024-11-07 15:19:11,165][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003333_13651968.pth...
+[2024-11-07 15:19:11,634][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003038_12443648.pth
+[2024-11-07 15:19:13,368][09024] Updated weights for policy 0, policy_version 3337 (0.0061)
+[2024-11-07 15:19:16,029][04584] Fps is (10 sec: 6295.7, 60 sec: 4437.6, 300 sec: 5248.4). Total num frames: 13684736. Throughput: 0: 1129.8. Samples: 1415538. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0)
+[2024-11-07 15:19:16,031][04584] Avg episode reward: [(0, '4.392')]
+[2024-11-07 15:19:19,544][09024] Updated weights for policy 0, policy_version 3347 (0.0048)
+[2024-11-07 15:19:21,028][04584] Fps is (10 sec: 6553.6, 60 sec: 4642.2, 300 sec: 5248.4). Total num frames: 13717504. Throughput: 0: 1260.9. Samples: 1425525. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:19:21,030][04584] Avg episode reward: [(0, '4.638')]
+[2024-11-07 15:19:25,100][09024] Updated weights for policy 0, policy_version 3357 (0.0035)
+[2024-11-07 15:19:26,027][04584] Fps is (10 sec: 7373.7, 60 sec: 4915.2, 300 sec: 5312.9). Total num frames: 13758464. Throughput: 0: 1307.5. Samples: 1430463. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:19:26,029][04584] Avg episode reward: [(0, '4.403')]
+[2024-11-07 15:19:30,049][09024] Updated weights for policy 0, policy_version 3367 (0.0042)
+[2024-11-07 15:19:31,028][04584] Fps is (10 sec: 8192.0, 60 sec: 5531.3, 300 sec: 5317.9). Total num frames: 13799424. Throughput: 0: 1453.4. Samples: 1443030. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:19:31,030][04584] Avg episode reward: [(0, '4.426')]
+[2024-11-07 15:19:35,383][09024] Updated weights for policy 0, policy_version 3377 (0.0046)
+[2024-11-07 15:19:36,028][04584] Fps is (10 sec: 7782.2, 60 sec: 5802.7, 300 sec: 5345.6). Total num frames: 13836288. Throughput: 0: 1583.3. Samples: 1454883. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:19:36,030][04584] Avg episode reward: [(0, '4.432')]
+[2024-11-07 15:19:41,028][04584] Fps is (10 sec: 5734.0, 60 sec: 5870.9, 300 sec: 5345.6). Total num frames: 13856768. Throughput: 0: 1627.3. Samples: 1460391. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:19:41,031][04584] Avg episode reward: [(0, '4.316')]
+[2024-11-07 15:19:43,198][09024] Updated weights for policy 0, policy_version 3387 (0.0046)
+[2024-11-07 15:19:46,028][04584] Fps is (10 sec: 5734.4, 60 sec: 6075.8, 300 sec: 5359.7). Total num frames: 13893632. Throughput: 0: 1574.5. Samples: 1467081. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0)
+[2024-11-07 15:19:46,031][04584] Avg episode reward: [(0, '4.465')]
+[2024-11-07 15:19:48,791][09024] Updated weights for policy 0, policy_version 3397 (0.0040)
+[2024-11-07 15:19:51,029][04584] Fps is (10 sec: 7372.6, 60 sec: 6417.0, 300 sec: 5345.6). Total num frames: 13930496. Throughput: 0: 1741.6. Samples: 1478289. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2024-11-07 15:19:51,031][04584] Avg episode reward: [(0, '4.315')]
+[2024-11-07 15:19:56,032][04584] Fps is (10 sec: 5732.1, 60 sec: 6280.5, 300 sec: 5303.9). Total num frames: 13950976. Throughput: 0: 1703.6. Samples: 1482825. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0)
+[2024-11-07 15:19:56,033][04584] Avg episode reward: [(0, '4.279')]
+[2024-11-07 15:19:56,173][09024] Updated weights for policy 0, policy_version 3407 (0.0061)
+[2024-11-07 15:20:01,028][04584] Fps is (10 sec: 5325.3, 60 sec: 6348.8, 300 sec: 5345.7). Total num frames: 13983744. Throughput: 0: 1658.4. Samples: 1490166. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0)
+[2024-11-07 15:20:01,038][04584] Avg episode reward: [(0, '4.491')]
+[2024-11-07 15:20:03,670][09024] Updated weights for policy 0, policy_version 3417 (0.0068)
+[2024-11-07 15:20:06,028][04584] Fps is (10 sec: 5736.7, 60 sec: 6443.0, 300 sec: 5317.9). Total num frames: 14008320. Throughput: 0: 1613.7. Samples: 1498143. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0)
+[2024-11-07 15:20:06,033][04584] Avg episode reward: [(0, '4.486')]
+[2024-11-07 15:20:10,251][09024] Updated weights for policy 0, policy_version 3427 (0.0055)
+[2024-11-07 15:20:11,031][04584] Fps is (10 sec: 5323.5, 60 sec: 6416.8, 300 sec: 5303.9). Total num frames: 14036992. Throughput: 0: 1615.9. Samples: 1503183. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0)
+[2024-11-07 15:20:11,033][04584] Avg episode reward: [(0, '4.214')]
+[2024-11-07 15:20:16,028][04584] Fps is (10 sec: 4505.5, 60 sec: 6144.1, 300 sec: 5248.4). Total num frames: 14053376. Throughput: 0: 1494.3. Samples: 1510272. Policy #0 lag: (min: 0.0, avg: 1.5, max: 2.0)
+[2024-11-07 15:20:16,029][04584] Avg episode reward: [(0, '4.210')]
+[2024-11-07 15:20:19,639][09024] Updated weights for policy 0, policy_version 3437 (0.0053)
+[2024-11-07 15:20:21,033][04584] Fps is (10 sec: 4915.4, 60 sec: 6143.8, 300 sec: 5262.3). Total num frames: 14086144.
Throughput: 0: 1395.5. Samples: 1517682. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:20:21,037][04584] Avg episode reward: [(0, '4.434')] +[2024-11-07 15:20:26,028][04584] Fps is (10 sec: 6143.8, 60 sec: 5939.1, 300 sec: 5234.6). Total num frames: 14114816. Throughput: 0: 1373.1. Samples: 1522182. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:20:26,032][04584] Avg episode reward: [(0, '4.349')] +[2024-11-07 15:20:26,131][09024] Updated weights for policy 0, policy_version 3447 (0.0076) +[2024-11-07 15:20:31,028][04584] Fps is (10 sec: 6144.9, 60 sec: 5802.6, 300 sec: 5220.7). Total num frames: 14147584. Throughput: 0: 1439.2. Samples: 1531848. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:20:31,031][04584] Avg episode reward: [(0, '4.535')] +[2024-11-07 15:20:32,703][09024] Updated weights for policy 0, policy_version 3457 (0.0041) +[2024-11-07 15:20:36,028][04584] Fps is (10 sec: 6554.1, 60 sec: 5734.4, 300 sec: 5262.3). Total num frames: 14180352. Throughput: 0: 1398.4. Samples: 1541214. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:20:36,030][04584] Avg episode reward: [(0, '4.496')] +[2024-11-07 15:20:38,720][09024] Updated weights for policy 0, policy_version 3467 (0.0053) +[2024-11-07 15:20:41,028][04584] Fps is (10 sec: 6553.8, 60 sec: 5939.2, 300 sec: 5248.4). Total num frames: 14213120. Throughput: 0: 1413.5. Samples: 1546428. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:20:41,042][04584] Avg episode reward: [(0, '4.401')] +[2024-11-07 15:20:45,336][09024] Updated weights for policy 0, policy_version 3477 (0.0039) +[2024-11-07 15:20:46,028][04584] Fps is (10 sec: 6143.7, 60 sec: 5802.6, 300 sec: 5262.4). Total num frames: 14241792. Throughput: 0: 1449.3. Samples: 1555386. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-11-07 15:20:46,041][04584] Avg episode reward: [(0, '4.415')] +[2024-11-07 15:20:51,028][04584] Fps is (10 sec: 4505.7, 60 sec: 5461.4, 300 sec: 5234.6). 
Total num frames: 14258176. Throughput: 0: 1400.9. Samples: 1561185. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:20:51,034][04584] Avg episode reward: [(0, '4.540')] +[2024-11-07 15:20:54,462][09024] Updated weights for policy 0, policy_version 3487 (0.0060) +[2024-11-07 15:20:56,031][04584] Fps is (10 sec: 4913.8, 60 sec: 5666.2, 300 sec: 5262.3). Total num frames: 14290944. Throughput: 0: 1396.8. Samples: 1566039. Policy #0 lag: (min: 0.0, avg: 1.6, max: 3.0) +[2024-11-07 15:20:56,042][04584] Avg episode reward: [(0, '4.524')] +[2024-11-07 15:21:01,028][04584] Fps is (10 sec: 5734.5, 60 sec: 5529.6, 300 sec: 5276.3). Total num frames: 14315520. Throughput: 0: 1413.1. Samples: 1573863. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:21:01,031][04584] Avg episode reward: [(0, '4.498')] +[2024-11-07 15:21:02,337][09024] Updated weights for policy 0, policy_version 3497 (0.0062) +[2024-11-07 15:21:06,028][04584] Fps is (10 sec: 4916.8, 60 sec: 5529.6, 300 sec: 5290.1). Total num frames: 14340096. Throughput: 0: 1409.3. Samples: 1581096. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:21:06,031][04584] Avg episode reward: [(0, '4.470')] +[2024-11-07 15:21:09,697][09024] Updated weights for policy 0, policy_version 3507 (0.0050) +[2024-11-07 15:21:11,028][04584] Fps is (10 sec: 5324.8, 60 sec: 5529.8, 300 sec: 5345.6). Total num frames: 14368768. Throughput: 0: 1419.2. Samples: 1586046. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:21:11,039][04584] Avg episode reward: [(0, '4.391')] +[2024-11-07 15:21:11,183][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003509_14372864.pth... +[2024-11-07 15:21:11,472][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003205_13127680.pth +[2024-11-07 15:21:16,028][04584] Fps is (10 sec: 5734.2, 60 sec: 5734.4, 300 sec: 5373.4). Total num frames: 14397440. 
Throughput: 0: 1388.4. Samples: 1594326. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:21:16,035][04584] Avg episode reward: [(0, '4.430')] +[2024-11-07 15:21:16,959][09024] Updated weights for policy 0, policy_version 3517 (0.0056) +[2024-11-07 15:21:21,028][04584] Fps is (10 sec: 5734.3, 60 sec: 5666.3, 300 sec: 5387.3). Total num frames: 14426112. Throughput: 0: 1373.9. Samples: 1603041. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-11-07 15:21:21,030][04584] Avg episode reward: [(0, '4.463')] +[2024-11-07 15:21:26,028][04584] Fps is (10 sec: 4505.6, 60 sec: 5461.4, 300 sec: 5387.4). Total num frames: 14442496. Throughput: 0: 1323.1. Samples: 1605966. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:21:26,031][04584] Avg episode reward: [(0, '4.369')] +[2024-11-07 15:21:26,572][09024] Updated weights for policy 0, policy_version 3527 (0.0072) +[2024-11-07 15:21:31,029][04584] Fps is (10 sec: 4505.0, 60 sec: 5393.0, 300 sec: 5401.1). Total num frames: 14471168. Throughput: 0: 1270.9. Samples: 1612578. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2024-11-07 15:21:31,031][04584] Avg episode reward: [(0, '4.422')] +[2024-11-07 15:21:32,854][09024] Updated weights for policy 0, policy_version 3537 (0.0041) +[2024-11-07 15:21:36,028][04584] Fps is (10 sec: 6553.3, 60 sec: 5461.3, 300 sec: 5401.1). Total num frames: 14508032. Throughput: 0: 1370.6. Samples: 1622862. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:21:36,030][04584] Avg episode reward: [(0, '4.480')] +[2024-11-07 15:21:38,882][09024] Updated weights for policy 0, policy_version 3547 (0.0037) +[2024-11-07 15:21:41,028][04584] Fps is (10 sec: 6964.0, 60 sec: 5461.3, 300 sec: 5387.3). Total num frames: 14540800. Throughput: 0: 1373.6. Samples: 1627848. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:21:41,031][04584] Avg episode reward: [(0, '4.407')] +[2024-11-07 15:21:45,700][09024] Updated weights for policy 0, policy_version 3557 (0.0073) +[2024-11-07 15:21:46,028][04584] Fps is (10 sec: 6144.4, 60 sec: 5461.3, 300 sec: 5415.1). Total num frames: 14569472. Throughput: 0: 1403.5. Samples: 1637022. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:21:46,032][04584] Avg episode reward: [(0, '4.427')] +[2024-11-07 15:21:51,028][04584] Fps is (10 sec: 6553.7, 60 sec: 5802.7, 300 sec: 5415.1). Total num frames: 14606336. Throughput: 0: 1468.6. Samples: 1647183. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:21:51,030][04584] Avg episode reward: [(0, '4.294')] +[2024-11-07 15:21:51,480][09024] Updated weights for policy 0, policy_version 3567 (0.0047) +[2024-11-07 15:21:56,028][04584] Fps is (10 sec: 6963.1, 60 sec: 5802.9, 300 sec: 5387.3). Total num frames: 14639104. Throughput: 0: 1479.7. Samples: 1652631. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:21:56,037][04584] Avg episode reward: [(0, '4.431')] +[2024-11-07 15:21:59,534][09024] Updated weights for policy 0, policy_version 3577 (0.0050) +[2024-11-07 15:22:01,029][04584] Fps is (10 sec: 5324.4, 60 sec: 5734.3, 300 sec: 5331.7). Total num frames: 14659584. Throughput: 0: 1446.8. Samples: 1659432. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:22:01,033][04584] Avg episode reward: [(0, '4.272')] +[2024-11-07 15:22:06,031][04584] Fps is (10 sec: 4913.5, 60 sec: 5802.3, 300 sec: 5303.9). Total num frames: 14688256. Throughput: 0: 1439.5. Samples: 1667823. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2024-11-07 15:22:06,033][04584] Avg episode reward: [(0, '4.296')] +[2024-11-07 15:22:06,795][09024] Updated weights for policy 0, policy_version 3587 (0.0040) +[2024-11-07 15:22:11,027][04584] Fps is (10 sec: 5735.0, 60 sec: 5802.7, 300 sec: 5276.2). Total num frames: 14716928. 
Throughput: 0: 1472.8. Samples: 1672242. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-11-07 15:22:11,030][04584] Avg episode reward: [(0, '4.152')] +[2024-11-07 15:22:12,703][09024] Updated weights for policy 0, policy_version 3597 (0.0034) +[2024-11-07 15:22:16,029][04584] Fps is (10 sec: 6145.8, 60 sec: 5870.9, 300 sec: 5276.2). Total num frames: 14749696. Throughput: 0: 1566.5. Samples: 1683072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:22:16,030][04584] Avg episode reward: [(0, '4.296')] +[2024-11-07 15:22:20,096][09024] Updated weights for policy 0, policy_version 3607 (0.0047) +[2024-11-07 15:22:21,028][04584] Fps is (10 sec: 5734.2, 60 sec: 5802.7, 300 sec: 5290.1). Total num frames: 14774272. Throughput: 0: 1506.8. Samples: 1690665. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-11-07 15:22:21,031][04584] Avg episode reward: [(0, '4.260')] +[2024-11-07 15:22:26,028][04584] Fps is (10 sec: 4505.9, 60 sec: 5871.0, 300 sec: 5234.6). Total num frames: 14794752. Throughput: 0: 1458.4. Samples: 1693476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:22:26,031][04584] Avg episode reward: [(0, '4.190')] +[2024-11-07 15:22:29,189][09024] Updated weights for policy 0, policy_version 3617 (0.0069) +[2024-11-07 15:22:31,030][04584] Fps is (10 sec: 4914.7, 60 sec: 5871.0, 300 sec: 5220.6). Total num frames: 14823424. Throughput: 0: 1418.5. Samples: 1700856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-11-07 15:22:31,035][04584] Avg episode reward: [(0, '4.454')] +[2024-11-07 15:22:36,031][04584] Fps is (10 sec: 4094.8, 60 sec: 5461.1, 300 sec: 5206.8). Total num frames: 14835712. Throughput: 0: 1289.9. Samples: 1705233. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:22:36,034][04584] Avg episode reward: [(0, '4.470')] +[2024-11-07 15:22:39,738][09024] Updated weights for policy 0, policy_version 3627 (0.0034) +[2024-11-07 15:22:41,030][04584] Fps is (10 sec: 3685.9, 60 sec: 5324.6, 300 sec: 5234.6). 
Total num frames: 14860288. Throughput: 0: 1252.8. Samples: 1709010. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:22:41,035][04584] Avg episode reward: [(0, '4.389')] +[2024-11-07 15:22:46,028][04584] Fps is (10 sec: 5326.5, 60 sec: 5324.8, 300 sec: 5262.4). Total num frames: 14888960. Throughput: 0: 1274.8. Samples: 1716798. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:22:46,038][04584] Avg episode reward: [(0, '4.541')] +[2024-11-07 15:22:47,436][09024] Updated weights for policy 0, policy_version 3637 (0.0068) +[2024-11-07 15:22:51,028][04584] Fps is (10 sec: 5326.1, 60 sec: 5120.0, 300 sec: 5329.6). Total num frames: 14913536. Throughput: 0: 1276.2. Samples: 1725249. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-11-07 15:22:51,030][04584] Avg episode reward: [(0, '4.592')] +[2024-11-07 15:22:54,034][09024] Updated weights for policy 0, policy_version 3647 (0.0041) +[2024-11-07 15:22:56,028][04584] Fps is (10 sec: 6144.0, 60 sec: 5188.3, 300 sec: 5373.6). Total num frames: 14950400. Throughput: 0: 1286.1. Samples: 1730118. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:22:56,029][04584] Avg episode reward: [(0, '4.450')] +[2024-11-07 15:22:59,685][09024] Updated weights for policy 0, policy_version 3657 (0.0033) +[2024-11-07 15:23:01,029][04584] Fps is (10 sec: 6962.5, 60 sec: 5393.1, 300 sec: 5456.7). Total num frames: 14983168. Throughput: 0: 1286.4. Samples: 1740960. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:23:01,043][04584] Avg episode reward: [(0, '4.578')] +[2024-11-07 15:23:07,933][04584] Fps is (10 sec: 5160.5, 60 sec: 5227.4, 300 sec: 5435.5). Total num frames: 15011840. Throughput: 0: 1254.4. Samples: 1749504. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:23:07,937][04584] Avg episode reward: [(0, '4.497')] +[2024-11-07 15:23:08,868][09024] Updated weights for policy 0, policy_version 3667 (0.0061) +[2024-11-07 15:23:11,028][04584] Fps is (10 sec: 4915.6, 60 sec: 5256.5, 300 sec: 5470.7). Total num frames: 15032320. Throughput: 0: 1274.7. Samples: 1750839. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:23:11,030][04584] Avg episode reward: [(0, '4.241')] +[2024-11-07 15:23:11,056][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003670_15032320.pth... +[2024-11-07 15:23:11,488][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003333_13651968.pth +[2024-11-07 15:23:15,771][09024] Updated weights for policy 0, policy_version 3677 (0.0062) +[2024-11-07 15:23:16,028][04584] Fps is (10 sec: 6072.5, 60 sec: 5188.3, 300 sec: 5498.4). Total num frames: 15060992. Throughput: 0: 1307.6. Samples: 1759698. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:23:16,030][04584] Avg episode reward: [(0, '4.370')] +[2024-11-07 15:23:21,028][04584] Fps is (10 sec: 5734.5, 60 sec: 5256.6, 300 sec: 5512.2). Total num frames: 15089664. Throughput: 0: 1416.4. Samples: 1768965. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:23:21,030][04584] Avg episode reward: [(0, '4.447')] +[2024-11-07 15:23:22,754][09024] Updated weights for policy 0, policy_version 3687 (0.0042) +[2024-11-07 15:23:26,028][04584] Fps is (10 sec: 6553.6, 60 sec: 5529.6, 300 sec: 5623.9). Total num frames: 15126528. Throughput: 0: 1440.3. Samples: 1773822. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:23:26,031][04584] Avg episode reward: [(0, '4.320')] +[2024-11-07 15:23:27,486][09024] Updated weights for policy 0, policy_version 3697 (0.0048) +[2024-11-07 15:23:31,028][04584] Fps is (10 sec: 7781.8, 60 sec: 5734.4, 300 sec: 5692.7). 
Total num frames: 15167488. Throughput: 0: 1530.0. Samples: 1785651. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:23:31,032][04584] Avg episode reward: [(0, '4.306')] +[2024-11-07 15:23:32,802][09024] Updated weights for policy 0, policy_version 3707 (0.0041) +[2024-11-07 15:23:36,033][04584] Fps is (10 sec: 7778.0, 60 sec: 6143.7, 300 sec: 5762.1). Total num frames: 15204352. Throughput: 0: 1604.4. Samples: 1797456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:23:36,036][04584] Avg episode reward: [(0, '4.600')] +[2024-11-07 15:23:39,526][09024] Updated weights for policy 0, policy_version 3717 (0.0049) +[2024-11-07 15:23:42,489][04584] Fps is (10 sec: 5361.2, 60 sec: 5998.2, 300 sec: 5733.8). Total num frames: 15228928. Throughput: 0: 1531.2. Samples: 1801260. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:23:42,491][04584] Avg episode reward: [(0, '4.589')] +[2024-11-07 15:23:46,027][04584] Fps is (10 sec: 4508.3, 60 sec: 6007.5, 300 sec: 5776.1). Total num frames: 15249408. Throughput: 0: 1467.6. Samples: 1806999. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-11-07 15:23:46,030][04584] Avg episode reward: [(0, '4.559')] +[2024-11-07 15:23:47,911][09024] Updated weights for policy 0, policy_version 3727 (0.0051) +[2024-11-07 15:23:51,028][04584] Fps is (10 sec: 6715.1, 60 sec: 6212.2, 300 sec: 5803.9). Total num frames: 15286272. Throughput: 0: 1578.8. Samples: 1817544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:23:51,030][04584] Avg episode reward: [(0, '4.696')] +[2024-11-07 15:23:53,536][09024] Updated weights for policy 0, policy_version 3737 (0.0043) +[2024-11-07 15:23:56,028][04584] Fps is (10 sec: 7782.1, 60 sec: 6280.5, 300 sec: 5845.5). Total num frames: 15327232. Throughput: 0: 1607.7. Samples: 1823187. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:23:56,029][04584] Avg episode reward: [(0, '4.438')] +[2024-11-07 15:23:58,746][09024] Updated weights for policy 0, policy_version 3747 (0.0047) +[2024-11-07 15:24:01,028][04584] Fps is (10 sec: 7782.8, 60 sec: 6348.9, 300 sec: 5905.9). Total num frames: 15364096. Throughput: 0: 1671.8. Samples: 1834929. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:24:01,033][04584] Avg episode reward: [(0, '4.452')] +[2024-11-07 15:24:04,806][09024] Updated weights for policy 0, policy_version 3757 (0.0063) +[2024-11-07 15:24:06,030][04584] Fps is (10 sec: 6961.5, 60 sec: 6627.3, 300 sec: 5914.9). Total num frames: 15396864. Throughput: 0: 1695.1. Samples: 1845249. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:24:06,035][04584] Avg episode reward: [(0, '4.504')] +[2024-11-07 15:24:10,314][09024] Updated weights for policy 0, policy_version 3767 (0.0044) +[2024-11-07 15:24:11,028][04584] Fps is (10 sec: 6963.2, 60 sec: 6690.2, 300 sec: 5928.8). Total num frames: 15433728. Throughput: 0: 1711.3. Samples: 1850832. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:24:11,031][04584] Avg episode reward: [(0, '4.455')] +[2024-11-07 15:24:16,915][04584] Fps is (10 sec: 5644.4, 60 sec: 6525.3, 300 sec: 5883.3). Total num frames: 15458304. Throughput: 0: 1663.4. Samples: 1861977. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:24:16,920][04584] Avg episode reward: [(0, '4.409')] +[2024-11-07 15:24:17,977][09024] Updated weights for policy 0, policy_version 3777 (0.0042) +[2024-11-07 15:24:21,033][04584] Fps is (10 sec: 5734.2, 60 sec: 6690.1, 300 sec: 5873.2). Total num frames: 15491072. Throughput: 0: 1585.9. Samples: 1868814. 
Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2024-11-07 15:24:21,041][04584] Avg episode reward: [(0, '4.502')] +[2024-11-07 15:24:23,334][09024] Updated weights for policy 0, policy_version 3787 (0.0046) +[2024-11-07 15:24:26,028][04584] Fps is (10 sec: 7641.4, 60 sec: 6690.1, 300 sec: 5859.4). Total num frames: 15527936. Throughput: 0: 1676.7. Samples: 1874262. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:24:26,032][04584] Avg episode reward: [(0, '4.557')] +[2024-11-07 15:24:28,965][09024] Updated weights for policy 0, policy_version 3797 (0.0066) +[2024-11-07 15:24:31,028][04584] Fps is (10 sec: 7782.5, 60 sec: 6690.2, 300 sec: 5873.2). Total num frames: 15568896. Throughput: 0: 1750.1. Samples: 1885752. Policy #0 lag: (min: 0.0, avg: 1.4, max: 4.0) +[2024-11-07 15:24:31,030][04584] Avg episode reward: [(0, '4.387')] +[2024-11-07 15:24:33,986][09024] Updated weights for policy 0, policy_version 3807 (0.0035) +[2024-11-07 15:24:36,028][04584] Fps is (10 sec: 7782.2, 60 sec: 6690.7, 300 sec: 5928.8). Total num frames: 15605760. Throughput: 0: 1776.5. Samples: 1897488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:24:36,031][04584] Avg episode reward: [(0, '4.476')] +[2024-11-07 15:24:39,316][09024] Updated weights for policy 0, policy_version 3817 (0.0047) +[2024-11-07 15:24:41,028][04584] Fps is (10 sec: 7372.9, 60 sec: 7067.0, 300 sec: 5928.8). Total num frames: 15642624. Throughput: 0: 1785.7. Samples: 1903545. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:24:41,030][04584] Avg episode reward: [(0, '4.429')] +[2024-11-07 15:24:45,770][09024] Updated weights for policy 0, policy_version 3827 (0.0057) +[2024-11-07 15:24:46,029][04584] Fps is (10 sec: 6962.5, 60 sec: 7099.6, 300 sec: 5914.9). Total num frames: 15675392. Throughput: 0: 1739.0. Samples: 1913187. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:24:46,033][04584] Avg episode reward: [(0, '4.549')] +[2024-11-07 15:24:51,363][04584] Fps is (10 sec: 5152.1, 60 sec: 6788.8, 300 sec: 5908.3). Total num frames: 15695872. Throughput: 0: 1610.0. Samples: 1918233. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:24:51,366][04584] Avg episode reward: [(0, '4.576')] +[2024-11-07 15:24:55,001][09024] Updated weights for policy 0, policy_version 3837 (0.0038) +[2024-11-07 15:24:56,028][04584] Fps is (10 sec: 4506.1, 60 sec: 6553.6, 300 sec: 5887.1). Total num frames: 15720448. Throughput: 0: 1615.8. Samples: 1923543. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2024-11-07 15:24:56,030][04584] Avg episode reward: [(0, '4.527')] +[2024-11-07 15:25:00,829][09024] Updated weights for policy 0, policy_version 3847 (0.0064) +[2024-11-07 15:25:01,028][04584] Fps is (10 sec: 6357.0, 60 sec: 6553.6, 300 sec: 5928.8). Total num frames: 15757312. Throughput: 0: 1617.7. Samples: 1933338. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:25:01,030][04584] Avg episode reward: [(0, '4.338')] +[2024-11-07 15:25:06,027][04584] Fps is (10 sec: 7373.0, 60 sec: 6622.2, 300 sec: 5956.6). Total num frames: 15794176. Throughput: 0: 1676.0. Samples: 1944231. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:25:06,032][04584] Avg episode reward: [(0, '4.445')] +[2024-11-07 15:25:06,372][09024] Updated weights for policy 0, policy_version 3857 (0.0037) +[2024-11-07 15:25:11,031][04584] Fps is (10 sec: 7370.4, 60 sec: 6621.5, 300 sec: 6025.9). Total num frames: 15831040. Throughput: 0: 1680.4. Samples: 1949886. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:25:11,033][04584] Avg episode reward: [(0, '4.314')] +[2024-11-07 15:25:11,052][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003865_15831040.pth... 
+[2024-11-07 15:25:11,277][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003509_14372864.pth +[2024-11-07 15:25:11,863][09024] Updated weights for policy 0, policy_version 3867 (0.0044) +[2024-11-07 15:25:16,028][04584] Fps is (10 sec: 7372.7, 60 sec: 6929.2, 300 sec: 6039.9). Total num frames: 15867904. Throughput: 0: 1678.3. Samples: 1961277. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2024-11-07 15:25:16,035][04584] Avg episode reward: [(0, '4.238')] +[2024-11-07 15:25:17,024][09024] Updated weights for policy 0, policy_version 3877 (0.0041) +[2024-11-07 15:25:21,030][04584] Fps is (10 sec: 7783.6, 60 sec: 6963.0, 300 sec: 6081.5). Total num frames: 15908864. Throughput: 0: 1676.8. Samples: 1972947. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:25:21,032][04584] Avg episode reward: [(0, '4.292')] +[2024-11-07 15:25:22,877][09024] Updated weights for policy 0, policy_version 3887 (0.0059) +[2024-11-07 15:25:26,028][04584] Fps is (10 sec: 5734.1, 60 sec: 6621.8, 300 sec: 6026.0). Total num frames: 15925248. Throughput: 0: 1657.8. Samples: 1978146. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2024-11-07 15:25:26,030][04584] Avg episode reward: [(0, '4.499')] +[2024-11-07 15:25:30,836][09024] Updated weights for policy 0, policy_version 3897 (0.0053) +[2024-11-07 15:25:31,028][04584] Fps is (10 sec: 5325.6, 60 sec: 6553.6, 300 sec: 6039.9). Total num frames: 15962112. Throughput: 0: 1588.0. Samples: 1984647. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2024-11-07 15:25:31,030][04584] Avg episode reward: [(0, '4.349')] +[2024-11-07 15:25:36,029][04584] Fps is (10 sec: 7372.2, 60 sec: 6553.5, 300 sec: 6053.7). Total num frames: 15998976. Throughput: 0: 1733.7. Samples: 1995669. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2024-11-07 15:25:36,035][04584] Avg episode reward: [(0, '4.612')] +[2024-11-07 15:25:36,308][09024] Updated weights for policy 0, policy_version 3907 (0.0043) +[2024-11-07 15:25:36,861][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth... +[2024-11-07 15:25:36,876][09009] Stopping Batcher_0... +[2024-11-07 15:25:36,877][09009] Loop batcher_evt_loop terminating... +[2024-11-07 15:25:36,874][04584] Component Batcher_0 stopped! +[2024-11-07 15:25:36,971][09024] Weights refcount: 2 0 +[2024-11-07 15:25:36,977][09024] Stopping InferenceWorker_p0-w0... +[2024-11-07 15:25:36,978][09024] Loop inference_proc0-0_evt_loop terminating... +[2024-11-07 15:25:36,979][04584] Component InferenceWorker_p0-w0 stopped! +[2024-11-07 15:25:37,003][09009] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003670_15032320.pth +[2024-11-07 15:25:37,010][09009] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth... +[2024-11-07 15:25:37,187][09009] Stopping LearnerWorker_p0... +[2024-11-07 15:25:37,188][09009] Loop learner_proc0_evt_loop terminating... +[2024-11-07 15:25:37,188][04584] Component LearnerWorker_p0 stopped! +[2024-11-07 15:25:37,438][04584] Component RolloutWorker_w3 stopped! +[2024-11-07 15:25:37,451][04584] Component RolloutWorker_w4 stopped! +[2024-11-07 15:25:37,446][09026] Stopping RolloutWorker_w3... +[2024-11-07 15:25:37,460][09026] Loop rollout_proc3_evt_loop terminating... +[2024-11-07 15:25:37,452][09030] Stopping RolloutWorker_w4... +[2024-11-07 15:25:37,467][09030] Loop rollout_proc4_evt_loop terminating... +[2024-11-07 15:25:37,514][04584] Component RolloutWorker_w6 stopped! +[2024-11-07 15:25:37,522][09037] Stopping RolloutWorker_w6... +[2024-11-07 15:25:37,528][09037] Loop rollout_proc6_evt_loop terminating... 
+[2024-11-07 15:25:37,552][04584] Component RolloutWorker_w0 stopped! +[2024-11-07 15:25:37,553][09025] Stopping RolloutWorker_w0... +[2024-11-07 15:25:37,556][09025] Loop rollout_proc0_evt_loop terminating... +[2024-11-07 15:25:37,632][04584] Component RolloutWorker_w1 stopped! +[2024-11-07 15:25:37,633][09029] Stopping RolloutWorker_w1... +[2024-11-07 15:25:37,643][09029] Loop rollout_proc1_evt_loop terminating... +[2024-11-07 15:25:37,666][04584] Component RolloutWorker_w9 stopped! +[2024-11-07 15:25:37,664][09039] Stopping RolloutWorker_w9... +[2024-11-07 15:25:37,702][09039] Loop rollout_proc9_evt_loop terminating... +[2024-11-07 15:25:38,109][04584] Component RolloutWorker_w8 stopped! +[2024-11-07 15:25:38,154][04584] Component RolloutWorker_w7 stopped! +[2024-11-07 15:25:38,111][09040] Stopping RolloutWorker_w8... +[2024-11-07 15:25:38,159][09040] Loop rollout_proc8_evt_loop terminating... +[2024-11-07 15:25:38,156][09038] Stopping RolloutWorker_w7... +[2024-11-07 15:25:38,168][04584] Component RolloutWorker_w5 stopped! +[2024-11-07 15:25:38,172][09038] Loop rollout_proc7_evt_loop terminating... +[2024-11-07 15:25:38,172][09028] Stopping RolloutWorker_w5... +[2024-11-07 15:25:38,178][09028] Loop rollout_proc5_evt_loop terminating... +[2024-11-07 15:25:38,397][04584] Component RolloutWorker_w2 stopped! +[2024-11-07 15:25:38,405][09027] Stopping RolloutWorker_w2... +[2024-11-07 15:25:38,402][04584] Waiting for process learner_proc0 to stop... +[2024-11-07 15:25:38,410][09027] Loop rollout_proc2_evt_loop terminating... +[2024-11-07 15:25:44,086][04584] Waiting for process inference_proc0-0 to join... +[2024-11-07 15:25:44,088][04584] Waiting for process rollout_proc0 to join... +[2024-11-07 15:25:44,090][04584] Waiting for process rollout_proc1 to join... +[2024-11-07 15:25:44,092][04584] Waiting for process rollout_proc2 to join... +[2024-11-07 15:25:44,094][04584] Waiting for process rollout_proc3 to join... 
+[2024-11-07 15:25:44,096][04584] Waiting for process rollout_proc4 to join...
+[2024-11-07 15:25:44,098][04584] Waiting for process rollout_proc5 to join...
+[2024-11-07 15:25:44,099][04584] Waiting for process rollout_proc6 to join...
+[2024-11-07 15:25:44,102][04584] Waiting for process rollout_proc7 to join...
+[2024-11-07 15:25:44,106][04584] Waiting for process rollout_proc8 to join...
+[2024-11-07 15:25:44,108][04584] Waiting for process rollout_proc9 to join...
+[2024-11-07 15:25:44,110][04584] Batcher 0 profile tree view:
+batching: 177.8703, releasing_batches: 0.3180
+[2024-11-07 15:25:44,112][04584] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0001
+  wait_policy_total: 27.4860
+update_model: 26.3783
+  weight_update: 0.0043
+one_step: 0.0063
+  handle_policy_step: 1339.9856
+    deserialize: 47.4091, stack: 6.2756, obs_to_device_normalize: 399.2838, forward: 569.5053, send_messages: 86.6617
+    prepare_outputs: 187.4270
+      to_cpu: 142.4986
+[2024-11-07 15:25:44,113][04584] Learner 0 profile tree view:
+misc: 0.0142, prepare_batch: 70.5279
+train: 315.6715
+  epoch_init: 0.0392, minibatch_init: 0.0518, losses_postprocess: 3.5406, kl_divergence: 4.0413, after_optimizer: 18.4153
+  calculate_losses: 108.2507
+    losses_init: 0.0171, forward_head: 8.9566, bptt_initial: 63.2059, tail: 4.5197, advantages_returns: 1.3485, losses: 15.0900
+    bptt: 13.8609
+      bptt_forward_core: 13.4036
+  update: 178.6702
+    clip: 5.0453
+[2024-11-07 15:25:44,118][04584] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.6293, enqueue_policy_requests: 37.4664, env_step: 596.6403, overhead: 39.0474, complete_rollouts: 2.4262
+save_policy_outputs: 47.7074
+  split_output_tensors: 15.5779
+[2024-11-07 15:25:44,120][04584] RolloutWorker_w9 profile tree view:
+wait_for_trajectories: 0.5428, enqueue_policy_requests: 33.2939, env_step: 802.9854, overhead: 35.3433, complete_rollouts: 1.0714
+save_policy_outputs: 45.4609
+  split_output_tensors: 15.8628
+[2024-11-07 15:25:44,122][04584] Loop Runner_EvtLoop terminating...
+[2024-11-07 15:25:44,126][04584] Runner profile tree view:
+main_loop: 1467.0711
+[2024-11-07 15:25:44,131][04584] Collected {0: 16007168}, FPS: 5447.1
+[2024-11-07 15:25:44,737][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+[2024-11-07 15:25:44,739][04584] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-07 15:25:44,739][04584] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-07 15:25:44,741][04584] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-07 15:25:44,742][04584] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-07 15:25:44,744][04584] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-07 15:25:44,748][04584] Adding new argument 'max_num_episodes'=20 that is not in the saved config file!
+[2024-11-07 15:25:44,750][04584] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2024-11-07 15:25:44,752][04584] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2024-11-07 15:25:44,754][04584] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-07 15:25:44,755][04584] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-07 15:25:44,758][04584] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-07 15:25:44,759][04584] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-07 15:25:44,763][04584] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-07 15:25:45,048][04584] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:25:45,059][04584] RunningMeanStd input shape: (1,)
+[2024-11-07 15:25:45,267][04584] ConvEncoder: input_channels=3
+[2024-11-07 15:25:45,537][04584] Conv encoder output size: 512
+[2024-11-07 15:25:45,540][04584] Policy head output size: 512
+[2024-11-07 15:25:45,688][04584] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
+[2024-11-07 15:25:46,915][04584] Num frames 100...
+[2024-11-07 15:25:47,218][04584] Num frames 200...
+[2024-11-07 15:25:47,432][04584] Num frames 300...
+[2024-11-07 15:25:47,678][04584] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2024-11-07 15:25:47,680][04584] Avg episode reward: 3.840, avg true_objective: 3.840
+[2024-11-07 15:25:47,727][04584] Num frames 400...
+[2024-11-07 15:25:47,953][04584] Num frames 500...
+[2024-11-07 15:25:48,204][04584] Num frames 600...
+[2024-11-07 15:25:48,378][04584] Avg episode rewards: #0: 3.200, true rewards: #0: 3.200
+[2024-11-07 15:25:48,379][04584] Avg episode reward: 3.200, avg true_objective: 3.200
+[2024-11-07 15:25:48,527][04584] Num frames 700...
+[2024-11-07 15:25:48,761][04584] Num frames 800...
+[2024-11-07 15:25:48,980][04584] Num frames 900...
+[2024-11-07 15:25:49,257][04584] Num frames 1000...
+[2024-11-07 15:25:49,367][04584] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
+[2024-11-07 15:25:49,369][04584] Avg episode reward: 3.413, avg true_objective: 3.413
+[2024-11-07 15:25:49,550][04584] Num frames 1100...
+[2024-11-07 15:25:49,786][04584] Num frames 1200...
+[2024-11-07 15:25:50,076][04584] Num frames 1300...
+[2024-11-07 15:25:50,336][04584] Num frames 1400...
+[2024-11-07 15:25:50,412][04584] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
+[2024-11-07 15:25:50,415][04584] Avg episode reward: 3.520, avg true_objective: 3.520
+[2024-11-07 15:25:50,657][04584] Num frames 1500...
+[2024-11-07 15:25:50,950][04584] Num frames 1600...
+[2024-11-07 15:25:51,197][04584] Num frames 1700...
+[2024-11-07 15:25:51,432][04584] Num frames 1800...
+[2024-11-07 15:25:51,561][04584] Avg episode rewards: #0: 3.848, true rewards: #0: 3.648
+[2024-11-07 15:25:51,562][04584] Avg episode reward: 3.848, avg true_objective: 3.648
+[2024-11-07 15:25:51,762][04584] Num frames 1900...
+[2024-11-07 15:25:52,003][04584] Num frames 2000...
+[2024-11-07 15:25:52,256][04584] Num frames 2100...
+[2024-11-07 15:25:52,501][04584] Num frames 2200...
+[2024-11-07 15:25:52,725][04584] Avg episode rewards: #0: 4.120, true rewards: #0: 3.787
+[2024-11-07 15:25:52,729][04584] Avg episode reward: 4.120, avg true_objective: 3.787
+[2024-11-07 15:25:52,810][04584] Num frames 2300...
+[2024-11-07 15:25:53,060][04584] Num frames 2400...
+[2024-11-07 15:25:53,296][04584] Num frames 2500...
+[2024-11-07 15:25:53,533][04584] Num frames 2600...
+[2024-11-07 15:25:53,726][04584] Avg episode rewards: #0: 4.080, true rewards: #0: 3.794
+[2024-11-07 15:25:53,728][04584] Avg episode reward: 4.080, avg true_objective: 3.794
+[2024-11-07 15:25:53,866][04584] Num frames 2700...
+[2024-11-07 15:25:54,143][04584] Num frames 2800...
+[2024-11-07 15:25:54,389][04584] Num frames 2900...
+[2024-11-07 15:25:54,622][04584] Num frames 3000...
+[2024-11-07 15:25:54,786][04584] Avg episode rewards: #0: 4.050, true rewards: #0: 3.800
+[2024-11-07 15:25:54,789][04584] Avg episode reward: 4.050, avg true_objective: 3.800
+[2024-11-07 15:25:54,940][04584] Num frames 3100...
+[2024-11-07 15:25:55,222][04584] Num frames 3200...
+[2024-11-07 15:25:55,492][04584] Num frames 3300...
+[2024-11-07 15:25:55,765][04584] Num frames 3400...
+[2024-11-07 15:25:56,060][04584] Avg episode rewards: #0: 4.209, true rewards: #0: 3.876
+[2024-11-07 15:25:56,062][04584] Avg episode reward: 4.209, avg true_objective: 3.876
+[2024-11-07 15:25:56,108][04584] Num frames 3500...
+[2024-11-07 15:25:56,386][04584] Num frames 3600...
+[2024-11-07 15:25:56,661][04584] Num frames 3700...
+[2024-11-07 15:25:56,965][04584] Num frames 3800...
+[2024-11-07 15:25:57,293][04584] Avg episode rewards: #0: 4.172, true rewards: #0: 3.872
+[2024-11-07 15:25:57,295][04584] Avg episode reward: 4.172, avg true_objective: 3.872
+[2024-11-07 15:25:57,407][04584] Num frames 3900...
+[2024-11-07 15:25:57,705][04584] Num frames 4000...
+[2024-11-07 15:25:57,968][04584] Num frames 4100...
+[2024-11-07 15:26:00,392][04584] Num frames 4200...
+[2024-11-07 15:26:00,634][04584] Avg episode rewards: #0: 4.142, true rewards: #0: 3.869
+[2024-11-07 15:26:00,639][04584] Avg episode reward: 4.142, avg true_objective: 3.869
+[2024-11-07 15:26:00,803][04584] Num frames 4300...
+[2024-11-07 15:26:01,112][04584] Num frames 4400...
+[2024-11-07 15:26:01,437][04584] Num frames 4500...
+[2024-11-07 15:26:01,718][04584] Num frames 4600...
+[2024-11-07 15:26:01,904][04584] Avg episode rewards: #0: 4.200, true rewards: #0: 3.867
+[2024-11-07 15:26:01,905][04584] Avg episode reward: 4.200, avg true_objective: 3.867
+[2024-11-07 15:26:02,104][04584] Num frames 4700...
+[2024-11-07 15:26:02,487][04584] Num frames 4800...
+[2024-11-07 15:26:02,890][04584] Num frames 4900...
+[2024-11-07 15:26:03,241][04584] Num frames 5000...
+[2024-11-07 15:26:03,366][04584] Avg episode rewards: #0: 4.172, true rewards: #0: 3.865
+[2024-11-07 15:26:03,368][04584] Avg episode reward: 4.172, avg true_objective: 3.865
+[2024-11-07 15:26:03,607][04584] Num frames 5100...
+[2024-11-07 15:26:03,918][04584] Num frames 5200...
+[2024-11-07 15:26:04,195][04584] Num frames 5300...
+[2024-11-07 15:26:04,502][04584] Num frames 5400...
+[2024-11-07 15:26:04,799][04584] Avg episode rewards: #0: 4.266, true rewards: #0: 3.909
+[2024-11-07 15:26:04,803][04584] Avg episode reward: 4.266, avg true_objective: 3.909
+[2024-11-07 15:26:04,915][04584] Num frames 5500...
+[2024-11-07 15:26:05,256][04584] Num frames 5600...
+[2024-11-07 15:26:05,613][04584] Num frames 5700...
+[2024-11-07 15:26:05,979][04584] Num frames 5800...
+[2024-11-07 15:26:06,209][04584] Avg episode rewards: #0: 4.237, true rewards: #0: 3.904
+[2024-11-07 15:26:06,213][04584] Avg episode reward: 4.237, avg true_objective: 3.904
+[2024-11-07 15:26:06,396][04584] Num frames 5900...
+[2024-11-07 15:26:06,733][04584] Num frames 6000...
+[2024-11-07 15:26:07,059][04584] Num frames 6100...
+[2024-11-07 15:26:07,383][04584] Num frames 6200...
+[2024-11-07 15:26:07,575][04584] Avg episode rewards: #0: 4.213, true rewards: #0: 3.900
+[2024-11-07 15:26:07,579][04584] Avg episode reward: 4.213, avg true_objective: 3.900
+[2024-11-07 15:26:07,773][04584] Num frames 6300...
+[2024-11-07 15:26:08,122][04584] Num frames 6400...
+[2024-11-07 15:26:08,510][04584] Avg episode rewards: #0: 4.115, true rewards: #0: 3.821
+[2024-11-07 15:26:08,513][04584] Avg episode reward: 4.115, avg true_objective: 3.821
+[2024-11-07 15:26:08,534][04584] Num frames 6500...
+[2024-11-07 15:26:08,840][04584] Num frames 6600...
+[2024-11-07 15:26:09,118][04584] Num frames 6700...
+[2024-11-07 15:26:09,426][04584] Num frames 6800...
+[2024-11-07 15:26:09,749][04584] Avg episode rewards: #0: 4.100, true rewards: #0: 3.822
+[2024-11-07 15:26:09,751][04584] Avg episode reward: 4.100, avg true_objective: 3.822
+[2024-11-07 15:26:09,827][04584] Num frames 6900...
+[2024-11-07 15:26:10,154][04584] Num frames 7000...
+[2024-11-07 15:26:10,468][04584] Num frames 7100...
+[2024-11-07 15:26:10,765][04584] Num frames 7200...
+[2024-11-07 15:26:11,015][04584] Avg episode rewards: #0: 4.086, true rewards: #0: 3.823
+[2024-11-07 15:26:11,018][04584] Avg episode reward: 4.086, avg true_objective: 3.823
+[2024-11-07 15:26:11,145][04584] Num frames 7300...
+[2024-11-07 15:26:11,448][04584] Num frames 7400...
+[2024-11-07 15:26:11,817][04584] Num frames 7500...
+[2024-11-07 15:26:12,169][04584] Num frames 7600...
+[2024-11-07 15:26:12,493][04584] Avg episode rewards: #0: 4.140, true rewards: #0: 3.840
+[2024-11-07 15:26:12,498][04584] Avg episode reward: 4.140, avg true_objective: 3.840
+[2024-11-07 15:26:39,781][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+[2024-11-07 15:26:41,145][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+[2024-11-07 15:26:41,146][04584] Overriding arg 'num_workers' with value 4 passed from command line
+[2024-11-07 15:26:41,148][04584] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-07 15:26:41,149][04584] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-07 15:26:41,152][04584] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-07 15:26:41,154][04584] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-07 15:26:41,156][04584] Adding new argument 'max_num_frames'=150000 that is not in the saved config file!
+[2024-11-07 15:26:41,157][04584] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-11-07 15:26:41,160][04584] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-11-07 15:26:41,163][04584] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-11-07 15:26:41,169][04584] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-07 15:26:41,171][04584] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-07 15:26:41,176][04584] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-07 15:26:41,179][04584] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-07 15:26:41,182][04584] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-07 15:26:41,222][04584] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:26:41,224][04584] RunningMeanStd input shape: (1,)
+[2024-11-07 15:26:41,260][04584] ConvEncoder: input_channels=3
+[2024-11-07 15:26:41,321][04584] Conv encoder output size: 512
+[2024-11-07 15:26:41,323][04584] Policy head output size: 512
+[2024-11-07 15:26:41,356][04584] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
+[2024-11-07 15:26:42,080][04584] Num frames 100...
+[2024-11-07 15:26:42,204][04584] Avg episode rewards: #0: 1.280, true rewards: #0: 1.280
+[2024-11-07 15:26:42,206][04584] Avg episode reward: 1.280, avg true_objective: 1.280
+[2024-11-07 15:26:42,386][04584] Num frames 200...
+[2024-11-07 15:26:42,603][04584] Num frames 300...
+[2024-11-07 15:26:42,828][04584] Num frames 400...
+[2024-11-07 15:26:43,054][04584] Num frames 500...
+[2024-11-07 15:26:43,135][04584] Avg episode rewards: #0: 2.560, true rewards: #0: 2.560
+[2024-11-07 15:26:43,137][04584] Avg episode reward: 2.560, avg true_objective: 2.560
+[2024-11-07 15:26:43,338][04584] Num frames 600...
+[2024-11-07 15:26:43,556][04584] Num frames 700...
+[2024-11-07 15:26:43,790][04584] Num frames 800...
+[2024-11-07 15:26:44,021][04584] Num frames 900...
+[2024-11-07 15:26:44,275][04584] Avg episode rewards: #0: 3.973, true rewards: #0: 3.307
+[2024-11-07 15:26:44,280][04584] Avg episode reward: 3.973, avg true_objective: 3.307
+[2024-11-07 15:26:44,313][04584] Num frames 1000...
+[2024-11-07 15:26:44,514][04584] Num frames 1100...
+[2024-11-07 15:26:44,732][04584] Num frames 1200...
+[2024-11-07 15:26:44,965][04584] Num frames 1300...
+[2024-11-07 15:26:45,194][04584] Avg episode rewards: #0: 3.940, true rewards: #0: 3.440
+[2024-11-07 15:26:45,199][04584] Avg episode reward: 3.940, avg true_objective: 3.440
+[2024-11-07 15:26:45,259][04584] Num frames 1400...
+[2024-11-07 15:26:45,491][04584] Num frames 1500...
+[2024-11-07 15:26:45,708][04584] Num frames 1600...
+[2024-11-07 15:26:45,933][04584] Num frames 1700...
+[2024-11-07 15:26:46,133][04584] Avg episode rewards: #0: 3.920, true rewards: #0: 3.520
+[2024-11-07 15:26:46,138][04584] Avg episode reward: 3.920, avg true_objective: 3.520
+[2024-11-07 15:26:46,227][04584] Num frames 1800...
+[2024-11-07 15:26:46,458][04584] Num frames 1900...
+[2024-11-07 15:26:46,696][04584] Num frames 2000...
+[2024-11-07 15:26:46,930][04584] Num frames 2100...
+[2024-11-07 15:26:47,112][04584] Avg episode rewards: #0: 3.907, true rewards: #0: 3.573
+[2024-11-07 15:26:47,115][04584] Avg episode reward: 3.907, avg true_objective: 3.573
+[2024-11-07 15:26:47,281][04584] Num frames 2200...
+[2024-11-07 15:26:47,506][04584] Num frames 2300...
+[2024-11-07 15:26:47,747][04584] Num frames 2400...
+[2024-11-07 15:26:48,011][04584] Num frames 2500...
+[2024-11-07 15:26:48,134][04584] Avg episode rewards: #0: 3.897, true rewards: #0: 3.611
+[2024-11-07 15:26:48,136][04584] Avg episode reward: 3.897, avg true_objective: 3.611
+[2024-11-07 15:26:48,329][04584] Num frames 2600...
+[2024-11-07 15:26:48,555][04584] Num frames 2700...
+[2024-11-07 15:26:48,784][04584] Num frames 2800...
+[2024-11-07 15:26:48,992][04584] Num frames 2900...
+[2024-11-07 15:26:49,076][04584] Avg episode rewards: #0: 3.890, true rewards: #0: 3.640
+[2024-11-07 15:26:49,079][04584] Avg episode reward: 3.890, avg true_objective: 3.640
+[2024-11-07 15:26:49,278][04584] Num frames 3000...
+[2024-11-07 15:26:49,523][04584] Num frames 3100...
+[2024-11-07 15:26:49,752][04584] Num frames 3200...
+[2024-11-07 15:26:49,974][04584] Num frames 3300...
+[2024-11-07 15:26:50,159][04584] Avg episode rewards: #0: 4.067, true rewards: #0: 3.733
+[2024-11-07 15:26:50,161][04584] Avg episode reward: 4.067, avg true_objective: 3.733
+[2024-11-07 15:26:50,253][04584] Num frames 3400...
+[2024-11-07 15:26:50,471][04584] Num frames 3500...
+[2024-11-07 15:26:50,695][04584] Num frames 3600...
+[2024-11-07 15:26:50,913][04584] Num frames 3700...
+[2024-11-07 15:26:51,056][04584] Avg episode rewards: #0: 4.044, true rewards: #0: 3.744
+[2024-11-07 15:26:51,059][04584] Avg episode reward: 4.044, avg true_objective: 3.744
+[2024-11-07 15:27:00,168][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!