alidenewade
/

rl_course_vizdoom_health_gathering_supreme

@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value: 3.74 +/- 0.88
       name: mean_reward
       verified: false
 ---

       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
+      value: 3.71 +/- 0.63
       name: mean_reward
       verified: false
 ---

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9309b48096383501180a71893b75c454a292313ef81446074e21343d357b4bb8
-size 5709090

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc8d65182a01240d78094bcad1ac054bcbd3b602de2485fbaf2f3c34247352b4
+size 5783761

sf_log.txt CHANGED Viewed

@@ -7933,3 +7933,83 @@ main_loop: 1467.0711
 [2024-11-07 15:26:51,056][04584] Avg episode rewards: #0: 4.044, true rewards: #0: 3.744
 [2024-11-07 15:26:51,059][04584] Avg episode reward: 4.044, avg true_objective: 3.744
 [2024-11-07 15:27:00,168][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!

 [2024-11-07 15:26:51,056][04584] Avg episode rewards: #0: 4.044, true rewards: #0: 3.744
 [2024-11-07 15:26:51,059][04584] Avg episode reward: 4.044, avg true_objective: 3.744
 [2024-11-07 15:27:00,168][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+[2024-11-07 15:27:15,613][04584] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
+[2024-11-07 15:28:10,876][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+[2024-11-07 15:28:10,878][04584] Overriding arg 'num_workers' with value 4 passed from command line
+[2024-11-07 15:28:10,879][04584] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-07 15:28:10,880][04584] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-07 15:28:10,883][04584] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-07 15:28:10,884][04584] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-07 15:28:10,885][04584] Adding new argument 'max_num_frames'=150000 that is not in the saved config file!
+[2024-11-07 15:28:10,886][04584] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-11-07 15:28:10,887][04584] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-11-07 15:28:10,890][04584] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-11-07 15:28:10,891][04584] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-07 15:28:10,893][04584] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-07 15:28:10,894][04584] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-07 15:28:10,896][04584] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-07 15:28:10,898][04584] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-07 15:28:10,928][04584] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:28:10,931][04584] RunningMeanStd input shape: (1,)
+[2024-11-07 15:28:10,949][04584] ConvEncoder: input_channels=3
+[2024-11-07 15:28:11,051][04584] Conv encoder output size: 512
+[2024-11-07 15:28:11,053][04584] Policy head output size: 512
+[2024-11-07 15:28:11,095][04584] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth...
+[2024-11-07 15:28:11,668][04584] Num frames 100...
+[2024-11-07 15:28:11,899][04584] Num frames 200...
+[2024-11-07 15:28:12,095][04584] Avg episode rewards: #0: 2.560, true rewards: #0: 2.560
+[2024-11-07 15:28:12,099][04584] Avg episode reward: 2.560, avg true_objective: 2.560
+[2024-11-07 15:28:12,197][04584] Num frames 300...
+[2024-11-07 15:28:12,417][04584] Num frames 400...
+[2024-11-07 15:28:12,623][04584] Num frames 500...
+[2024-11-07 15:28:12,833][04584] Num frames 600...
+[2024-11-07 15:28:12,965][04584] Avg episode rewards: #0: 3.200, true rewards: #0: 3.200
+[2024-11-07 15:28:12,969][04584] Avg episode reward: 3.200, avg true_objective: 3.200
+[2024-11-07 15:28:13,114][04584] Num frames 700...
+[2024-11-07 15:28:13,354][04584] Num frames 800...
+[2024-11-07 15:28:13,583][04584] Num frames 900...
+[2024-11-07 15:28:13,801][04584] Num frames 1000...
+[2024-11-07 15:28:13,909][04584] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
+[2024-11-07 15:28:13,910][04584] Avg episode reward: 3.413, avg true_objective: 3.413
+[2024-11-07 15:28:14,079][04584] Num frames 1100...
+[2024-11-07 15:28:14,288][04584] Num frames 1200...
+[2024-11-07 15:28:14,513][04584] Num frames 1300...
+[2024-11-07 15:28:14,726][04584] Num frames 1400...
+[2024-11-07 15:28:14,806][04584] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
+[2024-11-07 15:28:14,809][04584] Avg episode reward: 3.520, avg true_objective: 3.520
+[2024-11-07 15:28:15,035][04584] Num frames 1500...
+[2024-11-07 15:28:15,244][04584] Num frames 1600...
+[2024-11-07 15:28:17,568][04584] Num frames 1700...
+[2024-11-07 15:28:17,830][04584] Avg episode rewards: #0: 3.584, true rewards: #0: 3.584
+[2024-11-07 15:28:17,835][04584] Avg episode reward: 3.584, avg true_objective: 3.584
+[2024-11-07 15:28:17,873][04584] Num frames 1800...
+[2024-11-07 15:28:18,082][04584] Num frames 1900...
+[2024-11-07 15:28:18,295][04584] Num frames 2000...
+[2024-11-07 15:28:18,511][04584] Num frames 2100...
+[2024-11-07 15:28:18,737][04584] Num frames 2200...
+[2024-11-07 15:28:18,880][04584] Avg episode rewards: #0: 3.900, true rewards: #0: 3.733
+[2024-11-07 15:28:18,886][04584] Avg episode reward: 3.900, avg true_objective: 3.733
+[2024-11-07 15:28:19,045][04584] Num frames 2300...
+[2024-11-07 15:28:19,258][04584] Num frames 2400...
+[2024-11-07 15:28:19,459][04584] Num frames 2500...
+[2024-11-07 15:28:19,684][04584] Num frames 2600...
+[2024-11-07 15:28:19,802][04584] Avg episode rewards: #0: 3.891, true rewards: #0: 3.749
+[2024-11-07 15:28:19,803][04584] Avg episode reward: 3.891, avg true_objective: 3.749
+[2024-11-07 15:28:19,962][04584] Num frames 2700...
+[2024-11-07 15:28:20,182][04584] Num frames 2800...
+[2024-11-07 15:28:20,404][04584] Num frames 2900...
+[2024-11-07 15:28:20,622][04584] Num frames 3000...
+[2024-11-07 15:28:20,830][04584] Avg episode rewards: #0: 4.090, true rewards: #0: 3.840
+[2024-11-07 15:28:20,835][04584] Avg episode reward: 4.090, avg true_objective: 3.840
+[2024-11-07 15:28:20,924][04584] Num frames 3100...
+[2024-11-07 15:28:21,144][04584] Num frames 3200...
+[2024-11-07 15:28:21,345][04584] Num frames 3300...
+[2024-11-07 15:28:21,589][04584] Num frames 3400...
+[2024-11-07 15:28:21,784][04584] Avg episode rewards: #0: 4.062, true rewards: #0: 3.840
+[2024-11-07 15:28:21,789][04584] Avg episode reward: 4.062, avg true_objective: 3.840
+[2024-11-07 15:28:21,910][04584] Num frames 3500...
+[2024-11-07 15:28:22,124][04584] Num frames 3600...
+[2024-11-07 15:28:22,341][04584] Num frames 3700...
+[2024-11-07 15:28:22,426][04584] Avg episode rewards: #0: 3.912, true rewards: #0: 3.712
+[2024-11-07 15:28:22,431][04584] Avg episode reward: 3.912, avg true_objective: 3.712
+[2024-11-07 15:28:31,547][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!