alidenewade
/

rl_course_vizdoom_health_gathering_supreme

@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value: 3.97 +/- 0.26
       name: mean_reward
       verified: false
 ---

       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
+      value: 4.16 +/- 0.43
       name: mean_reward
       verified: false
 ---

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62e8342934d8c6c88ead48b8939739dc42c639fd96f7aede0ea5d35c8be70345
-size 5839479

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4400a542b41ef1ba1ffb960e294df641800f7312d7253a079285a19b13c8f2d
+size 5915714

sf_log.txt CHANGED Viewed

@@ -4822,3 +4822,87 @@ main_loop: 54.9388
 [2024-11-07 14:21:43,963][01364] Avg episode rewards: #0: 4.168, true rewards: #0: 3.968
 [2024-11-07 14:21:43,965][01364] Avg episode reward: 4.168, avg true_objective: 3.968
 [2024-11-07 14:21:56,432][01364] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!

 [2024-11-07 14:21:43,963][01364] Avg episode rewards: #0: 4.168, true rewards: #0: 3.968
 [2024-11-07 14:21:43,965][01364] Avg episode reward: 4.168, avg true_objective: 3.968
 [2024-11-07 14:21:56,432][01364] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+[2024-11-07 14:22:10,194][01364] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
+[2024-11-07 14:22:17,362][01364] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+[2024-11-07 14:22:17,364][01364] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-11-07 14:22:17,367][01364] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-07 14:22:17,370][01364] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-07 14:22:17,372][01364] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-07 14:22:17,375][01364] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-07 14:22:17,377][01364] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-11-07 14:22:17,382][01364] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-11-07 14:22:17,384][01364] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-11-07 14:22:17,385][01364] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-11-07 14:22:17,388][01364] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-07 14:22:17,389][01364] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-07 14:22:17,392][01364] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-07 14:22:17,396][01364] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-07 14:22:17,398][01364] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-07 14:22:17,445][01364] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 14:22:17,450][01364] RunningMeanStd input shape: (1,)
+[2024-11-07 14:22:17,485][01364] ConvEncoder: input_channels=3
+[2024-11-07 14:22:17,580][01364] Conv encoder output size: 512
+[2024-11-07 14:22:17,582][01364] Policy head output size: 512
+[2024-11-07 14:22:17,619][01364] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000000986_4038656.pth...
+[2024-11-07 14:22:18,441][01364] Num frames 100...
+[2024-11-07 14:22:18,875][01364] Num frames 200...
+[2024-11-07 14:22:19,235][01364] Num frames 300...
+[2024-11-07 14:22:19,612][01364] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2024-11-07 14:22:19,614][01364] Avg episode reward: 3.840, avg true_objective: 3.840
+[2024-11-07 14:22:19,694][01364] Num frames 400...
+[2024-11-07 14:22:20,083][01364] Num frames 500...
+[2024-11-07 14:22:20,493][01364] Num frames 600...
+[2024-11-07 14:22:20,960][01364] Num frames 700...
+[2024-11-07 14:22:21,365][01364] Num frames 800...
+[2024-11-07 14:22:21,518][01364] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2024-11-07 14:22:21,519][01364] Avg episode reward: 4.660, avg true_objective: 4.160
+[2024-11-07 14:22:21,722][01364] Num frames 900...
+[2024-11-07 14:22:22,052][01364] Num frames 1000...
+[2024-11-07 14:22:22,332][01364] Num frames 1100...
+[2024-11-07 14:22:22,612][01364] Num frames 1200...
+[2024-11-07 14:22:22,926][01364] Avg episode rewards: #0: 4.933, true rewards: #0: 4.267
+[2024-11-07 14:22:22,927][01364] Avg episode reward: 4.933, avg true_objective: 4.267
+[2024-11-07 14:22:23,001][01364] Num frames 1300...
+[2024-11-07 14:22:23,320][01364] Num frames 1400...
+[2024-11-07 14:22:23,606][01364] Num frames 1500...
+[2024-11-07 14:22:23,880][01364] Num frames 1600...
+[2024-11-07 14:22:24,135][01364] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2024-11-07 14:22:24,136][01364] Avg episode reward: 4.660, avg true_objective: 4.160
+[2024-11-07 14:22:24,254][01364] Num frames 1700...
+[2024-11-07 14:22:24,530][01364] Num frames 1800...
+[2024-11-07 14:22:24,780][01364] Num frames 1900...
+[2024-11-07 14:22:25,066][01364] Num frames 2000...
+[2024-11-07 14:22:25,268][01364] Avg episode rewards: #0: 4.496, true rewards: #0: 4.096
+[2024-11-07 14:22:25,274][01364] Avg episode reward: 4.496, avg true_objective: 4.096
+[2024-11-07 14:22:25,413][01364] Num frames 2100...
+[2024-11-07 14:22:25,642][01364] Num frames 2200...
+[2024-11-07 14:22:25,905][01364] Num frames 2300...
+[2024-11-07 14:22:26,208][01364] Num frames 2400...
+[2024-11-07 14:22:26,349][01364] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+[2024-11-07 14:22:26,350][01364] Avg episode reward: 4.387, avg true_objective: 4.053
+[2024-11-07 14:22:26,528][01364] Num frames 2500...
+[2024-11-07 14:22:26,795][01364] Num frames 2600...
+[2024-11-07 14:22:27,084][01364] Num frames 2700...
+[2024-11-07 14:22:27,366][01364] Num frames 2800...
+[2024-11-07 14:22:29,207][01364] Num frames 2900...
+[2024-11-07 14:22:29,387][01364] Avg episode rewards: #0: 4.777, true rewards: #0: 4.206
+[2024-11-07 14:22:29,392][01364] Avg episode reward: 4.777, avg true_objective: 4.206
+[2024-11-07 14:22:29,605][01364] Num frames 3000...
+[2024-11-07 14:22:29,954][01364] Num frames 3100...
+[2024-11-07 14:22:30,304][01364] Num frames 3200...
+[2024-11-07 14:22:30,773][01364] Num frames 3300...
+[2024-11-07 14:22:30,997][01364] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2024-11-07 14:22:30,999][01364] Avg episode reward: 4.660, avg true_objective: 4.160
+[2024-11-07 14:22:31,199][01364] Num frames 3400...
+[2024-11-07 14:22:31,513][01364] Num frames 3500...
+[2024-11-07 14:22:31,765][01364] Num frames 3600...
+[2024-11-07 14:22:32,016][01364] Num frames 3700...
+[2024-11-07 14:22:32,260][01364] Avg episode rewards: #0: 4.751, true rewards: #0: 4.196
+[2024-11-07 14:22:32,266][01364] Avg episode reward: 4.751, avg true_objective: 4.196
+[2024-11-07 14:22:32,344][01364] Num frames 3800...
+[2024-11-07 14:22:32,570][01364] Num frames 3900...
+[2024-11-07 14:22:32,817][01364] Num frames 4000...
+[2024-11-07 14:22:33,072][01364] Num frames 4100...
+[2024-11-07 14:22:33,260][01364] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2024-11-07 14:22:33,261][01364] Avg episode reward: 4.660, avg true_objective: 4.160
+[2024-11-07 14:22:46,334][01364] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!