alidenewade
/

rl_course_vizdoom_health_gathering_supreme

@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value: 4.16 +/- 0.40
       name: mean_reward
       verified: false
 ---

       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
+      value: 4.00 +/- 0.58
       name: mean_reward
       verified: false
 ---

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ebbd919fbdc3030800efb2f1e143d54c8c67bdda23aefd40b82e2eb8c7ac065
-size 6180137

 version https://git-lfs.github.com/spec/v1
+oid sha256:e97dd79cc84566043bc7f37148d9b9076a10c3e97a1ca07a0c143811c8fd34b0
+size 5723967

sf_log.txt CHANGED Viewed

@@ -5805,3 +5805,86 @@ main_loop: 558.4363
 [2024-11-07 14:45:01,777][04584] Avg episode rewards: #0: 4.760, true rewards: #0: 4.160
 [2024-11-07 14:45:01,778][04584] Avg episode reward: 4.760, avg true_objective: 4.160
 [2024-11-07 14:45:10,932][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!

 [2024-11-07 14:45:01,777][04584] Avg episode rewards: #0: 4.760, true rewards: #0: 4.160
 [2024-11-07 14:45:01,778][04584] Avg episode reward: 4.760, avg true_objective: 4.160
 [2024-11-07 14:45:10,932][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+[2024-11-07 14:45:22,820][04584] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
+[2024-11-07 14:52:22,743][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+[2024-11-07 14:52:22,744][04584] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-11-07 14:52:22,746][04584] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-07 14:52:22,747][04584] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-07 14:52:22,749][04584] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-07 14:52:22,750][04584] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-07 14:52:22,753][04584] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-11-07 14:52:22,755][04584] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-11-07 14:52:22,756][04584] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-11-07 14:52:22,757][04584] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-11-07 14:52:22,758][04584] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-07 14:52:22,761][04584] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-07 14:52:22,762][04584] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-07 14:52:22,764][04584] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-07 14:52:22,765][04584] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-07 14:52:22,805][04584] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 14:52:22,807][04584] RunningMeanStd input shape: (1,)
+[2024-11-07 14:52:22,823][04584] ConvEncoder: input_channels=3
+[2024-11-07 14:52:22,886][04584] Conv encoder output size: 512
+[2024-11-07 14:52:22,887][04584] Policy head output size: 512
+[2024-11-07 14:52:22,925][04584] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
+[2024-11-07 14:52:23,495][04584] Num frames 100...
+[2024-11-07 14:52:23,750][04584] Num frames 200...
+[2024-11-07 14:52:23,933][04584] Num frames 300...
+[2024-11-07 14:52:24,160][04584] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2024-11-07 14:52:24,166][04584] Avg episode reward: 3.840, avg true_objective: 3.840
+[2024-11-07 14:52:24,207][04584] Num frames 400...
+[2024-11-07 14:52:24,395][04584] Num frames 500...
+[2024-11-07 14:52:24,568][04584] Num frames 600...
+[2024-11-07 14:52:24,735][04584] Num frames 700...
+[2024-11-07 14:52:24,905][04584] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2024-11-07 14:52:24,908][04584] Avg episode reward: 3.840, avg true_objective: 3.840
+[2024-11-07 14:52:24,982][04584] Num frames 800...
+[2024-11-07 14:52:25,141][04584] Num frames 900...
+[2024-11-07 14:52:25,293][04584] Num frames 1000...
+[2024-11-07 14:52:25,495][04584] Num frames 1100...
+[2024-11-07 14:52:25,703][04584] Num frames 1200...
+[2024-11-07 14:52:25,787][04584] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+[2024-11-07 14:52:25,789][04584] Avg episode reward: 4.387, avg true_objective: 4.053
+[2024-11-07 14:52:25,973][04584] Num frames 1300...
+[2024-11-07 14:52:26,162][04584] Num frames 1400...
+[2024-11-07 14:52:26,327][04584] Avg episode rewards: #0: 3.925, true rewards: #0: 3.675
+[2024-11-07 14:52:26,328][04584] Avg episode reward: 3.925, avg true_objective: 3.675
+[2024-11-07 14:52:26,390][04584] Num frames 1500...
+[2024-11-07 14:52:26,547][04584] Num frames 1600...
+[2024-11-07 14:52:26,703][04584] Num frames 1700...
+[2024-11-07 14:52:26,865][04584] Num frames 1800...
+[2024-11-07 14:52:27,004][04584] Avg episode rewards: #0: 3.908, true rewards: #0: 3.708
+[2024-11-07 14:52:27,009][04584] Avg episode reward: 3.908, avg true_objective: 3.708
+[2024-11-07 14:52:27,106][04584] Num frames 1900...
+[2024-11-07 14:52:27,292][04584] Num frames 2000...
+[2024-11-07 14:52:27,460][04584] Num frames 2100...
+[2024-11-07 14:52:27,618][04584] Num frames 2200...
+[2024-11-07 14:52:27,770][04584] Num frames 2300...
+[2024-11-07 14:52:27,829][04584] Avg episode rewards: #0: 4.170, true rewards: #0: 3.837
+[2024-11-07 14:52:27,830][04584] Avg episode reward: 4.170, avg true_objective: 3.837
+[2024-11-07 14:52:28,026][04584] Num frames 2400...
+[2024-11-07 14:52:28,195][04584] Num frames 2500...
+[2024-11-07 14:52:28,412][04584] Num frames 2600...
+[2024-11-07 14:52:28,639][04584] Num frames 2700...
+[2024-11-07 14:52:28,757][04584] Avg episode rewards: #0: 4.169, true rewards: #0: 3.883
+[2024-11-07 14:52:28,759][04584] Avg episode reward: 4.169, avg true_objective: 3.883
+[2024-11-07 14:52:28,999][04584] Num frames 2800...
+[2024-11-07 14:52:29,214][04584] Num frames 2900...
+[2024-11-07 14:52:29,416][04584] Num frames 3000...
+[2024-11-07 14:52:29,700][04584] Num frames 3100...
+[2024-11-07 14:52:30,033][04584] Avg episode rewards: #0: 4.498, true rewards: #0: 3.997
+[2024-11-07 14:52:30,037][04584] Avg episode reward: 4.498, avg true_objective: 3.997
+[2024-11-07 14:52:30,057][04584] Num frames 3200...
+[2024-11-07 14:52:30,268][04584] Num frames 3300...
+[2024-11-07 14:52:30,520][04584] Num frames 3400...
+[2024-11-07 14:52:30,893][04584] Num frames 3500...
+[2024-11-07 14:52:31,203][04584] Avg episode rewards: #0: 4.424, true rewards: #0: 3.980
+[2024-11-07 14:52:31,209][04584] Avg episode reward: 4.424, avg true_objective: 3.980
+[2024-11-07 14:52:31,268][04584] Num frames 3600...
+[2024-11-07 14:52:31,510][04584] Num frames 3700...
+[2024-11-07 14:52:31,731][04584] Num frames 3800...
+[2024-11-07 14:52:31,955][04584] Num frames 3900...
+[2024-11-07 14:52:32,276][04584] Avg episode rewards: #0: 4.498, true rewards: #0: 3.998
+[2024-11-07 14:52:32,278][04584] Avg episode reward: 4.498, avg true_objective: 3.998
+[2024-11-07 14:52:32,283][04584] Num frames 4000...
+[2024-11-07 14:52:40,832][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!