Upload folder using huggingface_hub
Browse files- .summary/0/events.out.tfevents.1730982413.ali +0 -0
- .summary/0/events.out.tfevents.1730982684.ali +0 -0
- .summary/0/events.out.tfevents.1730982741.ali +3 -0
- .summary/0/events.out.tfevents.1730982907.ali +3 -0
- README.md +1 -1
- checkpoint_p0/checkpoint_000001806_7397376.pth +3 -0
- checkpoint_p0/checkpoint_000001955_8007680.pth +3 -0
- config.json +1 -1
- replay.mp4 +2 -2
- sf_log.txt +899 -0
.summary/0/events.out.tfevents.1730982413.ali
ADDED
|
File without changes
|
.summary/0/events.out.tfevents.1730982684.ali
ADDED
|
File without changes
|
.summary/0/events.out.tfevents.1730982741.ali
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:814f1a05c62696ae377c67fe385aa2b78f2e1f1bb048a26fdfd6d51ce9706129
|
| 3 |
+
size 40
|
.summary/0/events.out.tfevents.1730982907.ali
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b77ed93e786f21de8587f4a2cf8852dbda3dbf10c6afaab58ea577ed51e36a71
|
| 3 |
+
size 418702
|
README.md
CHANGED
|
@@ -15,7 +15,7 @@ model-index:
|
|
| 15 |
type: doom_health_gathering_supreme
|
| 16 |
metrics:
|
| 17 |
- type: mean_reward
|
| 18 |
-
value: 4.16 +/- 0.
|
| 19 |
name: mean_reward
|
| 20 |
verified: false
|
| 21 |
---
|
|
|
|
| 15 |
type: doom_health_gathering_supreme
|
| 16 |
metrics:
|
| 17 |
- type: mean_reward
|
| 18 |
+
value: 4.16 +/- 0.40
|
| 19 |
name: mean_reward
|
| 20 |
verified: false
|
| 21 |
---
|
checkpoint_p0/checkpoint_000001806_7397376.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45da3df3b08606fd196e42a86db3419ab04d7a53e4a0dabcdbd1d4a373ff64fc
|
| 3 |
+
size 34929669
|
checkpoint_p0/checkpoint_000001955_8007680.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c40896e490d9f61fa149c75461188d66fac389407b5485ac47f0d95ca71c25c
|
| 3 |
+
size 34929669
|
config.json
CHANGED
|
@@ -65,7 +65,7 @@
|
|
| 65 |
"summaries_use_frameskip": true,
|
| 66 |
"heartbeat_interval": 20,
|
| 67 |
"heartbeat_reporting_interval": 600,
|
| 68 |
-
"train_for_env_steps":
|
| 69 |
"train_for_seconds": 10000000000,
|
| 70 |
"save_every_sec": 120,
|
| 71 |
"keep_checkpoints": 2,
|
|
|
|
| 65 |
"summaries_use_frameskip": true,
|
| 66 |
"heartbeat_interval": 20,
|
| 67 |
"heartbeat_reporting_interval": 600,
|
| 68 |
+
"train_for_env_steps": 8000000,
|
| 69 |
"train_for_seconds": 10000000000,
|
| 70 |
"save_every_sec": 120,
|
| 71 |
"keep_checkpoints": 2,
|
replay.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ebbd919fbdc3030800efb2f1e143d54c8c67bdda23aefd40b82e2eb8c7ac065
|
| 3 |
+
size 6180137
|
sf_log.txt
CHANGED
|
@@ -4906,3 +4906,902 @@ main_loop: 54.9388
|
|
| 4906 |
[2024-11-07 14:22:33,260][01364] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
|
| 4907 |
[2024-11-07 14:22:33,261][01364] Avg episode reward: 4.660, avg true_objective: 4.160
|
| 4908 |
[2024-11-07 14:22:46,334][01364] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4906 |
[2024-11-07 14:22:33,260][01364] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
|
| 4907 |
[2024-11-07 14:22:33,261][01364] Avg episode reward: 4.660, avg true_objective: 4.160
|
| 4908 |
[2024-11-07 14:22:46,334][01364] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
|
| 4909 |
+
[2024-11-07 14:22:51,976][01364] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
|
| 4910 |
+
[2024-11-07 14:26:53,545][01364] Environment doom_basic already registered, overwriting...
|
| 4911 |
+
[2024-11-07 14:26:53,548][01364] Environment doom_two_colors_easy already registered, overwriting...
|
| 4912 |
+
[2024-11-07 14:26:53,550][01364] Environment doom_two_colors_hard already registered, overwriting...
|
| 4913 |
+
[2024-11-07 14:26:53,552][01364] Environment doom_dm already registered, overwriting...
|
| 4914 |
+
[2024-11-07 14:26:53,553][01364] Environment doom_dwango5 already registered, overwriting...
|
| 4915 |
+
[2024-11-07 14:26:53,555][01364] Environment doom_my_way_home_flat_actions already registered, overwriting...
|
| 4916 |
+
[2024-11-07 14:26:53,556][01364] Environment doom_defend_the_center_flat_actions already registered, overwriting...
|
| 4917 |
+
[2024-11-07 14:26:53,558][01364] Environment doom_my_way_home already registered, overwriting...
|
| 4918 |
+
[2024-11-07 14:26:53,559][01364] Environment doom_deadly_corridor already registered, overwriting...
|
| 4919 |
+
[2024-11-07 14:26:53,561][01364] Environment doom_defend_the_center already registered, overwriting...
|
| 4920 |
+
[2024-11-07 14:26:53,564][01364] Environment doom_defend_the_line already registered, overwriting...
|
| 4921 |
+
[2024-11-07 14:26:53,565][01364] Environment doom_health_gathering already registered, overwriting...
|
| 4922 |
+
[2024-11-07 14:26:53,565][01364] Environment doom_health_gathering_supreme already registered, overwriting...
|
| 4923 |
+
[2024-11-07 14:26:53,568][01364] Environment doom_battle already registered, overwriting...
|
| 4924 |
+
[2024-11-07 14:26:53,569][01364] Environment doom_battle2 already registered, overwriting...
|
| 4925 |
+
[2024-11-07 14:26:53,570][01364] Environment doom_duel_bots already registered, overwriting...
|
| 4926 |
+
[2024-11-07 14:26:53,572][01364] Environment doom_deathmatch_bots already registered, overwriting...
|
| 4927 |
+
[2024-11-07 14:26:53,574][01364] Environment doom_duel already registered, overwriting...
|
| 4928 |
+
[2024-11-07 14:26:53,575][01364] Environment doom_deathmatch_full already registered, overwriting...
|
| 4929 |
+
[2024-11-07 14:26:53,577][01364] Environment doom_benchmark already registered, overwriting...
|
| 4930 |
+
[2024-11-07 14:26:53,579][01364] register_encoder_factory: <function make_vizdoom_encoder at 0x7f6746896950>
|
| 4931 |
+
[2024-11-07 14:26:53,595][01364] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
|
| 4932 |
+
[2024-11-07 14:26:53,597][01364] Overriding arg 'env' with value 'LunarLander-v2' passed from command line
|
| 4933 |
+
[2024-11-07 14:26:53,603][01364] Experiment dir /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment already exists!
|
| 4934 |
+
[2024-11-07 14:26:53,604][01364] Resuming existing experiment from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment...
|
| 4935 |
+
[2024-11-07 14:26:53,607][01364] Weights and Biases integration disabled
|
| 4936 |
+
[2024-11-07 14:26:53,612][01364] Environment var CUDA_VISIBLE_DEVICES is 0
|
| 4937 |
+
|
| 4938 |
+
[2024-11-07 14:32:23,905][03851] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
|
| 4939 |
+
[2024-11-07 14:32:23,907][03851] Rollout worker 0 uses device cpu
|
| 4940 |
+
[2024-11-07 14:32:23,908][03851] Rollout worker 1 uses device cpu
|
| 4941 |
+
[2024-11-07 14:32:23,909][03851] Rollout worker 2 uses device cpu
|
| 4942 |
+
[2024-11-07 14:32:23,911][03851] Rollout worker 3 uses device cpu
|
| 4943 |
+
[2024-11-07 14:32:23,913][03851] Rollout worker 4 uses device cpu
|
| 4944 |
+
[2024-11-07 14:32:23,914][03851] Rollout worker 5 uses device cpu
|
| 4945 |
+
[2024-11-07 14:32:23,916][03851] Rollout worker 6 uses device cpu
|
| 4946 |
+
[2024-11-07 14:32:23,918][03851] Rollout worker 7 uses device cpu
|
| 4947 |
+
[2024-11-07 14:32:23,982][03851] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 4948 |
+
[2024-11-07 14:32:23,983][03851] InferenceWorker_p0-w0: min num requests: 2
|
| 4949 |
+
[2024-11-07 14:32:24,069][03851] Starting all processes...
|
| 4950 |
+
[2024-11-07 14:32:24,070][03851] Starting process learner_proc0
|
| 4951 |
+
[2024-11-07 14:32:24,119][03851] Starting all processes...
|
| 4952 |
+
[2024-11-07 14:32:24,126][03851] Starting process inference_proc0-0
|
| 4953 |
+
[2024-11-07 14:32:24,126][03851] Starting process rollout_proc0
|
| 4954 |
+
[2024-11-07 14:32:24,127][03851] Starting process rollout_proc1
|
| 4955 |
+
[2024-11-07 14:32:24,128][03851] Starting process rollout_proc2
|
| 4956 |
+
[2024-11-07 14:32:24,130][03851] Starting process rollout_proc3
|
| 4957 |
+
[2024-11-07 14:32:24,131][03851] Starting process rollout_proc4
|
| 4958 |
+
[2024-11-07 14:32:24,132][03851] Starting process rollout_proc5
|
| 4959 |
+
[2024-11-07 14:32:24,133][03851] Starting process rollout_proc6
|
| 4960 |
+
[2024-11-07 14:32:24,134][03851] Starting process rollout_proc7
|
| 4961 |
+
[2024-11-07 14:32:28,240][04103] Worker 3 uses CPU cores [3]
|
| 4962 |
+
[2024-11-07 14:32:28,599][04099] Worker 0 uses CPU cores [0]
|
| 4963 |
+
[2024-11-07 14:32:28,800][04102] Worker 2 uses CPU cores [2]
|
| 4964 |
+
[2024-11-07 14:32:28,820][04086] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 4965 |
+
[2024-11-07 14:32:28,820][04086] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
| 4966 |
+
[2024-11-07 14:32:28,984][04086] Num visible devices: 1
|
| 4967 |
+
[2024-11-07 14:32:29,023][04086] Starting seed is not provided
|
| 4968 |
+
[2024-11-07 14:32:29,023][04086] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 4969 |
+
[2024-11-07 14:32:29,023][04086] Initializing actor-critic model on device cuda:0
|
| 4970 |
+
[2024-11-07 14:32:29,024][04086] RunningMeanStd input shape: (3, 72, 128)
|
| 4971 |
+
[2024-11-07 14:32:29,030][04086] RunningMeanStd input shape: (1,)
|
| 4972 |
+
[2024-11-07 14:32:29,031][04101] Worker 1 uses CPU cores [1]
|
| 4973 |
+
[2024-11-07 14:32:29,065][04086] ConvEncoder: input_channels=3
|
| 4974 |
+
[2024-11-07 14:32:29,179][04105] Worker 5 uses CPU cores [5]
|
| 4975 |
+
[2024-11-07 14:32:29,418][04086] Conv encoder output size: 512
|
| 4976 |
+
[2024-11-07 14:32:29,418][04086] Policy head output size: 512
|
| 4977 |
+
[2024-11-07 14:32:29,488][04086] Created Actor Critic model with architecture:
|
| 4978 |
+
[2024-11-07 14:32:29,488][04086] ActorCriticSharedWeights(
|
| 4979 |
+
(obs_normalizer): ObservationNormalizer(
|
| 4980 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
| 4981 |
+
(running_mean_std): ModuleDict(
|
| 4982 |
+
(obs): RunningMeanStdInPlace()
|
| 4983 |
+
)
|
| 4984 |
+
)
|
| 4985 |
+
)
|
| 4986 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
| 4987 |
+
(encoder): VizdoomEncoder(
|
| 4988 |
+
(basic_encoder): ConvEncoder(
|
| 4989 |
+
(enc): RecursiveScriptModule(
|
| 4990 |
+
original_name=ConvEncoderImpl
|
| 4991 |
+
(conv_head): RecursiveScriptModule(
|
| 4992 |
+
original_name=Sequential
|
| 4993 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
| 4994 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
| 4995 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
| 4996 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
| 4997 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
| 4998 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
| 4999 |
+
)
|
| 5000 |
+
(mlp_layers): RecursiveScriptModule(
|
| 5001 |
+
original_name=Sequential
|
| 5002 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
| 5003 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
| 5004 |
+
)
|
| 5005 |
+
)
|
| 5006 |
+
)
|
| 5007 |
+
)
|
| 5008 |
+
(core): ModelCoreRNN(
|
| 5009 |
+
(core): GRU(512, 512)
|
| 5010 |
+
)
|
| 5011 |
+
(decoder): MlpDecoder(
|
| 5012 |
+
(mlp): Identity()
|
| 5013 |
+
)
|
| 5014 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
| 5015 |
+
(action_parameterization): ActionParameterizationDefault(
|
| 5016 |
+
(distribution_linear): Linear(in_features=512, out_features=4, bias=True)
|
| 5017 |
+
)
|
| 5018 |
+
)
|
| 5019 |
+
[2024-11-07 14:32:29,721][04100] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 5020 |
+
[2024-11-07 14:32:29,722][04100] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
| 5021 |
+
[2024-11-07 14:32:29,759][04100] Num visible devices: 1
|
| 5022 |
+
[2024-11-07 14:32:29,806][04106] Worker 6 uses CPU cores [6]
|
| 5023 |
+
[2024-11-07 14:32:29,878][04107] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
|
| 5024 |
+
[2024-11-07 14:32:29,920][04104] Worker 4 uses CPU cores [4]
|
| 5025 |
+
[2024-11-07 14:32:30,421][04086] Using optimizer <class 'torch.optim.adam.Adam'>
|
| 5026 |
+
[2024-11-07 14:32:31,436][04086] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000000986_4038656.pth...
|
| 5027 |
+
[2024-11-07 14:32:31,484][04086] Loading model from checkpoint
|
| 5028 |
+
[2024-11-07 14:32:31,485][04086] EvtLoop [learner_proc0_evt_loop, process=learner_proc0] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Runner_EvtLoop', signal_name='start'), args=()
|
| 5029 |
+
Traceback (most recent call last):
|
| 5030 |
+
File "/root/miniconda3/envs/unit8/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal
|
| 5031 |
+
slot_callable(*args)
|
| 5032 |
+
File "/root/miniconda3/envs/unit8/lib/python3.10/site-packages/sample_factory/algo/learning/learner_worker.py", line 139, in init
|
| 5033 |
+
init_model_data = self.learner.init()
|
| 5034 |
+
File "/root/miniconda3/envs/unit8/lib/python3.10/site-packages/sample_factory/algo/learning/learner.py", line 245, in init
|
| 5035 |
+
self.load_from_checkpoint(self.policy_id)
|
| 5036 |
+
File "/root/miniconda3/envs/unit8/lib/python3.10/site-packages/sample_factory/algo/learning/learner.py", line 307, in load_from_checkpoint
|
| 5037 |
+
self._load_state(checkpoint_dict, load_progress=load_progress)
|
| 5038 |
+
File "/root/miniconda3/envs/unit8/lib/python3.10/site-packages/sample_factory/algo/learning/learner.py", line 291, in _load_state
|
| 5039 |
+
self.actor_critic.load_state_dict(checkpoint_dict["model"])
|
| 5040 |
+
File "/root/miniconda3/envs/unit8/lib/python3.10/site-packages/torch/nn/modules/module.py", line 2584, in load_state_dict
|
| 5041 |
+
raise RuntimeError(
|
| 5042 |
+
RuntimeError: Error(s) in loading state_dict for ActorCriticSharedWeights:
|
| 5043 |
+
size mismatch for action_parameterization.distribution_linear.weight: copying a param with shape torch.Size([5, 512]) from checkpoint, the shape in current model is torch.Size([4, 512]).
|
| 5044 |
+
size mismatch for action_parameterization.distribution_linear.bias: copying a param with shape torch.Size([5]) from checkpoint, the shape in current model is torch.Size([4]).
|
| 5045 |
+
[2024-11-07 14:32:31,488][04086] Unhandled exception Error(s) in loading state_dict for ActorCriticSharedWeights:
|
| 5046 |
+
size mismatch for action_parameterization.distribution_linear.weight: copying a param with shape torch.Size([5, 512]) from checkpoint, the shape in current model is torch.Size([4, 512]).
|
| 5047 |
+
size mismatch for action_parameterization.distribution_linear.bias: copying a param with shape torch.Size([5]) from checkpoint, the shape in current model is torch.Size([4]). in evt loop learner_proc0_evt_loop
|
| 5048 |
+
[2024-11-07 14:32:43,973][03851] Heartbeat connected on Batcher_0
|
| 5049 |
+
[2024-11-07 14:32:43,982][03851] Heartbeat connected on InferenceWorker_p0-w0
|
| 5050 |
+
[2024-11-07 14:32:43,989][03851] Heartbeat connected on RolloutWorker_w0
|
| 5051 |
+
[2024-11-07 14:32:43,992][03851] Heartbeat connected on RolloutWorker_w1
|
| 5052 |
+
[2024-11-07 14:32:43,997][03851] Heartbeat connected on RolloutWorker_w2
|
| 5053 |
+
[2024-11-07 14:32:44,000][03851] Heartbeat connected on RolloutWorker_w3
|
| 5054 |
+
[2024-11-07 14:32:44,004][03851] Heartbeat connected on RolloutWorker_w4
|
| 5055 |
+
[2024-11-07 14:32:44,007][03851] Heartbeat connected on RolloutWorker_w5
|
| 5056 |
+
[2024-11-07 14:32:44,067][03851] Heartbeat connected on RolloutWorker_w6
|
| 5057 |
+
[2024-11-07 14:32:44,068][03851] Heartbeat connected on RolloutWorker_w7
|
| 5058 |
+
[2024-11-07 14:34:40,019][03851] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 3851], exiting...
|
| 5059 |
+
[2024-11-07 14:34:40,023][04100] Stopping InferenceWorker_p0-w0...
|
| 5060 |
+
[2024-11-07 14:34:40,023][04106] Stopping RolloutWorker_w6...
|
| 5061 |
+
[2024-11-07 14:34:40,023][04099] Stopping RolloutWorker_w0...
|
| 5062 |
+
[2024-11-07 14:34:40,023][04106] Loop rollout_proc6_evt_loop terminating...
|
| 5063 |
+
[2024-11-07 14:34:40,023][04100] Loop inference_proc0-0_evt_loop terminating...
|
| 5064 |
+
[2024-11-07 14:34:40,023][04099] Loop rollout_proc0_evt_loop terminating...
|
| 5065 |
+
[2024-11-07 14:34:40,022][04107] Stopping RolloutWorker_w7...
|
| 5066 |
+
[2024-11-07 14:34:40,024][04107] Loop rollout_proc7_evt_loop terminating...
|
| 5067 |
+
[2024-11-07 14:34:40,024][04104] Stopping RolloutWorker_w4...
|
| 5068 |
+
[2024-11-07 14:34:40,026][04104] Loop rollout_proc4_evt_loop terminating...
|
| 5069 |
+
[2024-11-07 14:34:40,023][03851] Runner profile tree view:
|
| 5070 |
+
main_loop: 135.9546
|
| 5071 |
+
[2024-11-07 14:34:40,027][04086] Stopping Batcher_0...
|
| 5072 |
+
[2024-11-07 14:34:40,028][04086] Loop batcher_evt_loop terminating...
|
| 5073 |
+
[2024-11-07 14:34:40,027][03851] Collected {}, FPS: 0.0
|
| 5074 |
+
[2024-11-07 14:34:40,029][04102] Stopping RolloutWorker_w2...
|
| 5075 |
+
[2024-11-07 14:34:40,029][04102] Loop rollout_proc2_evt_loop terminating...
|
| 5076 |
+
[2024-11-07 14:34:40,030][04105] Stopping RolloutWorker_w5...
|
| 5077 |
+
[2024-11-07 14:34:40,031][04105] Loop rollout_proc5_evt_loop terminating...
|
| 5078 |
+
[2024-11-07 14:34:40,034][04101] Stopping RolloutWorker_w1...
|
| 5079 |
+
[2024-11-07 14:34:40,040][04101] Loop rollout_proc1_evt_loop terminating...
|
| 5080 |
+
[2024-11-07 14:34:40,042][04103] Stopping RolloutWorker_w3...
|
| 5081 |
+
[2024-11-07 14:34:40,046][04103] Loop rollout_proc3_evt_loop terminating...
|
| 5082 |
+
[2024-11-07 14:35:11,133][04584] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
|
| 5083 |
+
[2024-11-07 14:35:11,135][04584] Rollout worker 0 uses device cpu
|
| 5084 |
+
[2024-11-07 14:35:11,136][04584] Rollout worker 1 uses device cpu
|
| 5085 |
+
[2024-11-07 14:35:11,138][04584] Rollout worker 2 uses device cpu
|
| 5086 |
+
[2024-11-07 14:35:11,139][04584] Rollout worker 3 uses device cpu
|
| 5087 |
+
[2024-11-07 14:35:11,141][04584] Rollout worker 4 uses device cpu
|
| 5088 |
+
[2024-11-07 14:35:11,142][04584] Rollout worker 5 uses device cpu
|
| 5089 |
+
[2024-11-07 14:35:11,144][04584] Rollout worker 6 uses device cpu
|
| 5090 |
+
[2024-11-07 14:35:11,146][04584] Rollout worker 7 uses device cpu
|
| 5091 |
+
[2024-11-07 14:35:11,205][04584] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 5092 |
+
[2024-11-07 14:35:11,206][04584] InferenceWorker_p0-w0: min num requests: 2
|
| 5093 |
+
[2024-11-07 14:35:11,239][04584] Starting all processes...
|
| 5094 |
+
[2024-11-07 14:35:11,241][04584] Starting process learner_proc0
|
| 5095 |
+
[2024-11-07 14:35:11,376][04584] Starting all processes...
|
| 5096 |
+
[2024-11-07 14:35:11,383][04584] Starting process inference_proc0-0
|
| 5097 |
+
[2024-11-07 14:35:11,384][04584] Starting process rollout_proc0
|
| 5098 |
+
[2024-11-07 14:35:11,385][04584] Starting process rollout_proc1
|
| 5099 |
+
[2024-11-07 14:35:11,385][04584] Starting process rollout_proc2
|
| 5100 |
+
[2024-11-07 14:35:11,386][04584] Starting process rollout_proc3
|
| 5101 |
+
[2024-11-07 14:35:11,387][04584] Starting process rollout_proc4
|
| 5102 |
+
[2024-11-07 14:35:11,388][04584] Starting process rollout_proc5
|
| 5103 |
+
[2024-11-07 14:35:11,390][04584] Starting process rollout_proc6
|
| 5104 |
+
[2024-11-07 14:35:11,390][04584] Starting process rollout_proc7
|
| 5105 |
+
[2024-11-07 14:35:16,833][04708] Worker 6 uses CPU cores [6]
|
| 5106 |
+
[2024-11-07 14:35:16,839][04706] Worker 4 uses CPU cores [4]
|
| 5107 |
+
[2024-11-07 14:35:17,166][04709] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
|
| 5108 |
+
[2024-11-07 14:35:17,188][04705] Worker 2 uses CPU cores [2]
|
| 5109 |
+
[2024-11-07 14:35:17,642][04702] Worker 0 uses CPU cores [0]
|
| 5110 |
+
[2024-11-07 14:35:17,899][04707] Worker 5 uses CPU cores [5]
|
| 5111 |
+
[2024-11-07 14:35:17,907][04701] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 5112 |
+
[2024-11-07 14:35:17,907][04701] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
| 5113 |
+
[2024-11-07 14:35:17,932][04703] Worker 1 uses CPU cores [1]
|
| 5114 |
+
[2024-11-07 14:35:17,951][04688] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 5115 |
+
[2024-11-07 14:35:17,952][04688] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
| 5116 |
+
[2024-11-07 14:35:17,978][04688] Num visible devices: 1
|
| 5117 |
+
[2024-11-07 14:35:17,978][04701] Num visible devices: 1
|
| 5118 |
+
[2024-11-07 14:35:17,993][04688] Starting seed is not provided
|
| 5119 |
+
[2024-11-07 14:35:17,993][04688] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 5120 |
+
[2024-11-07 14:35:17,994][04688] Initializing actor-critic model on device cuda:0
|
| 5121 |
+
[2024-11-07 14:35:17,994][04688] RunningMeanStd input shape: (3, 72, 128)
|
| 5122 |
+
[2024-11-07 14:35:17,995][04688] RunningMeanStd input shape: (1,)
|
| 5123 |
+
[2024-11-07 14:35:18,006][04688] ConvEncoder: input_channels=3
|
| 5124 |
+
[2024-11-07 14:35:18,132][04688] Conv encoder output size: 512
|
| 5125 |
+
[2024-11-07 14:35:18,133][04688] Policy head output size: 512
|
| 5126 |
+
[2024-11-07 14:35:18,148][04688] Created Actor Critic model with architecture:
|
| 5127 |
+
[2024-11-07 14:35:18,148][04688] ActorCriticSharedWeights(
|
| 5128 |
+
(obs_normalizer): ObservationNormalizer(
|
| 5129 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
| 5130 |
+
(running_mean_std): ModuleDict(
|
| 5131 |
+
(obs): RunningMeanStdInPlace()
|
| 5132 |
+
)
|
| 5133 |
+
)
|
| 5134 |
+
)
|
| 5135 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
| 5136 |
+
(encoder): VizdoomEncoder(
|
| 5137 |
+
(basic_encoder): ConvEncoder(
|
| 5138 |
+
(enc): RecursiveScriptModule(
|
| 5139 |
+
original_name=ConvEncoderImpl
|
| 5140 |
+
(conv_head): RecursiveScriptModule(
|
| 5141 |
+
original_name=Sequential
|
| 5142 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
| 5143 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
| 5144 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
| 5145 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
| 5146 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
| 5147 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
| 5148 |
+
)
|
| 5149 |
+
(mlp_layers): RecursiveScriptModule(
|
| 5150 |
+
original_name=Sequential
|
| 5151 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
| 5152 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
| 5153 |
+
)
|
| 5154 |
+
)
|
| 5155 |
+
)
|
| 5156 |
+
)
|
| 5157 |
+
(core): ModelCoreRNN(
|
| 5158 |
+
(core): GRU(512, 512)
|
| 5159 |
+
)
|
| 5160 |
+
(decoder): MlpDecoder(
|
| 5161 |
+
(mlp): Identity()
|
| 5162 |
+
)
|
| 5163 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
| 5164 |
+
(action_parameterization): ActionParameterizationDefault(
|
| 5165 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
| 5166 |
+
)
|
| 5167 |
+
)
|
| 5168 |
+
[2024-11-07 14:35:18,249][04704] Worker 3 uses CPU cores [3]
|
| 5169 |
+
[2024-11-07 14:35:18,791][04688] Using optimizer <class 'torch.optim.adam.Adam'>
|
| 5170 |
+
[2024-11-07 14:35:19,775][04688] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000000986_4038656.pth...
|
| 5171 |
+
[2024-11-07 14:35:19,840][04688] Loading model from checkpoint
|
| 5172 |
+
[2024-11-07 14:35:19,842][04688] Loaded experiment state at self.train_step=986, self.env_steps=4038656
|
| 5173 |
+
[2024-11-07 14:35:19,842][04688] Initialized policy 0 weights for model version 986
|
| 5174 |
+
[2024-11-07 14:35:19,848][04688] LearnerWorker_p0 finished initialization!
|
| 5175 |
+
[2024-11-07 14:35:19,849][04688] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
| 5176 |
+
[2024-11-07 14:35:20,005][04701] RunningMeanStd input shape: (3, 72, 128)
|
| 5177 |
+
[2024-11-07 14:35:20,006][04701] RunningMeanStd input shape: (1,)
|
| 5178 |
+
[2024-11-07 14:35:20,019][04701] ConvEncoder: input_channels=3
|
| 5179 |
+
[2024-11-07 14:35:20,124][04701] Conv encoder output size: 512
|
| 5180 |
+
[2024-11-07 14:35:20,124][04701] Policy head output size: 512
|
| 5181 |
+
[2024-11-07 14:35:20,165][04584] Inference worker 0-0 is ready!
|
| 5182 |
+
[2024-11-07 14:35:20,167][04584] All inference workers are ready! Signal rollout workers to start!
|
| 5183 |
+
[2024-11-07 14:35:20,253][04705] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5184 |
+
[2024-11-07 14:35:20,255][04703] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5185 |
+
[2024-11-07 14:35:20,257][04706] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5186 |
+
[2024-11-07 14:35:20,259][04707] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5187 |
+
[2024-11-07 14:35:20,265][04704] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5188 |
+
[2024-11-07 14:35:20,283][04708] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5189 |
+
[2024-11-07 14:35:20,317][04709] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5190 |
+
[2024-11-07 14:35:20,332][04702] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5191 |
+
[2024-11-07 14:35:20,647][04705] Decorrelating experience for 0 frames...
|
| 5192 |
+
[2024-11-07 14:35:20,650][04703] Decorrelating experience for 0 frames...
|
| 5193 |
+
[2024-11-07 14:35:20,663][04707] Decorrelating experience for 0 frames...
|
| 5194 |
+
[2024-11-07 14:35:20,681][04708] Decorrelating experience for 0 frames...
|
| 5195 |
+
[2024-11-07 14:35:20,693][04709] Decorrelating experience for 0 frames...
|
| 5196 |
+
[2024-11-07 14:35:20,977][04704] Decorrelating experience for 0 frames...
|
| 5197 |
+
[2024-11-07 14:35:21,024][04708] Decorrelating experience for 32 frames...
|
| 5198 |
+
[2024-11-07 14:35:21,043][04709] Decorrelating experience for 32 frames...
|
| 5199 |
+
[2024-11-07 14:35:21,063][04705] Decorrelating experience for 32 frames...
|
| 5200 |
+
[2024-11-07 14:35:21,064][04707] Decorrelating experience for 32 frames...
|
| 5201 |
+
[2024-11-07 14:35:21,066][04703] Decorrelating experience for 32 frames...
|
| 5202 |
+
[2024-11-07 14:35:21,373][04702] Decorrelating experience for 0 frames...
|
| 5203 |
+
[2024-11-07 14:35:21,407][04704] Decorrelating experience for 32 frames...
|
| 5204 |
+
[2024-11-07 14:35:21,456][04705] Decorrelating experience for 64 frames...
|
| 5205 |
+
[2024-11-07 14:35:21,457][04708] Decorrelating experience for 64 frames...
|
| 5206 |
+
[2024-11-07 14:35:21,525][04703] Decorrelating experience for 64 frames...
|
| 5207 |
+
[2024-11-07 14:35:21,694][04702] Decorrelating experience for 32 frames...
|
| 5208 |
+
[2024-11-07 14:35:21,817][04704] Decorrelating experience for 64 frames...
|
| 5209 |
+
[2024-11-07 14:35:21,843][04705] Decorrelating experience for 96 frames...
|
| 5210 |
+
[2024-11-07 14:35:21,844][04708] Decorrelating experience for 96 frames...
|
| 5211 |
+
[2024-11-07 14:35:22,066][04703] Decorrelating experience for 96 frames...
|
| 5212 |
+
[2024-11-07 14:35:22,068][04706] Decorrelating experience for 0 frames...
|
| 5213 |
+
[2024-11-07 14:35:22,108][04584] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4038656. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
| 5214 |
+
[2024-11-07 14:35:22,184][04704] Decorrelating experience for 96 frames...
|
| 5215 |
+
[2024-11-07 14:35:22,274][04702] Decorrelating experience for 64 frames...
|
| 5216 |
+
[2024-11-07 14:35:22,403][04706] Decorrelating experience for 32 frames...
|
| 5217 |
+
[2024-11-07 14:35:22,687][04702] Decorrelating experience for 96 frames...
|
| 5218 |
+
[2024-11-07 14:35:22,831][04707] Decorrelating experience for 64 frames...
|
| 5219 |
+
[2024-11-07 14:35:23,152][04706] Decorrelating experience for 64 frames...
|
| 5220 |
+
[2024-11-07 14:35:23,300][04707] Decorrelating experience for 96 frames...
|
| 5221 |
+
[2024-11-07 14:35:23,313][04709] Decorrelating experience for 64 frames...
|
| 5222 |
+
[2024-11-07 14:35:23,775][04706] Decorrelating experience for 96 frames...
|
| 5223 |
+
[2024-11-07 14:35:24,023][04709] Decorrelating experience for 96 frames...
|
| 5224 |
+
[2024-11-07 14:35:24,784][04688] Signal inference workers to stop experience collection...
|
| 5225 |
+
[2024-11-07 14:35:24,831][04701] InferenceWorker_p0-w0: stopping experience collection
|
| 5226 |
+
[2024-11-07 14:35:27,108][04584] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4038656. Throughput: 0: 546.0. Samples: 2730. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
| 5227 |
+
[2024-11-07 14:35:27,109][04584] Avg episode reward: [(0, '2.632')]
|
| 5228 |
+
[2024-11-07 14:35:29,009][04688] Signal inference workers to resume experience collection...
|
| 5229 |
+
[2024-11-07 14:35:29,010][04701] InferenceWorker_p0-w0: resuming experience collection
|
| 5230 |
+
[2024-11-07 14:35:31,197][04584] Heartbeat connected on Batcher_0
|
| 5231 |
+
[2024-11-07 14:35:31,200][04584] Heartbeat connected on LearnerWorker_p0
|
| 5232 |
+
[2024-11-07 14:35:31,212][04584] Heartbeat connected on RolloutWorker_w0
|
| 5233 |
+
[2024-11-07 14:35:31,216][04584] Heartbeat connected on RolloutWorker_w1
|
| 5234 |
+
[2024-11-07 14:35:31,223][04584] Heartbeat connected on InferenceWorker_p0-w0
|
| 5235 |
+
[2024-11-07 14:35:31,227][04584] Heartbeat connected on RolloutWorker_w2
|
| 5236 |
+
[2024-11-07 14:35:31,235][04584] Heartbeat connected on RolloutWorker_w3
|
| 5237 |
+
[2024-11-07 14:35:31,238][04584] Heartbeat connected on RolloutWorker_w6
|
| 5238 |
+
[2024-11-07 14:35:31,244][04584] Heartbeat connected on RolloutWorker_w4
|
| 5239 |
+
[2024-11-07 14:35:31,250][04584] Heartbeat connected on RolloutWorker_w5
|
| 5240 |
+
[2024-11-07 14:35:31,255][04584] Heartbeat connected on RolloutWorker_w7
|
| 5241 |
+
[2024-11-07 14:35:32,108][04584] Fps is (10 sec: 2867.1, 60 sec: 2867.1, 300 sec: 2867.1). Total num frames: 4067328. Throughput: 0: 299.6. Samples: 2996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
| 5242 |
+
[2024-11-07 14:35:32,111][04584] Avg episode reward: [(0, '3.943')]
|
| 5243 |
+
[2024-11-07 14:35:35,402][04701] Updated weights for policy 0, policy_version 996 (0.0025)
|
| 5244 |
+
[2024-11-07 14:35:37,108][04584] Fps is (10 sec: 5324.7, 60 sec: 3549.8, 300 sec: 3549.8). Total num frames: 4091904. Throughput: 0: 700.9. Samples: 10514. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
| 5245 |
+
[2024-11-07 14:35:37,112][04584] Avg episode reward: [(0, '4.473')]
|
| 5246 |
+
[2024-11-07 14:35:40,471][04701] Updated weights for policy 0, policy_version 1006 (0.0023)
|
| 5247 |
+
[2024-11-07 14:35:42,108][04584] Fps is (10 sec: 6553.5, 60 sec: 4710.3, 300 sec: 4710.3). Total num frames: 4132864. Throughput: 0: 1134.7. Samples: 22694. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5248 |
+
[2024-11-07 14:35:42,112][04584] Avg episode reward: [(0, '4.427')]
|
| 5249 |
+
[2024-11-07 14:35:45,559][04701] Updated weights for policy 0, policy_version 1016 (0.0033)
|
| 5250 |
+
[2024-11-07 14:35:47,108][04584] Fps is (10 sec: 8192.2, 60 sec: 5406.7, 300 sec: 5406.7). Total num frames: 4173824. Throughput: 0: 1148.1. Samples: 28702. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5251 |
+
[2024-11-07 14:35:47,110][04584] Avg episode reward: [(0, '4.231')]
|
| 5252 |
+
[2024-11-07 14:35:50,462][04701] Updated weights for policy 0, policy_version 1026 (0.0025)
|
| 5253 |
+
[2024-11-07 14:35:52,108][04584] Fps is (10 sec: 8192.2, 60 sec: 5870.9, 300 sec: 5870.9). Total num frames: 4214784. Throughput: 0: 1364.1. Samples: 40924. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
| 5254 |
+
[2024-11-07 14:35:52,111][04584] Avg episode reward: [(0, '4.551')]
|
| 5255 |
+
[2024-11-07 14:35:55,421][04701] Updated weights for policy 0, policy_version 1036 (0.0029)
|
| 5256 |
+
[2024-11-07 14:35:57,108][04584] Fps is (10 sec: 8192.0, 60 sec: 6202.5, 300 sec: 6202.5). Total num frames: 4255744. Throughput: 0: 1530.4. Samples: 53564. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5257 |
+
[2024-11-07 14:35:57,109][04584] Avg episode reward: [(0, '4.498')]
|
| 5258 |
+
[2024-11-07 14:36:00,664][04701] Updated weights for policy 0, policy_version 1046 (0.0037)
|
| 5259 |
+
[2024-11-07 14:36:02,109][04584] Fps is (10 sec: 7781.9, 60 sec: 6348.7, 300 sec: 6348.7). Total num frames: 4292608. Throughput: 0: 1490.7. Samples: 59628. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5260 |
+
[2024-11-07 14:36:02,118][04584] Avg episode reward: [(0, '4.486')]
|
| 5261 |
+
[2024-11-07 14:36:07,108][04584] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6280.5). Total num frames: 4321280. Throughput: 0: 1538.8. Samples: 69244. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
| 5262 |
+
[2024-11-07 14:36:07,110][04584] Avg episode reward: [(0, '4.349')]
|
| 5263 |
+
[2024-11-07 14:36:07,113][04701] Updated weights for policy 0, policy_version 1056 (0.0030)
|
| 5264 |
+
[2024-11-07 14:36:12,109][04584] Fps is (10 sec: 4915.4, 60 sec: 6062.0, 300 sec: 6062.0). Total num frames: 4341760. Throughput: 0: 1618.7. Samples: 75570. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
| 5265 |
+
[2024-11-07 14:36:12,111][04584] Avg episode reward: [(0, '4.336')]
|
| 5266 |
+
[2024-11-07 14:36:15,164][04701] Updated weights for policy 0, policy_version 1066 (0.0029)
|
| 5267 |
+
[2024-11-07 14:36:17,109][04584] Fps is (10 sec: 5734.2, 60 sec: 6181.2, 300 sec: 6181.2). Total num frames: 4378624. Throughput: 0: 1729.5. Samples: 80826. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5268 |
+
[2024-11-07 14:36:17,114][04584] Avg episode reward: [(0, '4.405')]
|
| 5269 |
+
[2024-11-07 14:36:20,267][04701] Updated weights for policy 0, policy_version 1076 (0.0026)
|
| 5270 |
+
[2024-11-07 14:36:22,108][04584] Fps is (10 sec: 7782.7, 60 sec: 6348.8, 300 sec: 6348.8). Total num frames: 4419584. Throughput: 0: 1830.0. Samples: 92864. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5271 |
+
[2024-11-07 14:36:22,111][04584] Avg episode reward: [(0, '4.417')]
|
| 5272 |
+
[2024-11-07 14:36:25,223][04701] Updated weights for policy 0, policy_version 1086 (0.0024)
|
| 5273 |
+
[2024-11-07 14:36:27,108][04584] Fps is (10 sec: 8192.4, 60 sec: 7031.5, 300 sec: 6490.6). Total num frames: 4460544. Throughput: 0: 1831.8. Samples: 105126. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5274 |
+
[2024-11-07 14:36:27,110][04584] Avg episode reward: [(0, '4.564')]
|
| 5275 |
+
[2024-11-07 14:36:30,159][04701] Updated weights for policy 0, policy_version 1096 (0.0032)
|
| 5276 |
+
[2024-11-07 14:36:32,108][04584] Fps is (10 sec: 8192.0, 60 sec: 7236.3, 300 sec: 6612.1). Total num frames: 4501504. Throughput: 0: 1834.5. Samples: 111254. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5277 |
+
[2024-11-07 14:36:32,110][04584] Avg episode reward: [(0, '4.676')]
|
| 5278 |
+
[2024-11-07 14:36:35,225][04701] Updated weights for policy 0, policy_version 1106 (0.0022)
|
| 5279 |
+
[2024-11-07 14:36:37,108][04584] Fps is (10 sec: 8191.8, 60 sec: 7509.3, 300 sec: 6717.4). Total num frames: 4542464. Throughput: 0: 1839.1. Samples: 123682. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5280 |
+
[2024-11-07 14:36:37,110][04584] Avg episode reward: [(0, '4.525')]
|
| 5281 |
+
[2024-11-07 14:36:40,134][04701] Updated weights for policy 0, policy_version 1116 (0.0024)
|
| 5282 |
+
[2024-11-07 14:36:43,724][04584] Fps is (10 sec: 6699.8, 60 sec: 7246.0, 300 sec: 6624.6). Total num frames: 4579328. Throughput: 0: 1768.0. Samples: 135982. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5283 |
+
[2024-11-07 14:36:43,725][04584] Avg episode reward: [(0, '4.556')]
|
| 5284 |
+
[2024-11-07 14:36:47,108][04584] Fps is (10 sec: 6553.6, 60 sec: 7236.2, 300 sec: 6698.1). Total num frames: 4608000. Throughput: 0: 1733.3. Samples: 137624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5285 |
+
[2024-11-07 14:36:47,113][04584] Avg episode reward: [(0, '4.348')]
|
| 5286 |
+
[2024-11-07 14:36:47,253][04701] Updated weights for policy 0, policy_version 1126 (0.0024)
|
| 5287 |
+
[2024-11-07 14:36:52,108][04584] Fps is (10 sec: 8305.3, 60 sec: 7236.3, 300 sec: 6781.2). Total num frames: 4648960. Throughput: 0: 1783.3. Samples: 149492. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
| 5288 |
+
[2024-11-07 14:36:52,109][04584] Avg episode reward: [(0, '4.319')]
|
| 5289 |
+
[2024-11-07 14:36:52,633][04701] Updated weights for policy 0, policy_version 1136 (0.0028)
|
| 5290 |
+
[2024-11-07 14:36:57,108][04584] Fps is (10 sec: 7782.6, 60 sec: 7168.0, 300 sec: 6812.3). Total num frames: 4685824. Throughput: 0: 1909.7. Samples: 161508. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5291 |
+
[2024-11-07 14:36:57,110][04584] Avg episode reward: [(0, '4.504')]
|
| 5292 |
+
[2024-11-07 14:36:57,814][04701] Updated weights for policy 0, policy_version 1146 (0.0030)
|
| 5293 |
+
[2024-11-07 14:37:02,108][04584] Fps is (10 sec: 7372.7, 60 sec: 7168.1, 300 sec: 6840.3). Total num frames: 4722688. Throughput: 0: 1923.7. Samples: 167394. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5294 |
+
[2024-11-07 14:37:02,110][04584] Avg episode reward: [(0, '4.470')]
|
| 5295 |
+
[2024-11-07 14:37:03,385][04701] Updated weights for policy 0, policy_version 1156 (0.0031)
|
| 5296 |
+
[2024-11-07 14:37:07,108][04584] Fps is (10 sec: 7782.3, 60 sec: 7372.8, 300 sec: 6904.7). Total num frames: 4763648. Throughput: 0: 1904.9. Samples: 178584. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5297 |
+
[2024-11-07 14:37:07,111][04584] Avg episode reward: [(0, '4.431')]
|
| 5298 |
+
[2024-11-07 14:37:07,126][04688] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001163_4763648.pth...
|
| 5299 |
+
[2024-11-07 14:37:07,332][04688] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000000984_4030464.pth
|
| 5300 |
+
[2024-11-07 14:37:08,663][04701] Updated weights for policy 0, policy_version 1166 (0.0024)
|
| 5301 |
+
[2024-11-07 14:37:12,108][04584] Fps is (10 sec: 7372.8, 60 sec: 7577.6, 300 sec: 6888.7). Total num frames: 4796416. Throughput: 0: 1873.3. Samples: 189424. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5302 |
+
[2024-11-07 14:37:12,110][04584] Avg episode reward: [(0, '4.392')]
|
| 5303 |
+
[2024-11-07 14:37:14,273][04701] Updated weights for policy 0, policy_version 1176 (0.0024)
|
| 5304 |
+
[2024-11-07 14:37:18,138][04584] Fps is (10 sec: 5941.6, 60 sec: 7382.6, 300 sec: 6813.1). Total num frames: 4829184. Throughput: 0: 1821.9. Samples: 195116. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5305 |
+
[2024-11-07 14:37:18,142][04584] Avg episode reward: [(0, '4.604')]
|
| 5306 |
+
[2024-11-07 14:37:21,528][04701] Updated weights for policy 0, policy_version 1186 (0.0027)
|
| 5307 |
+
[2024-11-07 14:37:22,108][04584] Fps is (10 sec: 6553.5, 60 sec: 7372.8, 300 sec: 6860.8). Total num frames: 4861952. Throughput: 0: 1756.1. Samples: 202708. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5308 |
+
[2024-11-07 14:37:22,110][04584] Avg episode reward: [(0, '4.486')]
|
| 5309 |
+
[2024-11-07 14:37:26,690][04701] Updated weights for policy 0, policy_version 1196 (0.0022)
|
| 5310 |
+
[2024-11-07 14:37:27,108][04584] Fps is (10 sec: 7763.0, 60 sec: 7304.5, 300 sec: 6881.3). Total num frames: 4898816. Throughput: 0: 1816.0. Samples: 214768. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5311 |
+
[2024-11-07 14:37:27,110][04584] Avg episode reward: [(0, '4.418')]
|
| 5312 |
+
[2024-11-07 14:37:31,942][04701] Updated weights for policy 0, policy_version 1206 (0.0030)
|
| 5313 |
+
[2024-11-07 14:37:32,108][04584] Fps is (10 sec: 7782.5, 60 sec: 7304.5, 300 sec: 6931.7). Total num frames: 4939776. Throughput: 0: 1847.4. Samples: 220758. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
| 5314 |
+
[2024-11-07 14:37:32,110][04584] Avg episode reward: [(0, '4.316')]
|
| 5315 |
+
[2024-11-07 14:37:36,986][04701] Updated weights for policy 0, policy_version 1216 (0.0022)
|
| 5316 |
+
[2024-11-07 14:37:37,108][04584] Fps is (10 sec: 8192.1, 60 sec: 7304.6, 300 sec: 6978.4). Total num frames: 4980736. Throughput: 0: 1848.0. Samples: 232650. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5317 |
+
[2024-11-07 14:37:37,109][04584] Avg episode reward: [(0, '4.397')]
|
| 5318 |
+
[2024-11-07 14:37:41,959][04701] Updated weights for policy 0, policy_version 1226 (0.0024)
|
| 5319 |
+
[2024-11-07 14:37:42,109][04584] Fps is (10 sec: 8191.4, 60 sec: 7576.7, 300 sec: 7021.7). Total num frames: 5021696. Throughput: 0: 1853.3. Samples: 244906. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5320 |
+
[2024-11-07 14:37:42,111][04584] Avg episode reward: [(0, '4.489')]
|
| 5321 |
+
[2024-11-07 14:37:46,940][04701] Updated weights for policy 0, policy_version 1236 (0.0028)
|
| 5322 |
+
[2024-11-07 14:37:47,108][04584] Fps is (10 sec: 8191.8, 60 sec: 7577.6, 300 sec: 7062.1). Total num frames: 5062656. Throughput: 0: 1859.2. Samples: 251056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5323 |
+
[2024-11-07 14:37:47,110][04584] Avg episode reward: [(0, '4.449')]
|
| 5324 |
+
[2024-11-07 14:37:52,554][04584] Fps is (10 sec: 6274.2, 60 sec: 7250.6, 300 sec: 6969.8). Total num frames: 5087232. Throughput: 0: 1865.9. Samples: 263382. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5325 |
+
[2024-11-07 14:37:52,559][04584] Avg episode reward: [(0, '4.337')]
|
| 5326 |
+
[2024-11-07 14:37:54,172][04701] Updated weights for policy 0, policy_version 1246 (0.0027)
|
| 5327 |
+
[2024-11-07 14:37:57,108][04584] Fps is (10 sec: 6144.0, 60 sec: 7304.5, 300 sec: 7002.8). Total num frames: 5124096. Throughput: 0: 1812.0. Samples: 270964. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5328 |
+
[2024-11-07 14:37:57,110][04584] Avg episode reward: [(0, '4.357')]
|
| 5329 |
+
[2024-11-07 14:37:59,230][04701] Updated weights for policy 0, policy_version 1256 (0.0026)
|
| 5330 |
+
[2024-11-07 14:38:02,108][04584] Fps is (10 sec: 8145.9, 60 sec: 7372.8, 300 sec: 7040.0). Total num frames: 5165056. Throughput: 0: 1866.2. Samples: 277174. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5331 |
+
[2024-11-07 14:38:02,109][04584] Avg episode reward: [(0, '4.437')]
|
| 5332 |
+
[2024-11-07 14:38:04,744][04701] Updated weights for policy 0, policy_version 1266 (0.0023)
|
| 5333 |
+
[2024-11-07 14:38:07,108][04584] Fps is (10 sec: 7782.5, 60 sec: 7304.5, 300 sec: 7050.1). Total num frames: 5201920. Throughput: 0: 1901.1. Samples: 288256. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5334 |
+
[2024-11-07 14:38:07,110][04584] Avg episode reward: [(0, '4.331')]
|
| 5335 |
+
[2024-11-07 14:38:09,768][04701] Updated weights for policy 0, policy_version 1276 (0.0026)
|
| 5336 |
+
[2024-11-07 14:38:12,108][04584] Fps is (10 sec: 7782.4, 60 sec: 7441.1, 300 sec: 7083.7). Total num frames: 5242880. Throughput: 0: 1908.4. Samples: 300644. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5337 |
+
[2024-11-07 14:38:12,110][04584] Avg episode reward: [(0, '4.525')]
|
| 5338 |
+
[2024-11-07 14:38:14,720][04701] Updated weights for policy 0, policy_version 1286 (0.0019)
|
| 5339 |
+
[2024-11-07 14:38:17,108][04584] Fps is (10 sec: 8191.9, 60 sec: 7710.0, 300 sec: 7115.3). Total num frames: 5283840. Throughput: 0: 1914.2. Samples: 306896. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5340 |
+
[2024-11-07 14:38:17,110][04584] Avg episode reward: [(0, '4.575')]
|
| 5341 |
+
[2024-11-07 14:38:19,757][04701] Updated weights for policy 0, policy_version 1296 (0.0029)
|
| 5342 |
+
[2024-11-07 14:38:22,108][04584] Fps is (10 sec: 8192.0, 60 sec: 7714.2, 300 sec: 7145.2). Total num frames: 5324800. Throughput: 0: 1920.4. Samples: 319070. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5343 |
+
[2024-11-07 14:38:22,112][04584] Avg episode reward: [(0, '4.339')]
|
| 5344 |
+
[2024-11-07 14:38:24,651][04701] Updated weights for policy 0, policy_version 1306 (0.0030)
|
| 5345 |
+
[2024-11-07 14:38:27,108][04584] Fps is (10 sec: 6553.5, 60 sec: 7509.3, 300 sec: 7085.0). Total num frames: 5349376. Throughput: 0: 1862.0. Samples: 328694. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5346 |
+
[2024-11-07 14:38:27,110][04584] Avg episode reward: [(0, '4.349')]
|
| 5347 |
+
[2024-11-07 14:38:31,931][04701] Updated weights for policy 0, policy_version 1316 (0.0027)
|
| 5348 |
+
[2024-11-07 14:38:32,108][04584] Fps is (10 sec: 6553.6, 60 sec: 7509.4, 300 sec: 7114.1). Total num frames: 5390336. Throughput: 0: 1825.5. Samples: 333202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5349 |
+
[2024-11-07 14:38:32,110][04584] Avg episode reward: [(0, '4.554')]
|
| 5350 |
+
[2024-11-07 14:38:36,771][04701] Updated weights for policy 0, policy_version 1326 (0.0026)
|
| 5351 |
+
[2024-11-07 14:38:37,108][04584] Fps is (10 sec: 8192.1, 60 sec: 7509.3, 300 sec: 7141.7). Total num frames: 5431296. Throughput: 0: 1844.8. Samples: 345574. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5352 |
+
[2024-11-07 14:38:37,111][04584] Avg episode reward: [(0, '4.685')]
|
| 5353 |
+
[2024-11-07 14:38:41,807][04701] Updated weights for policy 0, policy_version 1336 (0.0021)
|
| 5354 |
+
[2024-11-07 14:38:42,109][04584] Fps is (10 sec: 8191.4, 60 sec: 7509.4, 300 sec: 7168.0). Total num frames: 5472256. Throughput: 0: 1933.9. Samples: 357990. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5355 |
+
[2024-11-07 14:38:42,111][04584] Avg episode reward: [(0, '4.322')]
|
| 5356 |
+
[2024-11-07 14:38:46,713][04701] Updated weights for policy 0, policy_version 1346 (0.0026)
|
| 5357 |
+
[2024-11-07 14:38:47,109][04584] Fps is (10 sec: 8191.8, 60 sec: 7509.3, 300 sec: 7193.0). Total num frames: 5513216. Throughput: 0: 1934.4. Samples: 364222. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5358 |
+
[2024-11-07 14:38:47,112][04584] Avg episode reward: [(0, '4.330')]
|
| 5359 |
+
[2024-11-07 14:38:51,629][04701] Updated weights for policy 0, policy_version 1356 (0.0022)
|
| 5360 |
+
[2024-11-07 14:38:52,108][04584] Fps is (10 sec: 8192.4, 60 sec: 7840.7, 300 sec: 7216.8). Total num frames: 5554176. Throughput: 0: 1963.5. Samples: 376612. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5361 |
+
[2024-11-07 14:38:52,110][04584] Avg episode reward: [(0, '4.316')]
|
| 5362 |
+
[2024-11-07 14:38:56,630][04701] Updated weights for policy 0, policy_version 1366 (0.0025)
|
| 5363 |
+
[2024-11-07 14:38:57,109][04584] Fps is (10 sec: 8192.0, 60 sec: 7850.7, 300 sec: 7239.4). Total num frames: 5595136. Throughput: 0: 1961.9. Samples: 388932. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5364 |
+
[2024-11-07 14:38:57,111][04584] Avg episode reward: [(0, '4.435')]
|
| 5365 |
+
[2024-11-07 14:39:02,108][04584] Fps is (10 sec: 6553.8, 60 sec: 7577.6, 300 sec: 7186.6). Total num frames: 5619712. Throughput: 0: 1958.1. Samples: 395012. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5366 |
+
[2024-11-07 14:39:02,110][04584] Avg episode reward: [(0, '4.515')]
|
| 5367 |
+
[2024-11-07 14:39:04,078][04701] Updated weights for policy 0, policy_version 1376 (0.0027)
|
| 5368 |
+
[2024-11-07 14:39:07,108][04584] Fps is (10 sec: 6144.1, 60 sec: 7577.6, 300 sec: 7190.7). Total num frames: 5656576. Throughput: 0: 1847.9. Samples: 402224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5369 |
+
[2024-11-07 14:39:07,110][04584] Avg episode reward: [(0, '4.473')]
|
| 5370 |
+
[2024-11-07 14:39:07,120][04688] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001382_5660672.pth...
|
| 5371 |
+
[2024-11-07 14:39:07,214][04688] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000000986_4038656.pth
|
| 5372 |
+
[2024-11-07 14:39:09,137][04701] Updated weights for policy 0, policy_version 1386 (0.0028)
|
| 5373 |
+
[2024-11-07 14:39:12,108][04584] Fps is (10 sec: 7782.3, 60 sec: 7577.6, 300 sec: 7212.5). Total num frames: 5697536. Throughput: 0: 1907.0. Samples: 414508. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5374 |
+
[2024-11-07 14:39:12,111][04584] Avg episode reward: [(0, '4.401')]
|
| 5375 |
+
[2024-11-07 14:39:14,203][04701] Updated weights for policy 0, policy_version 1396 (0.0024)
|
| 5376 |
+
[2024-11-07 14:39:17,110][04584] Fps is (10 sec: 8600.6, 60 sec: 7645.7, 300 sec: 7250.7). Total num frames: 5742592. Throughput: 0: 1942.3. Samples: 420606. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5377 |
+
[2024-11-07 14:39:17,112][04584] Avg episode reward: [(0, '4.251')]
|
| 5378 |
+
[2024-11-07 14:39:19,131][04701] Updated weights for policy 0, policy_version 1406 (0.0027)
|
| 5379 |
+
[2024-11-07 14:39:22,108][04584] Fps is (10 sec: 8601.8, 60 sec: 7645.9, 300 sec: 7270.4). Total num frames: 5783552. Throughput: 0: 1941.7. Samples: 432950. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5380 |
+
[2024-11-07 14:39:22,110][04584] Avg episode reward: [(0, '4.408')]
|
| 5381 |
+
[2024-11-07 14:39:24,033][04701] Updated weights for policy 0, policy_version 1416 (0.0026)
|
| 5382 |
+
[2024-11-07 14:39:27,108][04584] Fps is (10 sec: 8193.0, 60 sec: 7918.9, 300 sec: 7289.2). Total num frames: 5824512. Throughput: 0: 1942.5. Samples: 445400. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5383 |
+
[2024-11-07 14:39:27,112][04584] Avg episode reward: [(0, '4.368')]
|
| 5384 |
+
[2024-11-07 14:39:29,061][04701] Updated weights for policy 0, policy_version 1426 (0.0028)
|
| 5385 |
+
[2024-11-07 14:39:32,109][04584] Fps is (10 sec: 8191.0, 60 sec: 7918.8, 300 sec: 7307.2). Total num frames: 5865472. Throughput: 0: 1942.4. Samples: 451632. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5386 |
+
[2024-11-07 14:39:32,112][04584] Avg episode reward: [(0, '4.311')]
|
| 5387 |
+
[2024-11-07 14:39:36,164][04701] Updated weights for policy 0, policy_version 1436 (0.0029)
|
| 5388 |
+
[2024-11-07 14:39:37,108][04584] Fps is (10 sec: 6144.0, 60 sec: 7577.6, 300 sec: 7244.3). Total num frames: 5885952. Throughput: 0: 1871.7. Samples: 460840. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5389 |
+
[2024-11-07 14:39:37,110][04584] Avg episode reward: [(0, '4.291')]
|
| 5390 |
+
[2024-11-07 14:39:41,153][04701] Updated weights for policy 0, policy_version 1446 (0.0026)
|
| 5391 |
+
[2024-11-07 14:39:42,108][04584] Fps is (10 sec: 6144.7, 60 sec: 7577.7, 300 sec: 7262.5). Total num frames: 5926912. Throughput: 0: 1838.3. Samples: 471654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5392 |
+
[2024-11-07 14:39:42,110][04584] Avg episode reward: [(0, '4.399')]
|
| 5393 |
+
[2024-11-07 14:39:46,287][04701] Updated weights for policy 0, policy_version 1456 (0.0024)
|
| 5394 |
+
[2024-11-07 14:39:47,109][04584] Fps is (10 sec: 8191.7, 60 sec: 7577.6, 300 sec: 7280.0). Total num frames: 5967872. Throughput: 0: 1836.7. Samples: 477666. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5395 |
+
[2024-11-07 14:39:47,111][04584] Avg episode reward: [(0, '4.391')]
|
| 5396 |
+
[2024-11-07 14:39:51,647][04701] Updated weights for policy 0, policy_version 1466 (0.0024)
|
| 5397 |
+
[2024-11-07 14:39:52,108][04584] Fps is (10 sec: 8192.0, 60 sec: 7577.6, 300 sec: 7296.9). Total num frames: 6008832. Throughput: 0: 1936.8. Samples: 489382. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5398 |
+
[2024-11-07 14:39:52,110][04584] Avg episode reward: [(0, '4.306')]
|
| 5399 |
+
[2024-11-07 14:39:56,638][04701] Updated weights for policy 0, policy_version 1476 (0.0027)
|
| 5400 |
+
[2024-11-07 14:39:57,109][04584] Fps is (10 sec: 7782.4, 60 sec: 7509.3, 300 sec: 7298.3). Total num frames: 6045696. Throughput: 0: 1930.7. Samples: 501392. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5401 |
+
[2024-11-07 14:39:57,111][04584] Avg episode reward: [(0, '4.586')]
|
| 5402 |
+
[2024-11-07 14:40:01,928][04701] Updated weights for policy 0, policy_version 1486 (0.0022)
|
| 5403 |
+
[2024-11-07 14:40:02,108][04584] Fps is (10 sec: 7782.5, 60 sec: 7782.4, 300 sec: 7314.3). Total num frames: 6086656. Throughput: 0: 1933.2. Samples: 507596. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5404 |
+
[2024-11-07 14:40:02,111][04584] Avg episode reward: [(0, '4.157')]
|
| 5405 |
+
[2024-11-07 14:40:07,045][04701] Updated weights for policy 0, policy_version 1496 (0.0028)
|
| 5406 |
+
[2024-11-07 14:40:07,108][04584] Fps is (10 sec: 8192.4, 60 sec: 7850.7, 300 sec: 7329.7). Total num frames: 6127616. Throughput: 0: 1921.9. Samples: 519434. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5407 |
+
[2024-11-07 14:40:07,110][04584] Avg episode reward: [(0, '4.650')]
|
| 5408 |
+
[2024-11-07 14:40:12,108][04584] Fps is (10 sec: 5734.3, 60 sec: 7441.1, 300 sec: 7259.8). Total num frames: 6144000. Throughput: 0: 1798.1. Samples: 526316. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5409 |
+
[2024-11-07 14:40:12,115][04584] Avg episode reward: [(0, '4.644')]
|
| 5410 |
+
[2024-11-07 14:40:15,781][04701] Updated weights for policy 0, policy_version 1506 (0.0037)
|
| 5411 |
+
[2024-11-07 14:40:17,108][04584] Fps is (10 sec: 4915.2, 60 sec: 7236.4, 300 sec: 7247.8). Total num frames: 6176768. Throughput: 0: 1762.2. Samples: 530928. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5412 |
+
[2024-11-07 14:40:17,111][04584] Avg episode reward: [(0, '4.390')]
|
| 5413 |
+
[2024-11-07 14:40:21,601][04701] Updated weights for policy 0, policy_version 1516 (0.0034)
|
| 5414 |
+
[2024-11-07 14:40:22,108][04584] Fps is (10 sec: 6553.7, 60 sec: 7099.7, 300 sec: 7358.9). Total num frames: 6209536. Throughput: 0: 1769.1. Samples: 540450. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5415 |
+
[2024-11-07 14:40:22,110][04584] Avg episode reward: [(0, '4.347')]
|
| 5416 |
+
[2024-11-07 14:40:26,748][04701] Updated weights for policy 0, policy_version 1526 (0.0027)
|
| 5417 |
+
[2024-11-07 14:40:27,109][04584] Fps is (10 sec: 7372.5, 60 sec: 7099.7, 300 sec: 7400.6). Total num frames: 6250496. Throughput: 0: 1796.7. Samples: 552504. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5418 |
+
[2024-11-07 14:40:27,110][04584] Avg episode reward: [(0, '4.378')]
|
| 5419 |
+
[2024-11-07 14:40:31,804][04701] Updated weights for policy 0, policy_version 1536 (0.0030)
|
| 5420 |
+
[2024-11-07 14:40:32,108][04584] Fps is (10 sec: 8191.9, 60 sec: 7099.9, 300 sec: 7456.1). Total num frames: 6291456. Throughput: 0: 1797.6. Samples: 558558. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5421 |
+
[2024-11-07 14:40:32,111][04584] Avg episode reward: [(0, '4.402')]
|
| 5422 |
+
[2024-11-07 14:40:36,996][04701] Updated weights for policy 0, policy_version 1546 (0.0025)
|
| 5423 |
+
[2024-11-07 14:40:37,108][04584] Fps is (10 sec: 8192.1, 60 sec: 7441.1, 300 sec: 7456.1). Total num frames: 6332416. Throughput: 0: 1804.4. Samples: 570582. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5424 |
+
[2024-11-07 14:40:37,110][04584] Avg episode reward: [(0, '4.223')]
|
| 5425 |
+
[2024-11-07 14:40:42,112][04584] Fps is (10 sec: 7779.5, 60 sec: 7372.4, 300 sec: 7442.1). Total num frames: 6369280. Throughput: 0: 1800.2. Samples: 582408. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5426 |
+
[2024-11-07 14:40:42,115][04584] Avg episode reward: [(0, '4.626')]
|
| 5427 |
+
[2024-11-07 14:40:42,246][04701] Updated weights for policy 0, policy_version 1556 (0.0021)
|
| 5428 |
+
[2024-11-07 14:40:47,109][04584] Fps is (10 sec: 5734.3, 60 sec: 7031.5, 300 sec: 7372.8). Total num frames: 6389760. Throughput: 0: 1715.1. Samples: 584778. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5429 |
+
[2024-11-07 14:40:47,111][04584] Avg episode reward: [(0, '4.667')]
|
| 5430 |
+
[2024-11-07 14:40:50,289][04701] Updated weights for policy 0, policy_version 1566 (0.0036)
|
| 5431 |
+
[2024-11-07 14:40:52,108][04584] Fps is (10 sec: 5736.4, 60 sec: 6963.2, 300 sec: 7358.9). Total num frames: 6426624. Throughput: 0: 1668.8. Samples: 594530. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5432 |
+
[2024-11-07 14:40:52,111][04584] Avg episode reward: [(0, '4.452')]
|
| 5433 |
+
[2024-11-07 14:40:55,426][04701] Updated weights for policy 0, policy_version 1576 (0.0025)
|
| 5434 |
+
[2024-11-07 14:40:57,108][04584] Fps is (10 sec: 7782.4, 60 sec: 7031.5, 300 sec: 7372.8). Total num frames: 6467584. Throughput: 0: 1779.1. Samples: 606376. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5435 |
+
[2024-11-07 14:40:57,110][04584] Avg episode reward: [(0, '4.567')]
|
| 5436 |
+
[2024-11-07 14:41:00,789][04701] Updated weights for policy 0, policy_version 1586 (0.0028)
|
| 5437 |
+
[2024-11-07 14:41:02,109][04584] Fps is (10 sec: 7781.9, 60 sec: 6963.1, 300 sec: 7400.5). Total num frames: 6504448. Throughput: 0: 1799.3. Samples: 611900. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5438 |
+
[2024-11-07 14:41:02,112][04584] Avg episode reward: [(0, '4.513')]
|
| 5439 |
+
[2024-11-07 14:41:06,194][04701] Updated weights for policy 0, policy_version 1596 (0.0034)
|
| 5440 |
+
[2024-11-07 14:41:07,108][04584] Fps is (10 sec: 7373.0, 60 sec: 6894.9, 300 sec: 7456.1). Total num frames: 6541312. Throughput: 0: 1839.9. Samples: 623244. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5441 |
+
[2024-11-07 14:41:07,110][04584] Avg episode reward: [(0, '4.449')]
|
| 5442 |
+
[2024-11-07 14:41:07,219][04688] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001598_6545408.pth...
|
| 5443 |
+
[2024-11-07 14:41:07,348][04688] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001163_4763648.pth
|
| 5444 |
+
[2024-11-07 14:41:11,911][04701] Updated weights for policy 0, policy_version 1606 (0.0033)
|
| 5445 |
+
[2024-11-07 14:41:12,108][04584] Fps is (10 sec: 7373.5, 60 sec: 7236.3, 300 sec: 7456.1). Total num frames: 6578176. Throughput: 0: 1821.6. Samples: 634474. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5446 |
+
[2024-11-07 14:41:12,110][04584] Avg episode reward: [(0, '4.328')]
|
| 5447 |
+
[2024-11-07 14:41:18,681][04584] Fps is (10 sec: 6370.7, 60 sec: 7117.9, 300 sec: 7402.7). Total num frames: 6615040. Throughput: 0: 1743.5. Samples: 639758. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5448 |
+
[2024-11-07 14:41:18,683][04584] Avg episode reward: [(0, '4.432')]
|
| 5449 |
+
[2024-11-07 14:41:18,837][04701] Updated weights for policy 0, policy_version 1616 (0.0024)
|
| 5450 |
+
[2024-11-07 14:41:22,108][04584] Fps is (10 sec: 6144.0, 60 sec: 7168.0, 300 sec: 7386.7). Total num frames: 6639616. Throughput: 0: 1727.3. Samples: 648310. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5451 |
+
[2024-11-07 14:41:22,112][04584] Avg episode reward: [(0, '4.391')]
|
| 5452 |
+
[2024-11-07 14:41:24,288][04701] Updated weights for policy 0, policy_version 1626 (0.0036)
|
| 5453 |
+
[2024-11-07 14:41:27,108][04584] Fps is (10 sec: 7777.0, 60 sec: 7168.0, 300 sec: 7386.7). Total num frames: 6680576. Throughput: 0: 1725.2. Samples: 660036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5454 |
+
[2024-11-07 14:41:27,110][04584] Avg episode reward: [(0, '4.320')]
|
| 5455 |
+
[2024-11-07 14:41:29,172][04701] Updated weights for policy 0, policy_version 1636 (0.0028)
|
| 5456 |
+
[2024-11-07 14:41:32,108][04584] Fps is (10 sec: 8192.0, 60 sec: 7168.0, 300 sec: 7386.7). Total num frames: 6721536. Throughput: 0: 1813.1. Samples: 666366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5457 |
+
[2024-11-07 14:41:32,110][04584] Avg episode reward: [(0, '4.490')]
|
| 5458 |
+
[2024-11-07 14:41:34,208][04701] Updated weights for policy 0, policy_version 1646 (0.0023)
|
| 5459 |
+
[2024-11-07 14:41:37,108][04584] Fps is (10 sec: 8192.0, 60 sec: 7168.0, 300 sec: 7441.3). Total num frames: 6762496. Throughput: 0: 1863.7. Samples: 678394. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
| 5460 |
+
[2024-11-07 14:41:37,109][04584] Avg episode reward: [(0, '4.537')]
|
| 5461 |
+
[2024-11-07 14:41:39,513][04701] Updated weights for policy 0, policy_version 1656 (0.0026)
|
| 5462 |
+
[2024-11-07 14:41:42,108][04584] Fps is (10 sec: 7782.3, 60 sec: 7168.5, 300 sec: 7428.3). Total num frames: 6799360. Throughput: 0: 1856.8. Samples: 689932. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5463 |
+
[2024-11-07 14:41:42,110][04584] Avg episode reward: [(0, '4.371')]
|
| 5464 |
+
[2024-11-07 14:41:44,930][04701] Updated weights for policy 0, policy_version 1666 (0.0028)
|
| 5465 |
+
[2024-11-07 14:41:47,108][04584] Fps is (10 sec: 7782.4, 60 sec: 7509.4, 300 sec: 7428.3). Total num frames: 6840320. Throughput: 0: 1863.2. Samples: 695744. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5466 |
+
[2024-11-07 14:41:47,110][04584] Avg episode reward: [(0, '4.330')]
|
| 5467 |
+
[2024-11-07 14:41:50,403][04701] Updated weights for policy 0, policy_version 1676 (0.0030)
|
| 5468 |
+
[2024-11-07 14:41:53,381][04584] Fps is (10 sec: 6177.0, 60 sec: 7219.7, 300 sec: 7368.8). Total num frames: 6868992. Throughput: 0: 1810.9. Samples: 707038. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5469 |
+
[2024-11-07 14:41:53,382][04584] Avg episode reward: [(0, '4.407')]
|
| 5470 |
+
[2024-11-07 14:41:57,108][04584] Fps is (10 sec: 5734.4, 60 sec: 7168.0, 300 sec: 7372.8). Total num frames: 6897664. Throughput: 0: 1764.1. Samples: 713860. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5471 |
+
[2024-11-07 14:41:57,110][04584] Avg episode reward: [(0, '4.690')]
|
| 5472 |
+
[2024-11-07 14:41:58,056][04701] Updated weights for policy 0, policy_version 1686 (0.0034)
|
| 5473 |
+
[2024-11-07 14:42:02,109][04584] Fps is (10 sec: 7509.1, 60 sec: 7168.1, 300 sec: 7358.9). Total num frames: 6934528. Throughput: 0: 1843.5. Samples: 719816. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5474 |
+
[2024-11-07 14:42:02,110][04584] Avg episode reward: [(0, '4.442')]
|
| 5475 |
+
[2024-11-07 14:42:03,920][04701] Updated weights for policy 0, policy_version 1696 (0.0041)
|
| 5476 |
+
[2024-11-07 14:42:07,108][04584] Fps is (10 sec: 6963.1, 60 sec: 7099.7, 300 sec: 7358.9). Total num frames: 6967296. Throughput: 0: 1819.1. Samples: 730168. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5477 |
+
[2024-11-07 14:42:07,111][04584] Avg episode reward: [(0, '4.615')]
|
| 5478 |
+
[2024-11-07 14:42:09,344][04701] Updated weights for policy 0, policy_version 1706 (0.0034)
|
| 5479 |
+
[2024-11-07 14:42:12,108][04584] Fps is (10 sec: 6963.5, 60 sec: 7099.7, 300 sec: 7398.6). Total num frames: 7004160. Throughput: 0: 1801.8. Samples: 741116. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5480 |
+
[2024-11-07 14:42:12,111][04584] Avg episode reward: [(0, '4.542')]
|
| 5481 |
+
[2024-11-07 14:42:15,345][04701] Updated weights for policy 0, policy_version 1716 (0.0029)
|
| 5482 |
+
[2024-11-07 14:42:17,109][04584] Fps is (10 sec: 7372.2, 60 sec: 7290.8, 300 sec: 7386.7). Total num frames: 7041024. Throughput: 0: 1774.6. Samples: 746224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5483 |
+
[2024-11-07 14:42:17,130][04584] Avg episode reward: [(0, '4.596')]
|
| 5484 |
+
[2024-11-07 14:42:20,616][04701] Updated weights for policy 0, policy_version 1726 (0.0025)
|
| 5485 |
+
[2024-11-07 14:42:22,108][04584] Fps is (10 sec: 7372.7, 60 sec: 7304.5, 300 sec: 7386.7). Total num frames: 7077888. Throughput: 0: 1764.3. Samples: 757786. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5486 |
+
[2024-11-07 14:42:22,110][04584] Avg episode reward: [(0, '4.395')]
|
| 5487 |
+
[2024-11-07 14:42:27,773][04584] Fps is (10 sec: 6145.8, 60 sec: 7022.0, 300 sec: 7328.5). Total num frames: 7106560. Throughput: 0: 1616.7. Samples: 763760. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5488 |
+
[2024-11-07 14:42:27,774][04584] Avg episode reward: [(0, '4.528')]
|
| 5489 |
+
[2024-11-07 14:42:27,814][04701] Updated weights for policy 0, policy_version 1736 (0.0034)
|
| 5490 |
+
[2024-11-07 14:42:32,108][04584] Fps is (10 sec: 6553.6, 60 sec: 7031.4, 300 sec: 7331.1). Total num frames: 7143424. Throughput: 0: 1680.9. Samples: 771384. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5491 |
+
[2024-11-07 14:42:32,113][04584] Avg episode reward: [(0, '4.745')]
|
| 5492 |
+
[2024-11-07 14:42:33,100][04701] Updated weights for policy 0, policy_version 1746 (0.0023)
|
| 5493 |
+
[2024-11-07 14:42:37,108][04584] Fps is (10 sec: 8336.2, 60 sec: 7031.4, 300 sec: 7331.2). Total num frames: 7184384. Throughput: 0: 1741.0. Samples: 783168. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5494 |
+
[2024-11-07 14:42:37,110][04584] Avg episode reward: [(0, '4.300')]
|
| 5495 |
+
[2024-11-07 14:42:38,140][04701] Updated weights for policy 0, policy_version 1756 (0.0034)
|
| 5496 |
+
[2024-11-07 14:42:42,108][04584] Fps is (10 sec: 7372.9, 60 sec: 6963.2, 300 sec: 7303.4). Total num frames: 7217152. Throughput: 0: 1784.7. Samples: 794170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
| 5497 |
+
[2024-11-07 14:42:42,110][04584] Avg episode reward: [(0, '4.610')]
|
| 5498 |
+
[2024-11-07 14:42:43,824][04701] Updated weights for policy 0, policy_version 1766 (0.0026)
|
| 5499 |
+
[2024-11-07 14:42:47,108][04584] Fps is (10 sec: 7372.9, 60 sec: 6963.2, 300 sec: 7370.1). Total num frames: 7258112. Throughput: 0: 1789.5. Samples: 800344. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
| 5500 |
+
[2024-11-07 14:42:47,110][04584] Avg episode reward: [(0, '4.393')]
|
| 5501 |
+
[2024-11-07 14:42:49,006][04701] Updated weights for policy 0, policy_version 1776 (0.0030)
|
| 5502 |
+
[2024-11-07 14:42:52,109][04584] Fps is (10 sec: 7781.6, 60 sec: 7253.5, 300 sec: 7358.9). Total num frames: 7294976. Throughput: 0: 1817.0. Samples: 811934. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5503 |
+
[2024-11-07 14:42:52,113][04584] Avg episode reward: [(0, '4.440')]
|
| 5504 |
+
[2024-11-07 14:42:54,207][04701] Updated weights for policy 0, policy_version 1786 (0.0029)
|
| 5505 |
+
[2024-11-07 14:42:57,108][04584] Fps is (10 sec: 7782.3, 60 sec: 7304.5, 300 sec: 7358.9). Total num frames: 7335936. Throughput: 0: 1841.5. Samples: 823984. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5506 |
+
[2024-11-07 14:42:57,110][04584] Avg episode reward: [(0, '4.300')]
|
| 5507 |
+
[2024-11-07 14:42:59,392][04701] Updated weights for policy 0, policy_version 1796 (0.0028)
|
| 5508 |
+
[2024-11-07 14:43:02,199][04584] Fps is (10 sec: 6495.4, 60 sec: 7089.1, 300 sec: 7315.0). Total num frames: 7360512. Throughput: 0: 1856.0. Samples: 829910. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5509 |
+
[2024-11-07 14:43:02,202][04584] Avg episode reward: [(0, '4.381')]
|
| 5510 |
+
[2024-11-07 14:43:06,958][04701] Updated weights for policy 0, policy_version 1806 (0.0026)
|
| 5511 |
+
[2024-11-07 14:43:07,108][04584] Fps is (10 sec: 6144.0, 60 sec: 7168.0, 300 sec: 7303.4). Total num frames: 7397376. Throughput: 0: 1761.0. Samples: 837030. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5512 |
+
[2024-11-07 14:43:07,110][04584] Avg episode reward: [(0, '4.652')]
|
| 5513 |
+
[2024-11-07 14:43:07,127][04688] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001806_7397376.pth...
|
| 5514 |
+
[2024-11-07 14:43:07,318][04688] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001382_5660672.pth
|
| 5515 |
+
[2024-11-07 14:43:12,082][04701] Updated weights for policy 0, policy_version 1816 (0.0029)
|
| 5516 |
+
[2024-11-07 14:43:12,108][04584] Fps is (10 sec: 7853.5, 60 sec: 7236.3, 300 sec: 7303.4). Total num frames: 7438336. Throughput: 0: 1917.2. Samples: 848760. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5517 |
+
[2024-11-07 14:43:12,110][04584] Avg episode reward: [(0, '4.544')]
|
| 5518 |
+
[2024-11-07 14:43:17,108][04584] Fps is (10 sec: 7782.6, 60 sec: 7236.4, 300 sec: 7289.5). Total num frames: 7475200. Throughput: 0: 1853.6. Samples: 854794. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5519 |
+
[2024-11-07 14:43:17,111][04584] Avg episode reward: [(0, '4.361')]
|
| 5520 |
+
[2024-11-07 14:43:17,153][04701] Updated weights for policy 0, policy_version 1826 (0.0025)
|
| 5521 |
+
[2024-11-07 14:43:22,108][04584] Fps is (10 sec: 7782.4, 60 sec: 7304.5, 300 sec: 7345.0). Total num frames: 7516160. Throughput: 0: 1864.3. Samples: 867062. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5522 |
+
[2024-11-07 14:43:22,112][04584] Avg episode reward: [(0, '4.248')]
|
| 5523 |
+
[2024-11-07 14:43:22,238][04701] Updated weights for policy 0, policy_version 1836 (0.0026)
|
| 5524 |
+
[2024-11-07 14:43:27,109][04584] Fps is (10 sec: 8191.7, 60 sec: 7593.4, 300 sec: 7345.0). Total num frames: 7557120. Throughput: 0: 1885.3. Samples: 879010. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5525 |
+
[2024-11-07 14:43:27,111][04584] Avg episode reward: [(0, '4.410')]
|
| 5526 |
+
[2024-11-07 14:43:27,492][04701] Updated weights for policy 0, policy_version 1846 (0.0023)
|
| 5527 |
+
[2024-11-07 14:43:32,108][04584] Fps is (10 sec: 7782.3, 60 sec: 7509.3, 300 sec: 7331.1). Total num frames: 7593984. Throughput: 0: 1875.4. Samples: 884738. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5528 |
+
[2024-11-07 14:43:32,110][04584] Avg episode reward: [(0, '4.527')]
|
| 5529 |
+
[2024-11-07 14:43:32,694][04701] Updated weights for policy 0, policy_version 1856 (0.0022)
|
| 5530 |
+
[2024-11-07 14:43:37,108][04584] Fps is (10 sec: 6144.2, 60 sec: 7236.3, 300 sec: 7275.6). Total num frames: 7618560. Throughput: 0: 1849.5. Samples: 895158. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5531 |
+
[2024-11-07 14:43:37,110][04584] Avg episode reward: [(0, '4.513')]
|
| 5532 |
+
[2024-11-07 14:43:39,970][04701] Updated weights for policy 0, policy_version 1866 (0.0035)
|
| 5533 |
+
[2024-11-07 14:43:42,108][04584] Fps is (10 sec: 6553.6, 60 sec: 7372.8, 300 sec: 7275.6). Total num frames: 7659520. Throughput: 0: 1785.7. Samples: 904342. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5534 |
+
[2024-11-07 14:43:42,110][04584] Avg episode reward: [(0, '4.196')]
|
| 5535 |
+
[2024-11-07 14:43:45,251][04701] Updated weights for policy 0, policy_version 1876 (0.0029)
|
| 5536 |
+
[2024-11-07 14:43:47,108][04584] Fps is (10 sec: 7782.4, 60 sec: 7304.5, 300 sec: 7261.7). Total num frames: 7696384. Throughput: 0: 1783.9. Samples: 910026. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5537 |
+
[2024-11-07 14:43:47,110][04584] Avg episode reward: [(0, '4.252')]
|
| 5538 |
+
[2024-11-07 14:43:50,305][04701] Updated weights for policy 0, policy_version 1886 (0.0024)
|
| 5539 |
+
[2024-11-07 14:43:52,108][04584] Fps is (10 sec: 7782.5, 60 sec: 7372.9, 300 sec: 7261.7). Total num frames: 7737344. Throughput: 0: 1890.9. Samples: 922118. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5540 |
+
[2024-11-07 14:43:52,110][04584] Avg episode reward: [(0, '4.414')]
|
| 5541 |
+
[2024-11-07 14:43:55,379][04701] Updated weights for policy 0, policy_version 1896 (0.0020)
|
| 5542 |
+
[2024-11-07 14:43:57,109][04584] Fps is (10 sec: 8191.5, 60 sec: 7372.7, 300 sec: 7317.2). Total num frames: 7778304. Throughput: 0: 1901.1. Samples: 934312. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5543 |
+
[2024-11-07 14:43:57,111][04584] Avg episode reward: [(0, '4.448')]
|
| 5544 |
+
[2024-11-07 14:44:00,489][04701] Updated weights for policy 0, policy_version 1906 (0.0023)
|
| 5545 |
+
[2024-11-07 14:44:02,108][04584] Fps is (10 sec: 7782.3, 60 sec: 7589.0, 300 sec: 7317.3). Total num frames: 7815168. Throughput: 0: 1898.8. Samples: 940240. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
| 5546 |
+
[2024-11-07 14:44:02,111][04584] Avg episode reward: [(0, '4.370')]
|
| 5547 |
+
[2024-11-07 14:44:05,857][04701] Updated weights for policy 0, policy_version 1916 (0.0025)
|
| 5548 |
+
[2024-11-07 14:44:07,108][04584] Fps is (10 sec: 7782.9, 60 sec: 7645.9, 300 sec: 7317.3). Total num frames: 7856128. Throughput: 0: 1880.7. Samples: 951692. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
| 5549 |
+
[2024-11-07 14:44:07,110][04584] Avg episode reward: [(0, '4.466')]
|
| 5550 |
+
[2024-11-07 14:44:12,108][04584] Fps is (10 sec: 6144.1, 60 sec: 7304.6, 300 sec: 7234.0). Total num frames: 7876608. Throughput: 0: 1785.3. Samples: 959350. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
| 5551 |
+
[2024-11-07 14:44:12,110][04584] Avg episode reward: [(0, '4.323')]
|
| 5552 |
+
[2024-11-07 14:44:13,147][04701] Updated weights for policy 0, policy_version 1926 (0.0023)
|
| 5553 |
+
[2024-11-07 14:44:17,108][04584] Fps is (10 sec: 6143.9, 60 sec: 7372.8, 300 sec: 7233.9). Total num frames: 7917568. Throughput: 0: 1789.9. Samples: 965282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
| 5554 |
+
[2024-11-07 14:44:17,111][04584] Avg episode reward: [(0, '4.247')]
|
| 5555 |
+
[2024-11-07 14:44:18,201][04701] Updated weights for policy 0, policy_version 1936 (0.0026)
|
| 5556 |
+
[2024-11-07 14:44:22,109][04584] Fps is (10 sec: 8191.6, 60 sec: 7372.8, 300 sec: 7233.9). Total num frames: 7958528. Throughput: 0: 1830.9. Samples: 977550. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
| 5557 |
+
[2024-11-07 14:44:22,114][04584] Avg episode reward: [(0, '4.299')]
|
| 5558 |
+
[2024-11-07 14:44:23,245][04701] Updated weights for policy 0, policy_version 1946 (0.0026)
|
| 5559 |
+
[2024-11-07 14:44:27,109][04584] Fps is (10 sec: 8601.1, 60 sec: 7441.0, 300 sec: 7247.8). Total num frames: 8003584. Throughput: 0: 1905.0. Samples: 990068. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
| 5560 |
+
[2024-11-07 14:44:27,111][04584] Avg episode reward: [(0, '4.466')]
|
| 5561 |
+
[2024-11-07 14:44:27,617][04688] Stopping Batcher_0...
|
| 5562 |
+
[2024-11-07 14:44:27,618][04688] Loop batcher_evt_loop terminating...
|
| 5563 |
+
[2024-11-07 14:44:27,620][04688] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
|
| 5564 |
+
[2024-11-07 14:44:27,619][04584] Component Batcher_0 stopped!
|
| 5565 |
+
[2024-11-07 14:44:27,676][04701] Weights refcount: 2 0
|
| 5566 |
+
[2024-11-07 14:44:27,678][04701] Stopping InferenceWorker_p0-w0...
|
| 5567 |
+
[2024-11-07 14:44:27,679][04701] Loop inference_proc0-0_evt_loop terminating...
|
| 5568 |
+
[2024-11-07 14:44:27,678][04584] Component InferenceWorker_p0-w0 stopped!
|
| 5569 |
+
[2024-11-07 14:44:27,709][04688] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001598_6545408.pth
|
| 5570 |
+
[2024-11-07 14:44:27,722][04688] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
|
| 5571 |
+
[2024-11-07 14:44:27,750][04584] Component RolloutWorker_w0 stopped!
|
| 5572 |
+
[2024-11-07 14:44:27,751][04702] Stopping RolloutWorker_w0...
|
| 5573 |
+
[2024-11-07 14:44:27,752][04702] Loop rollout_proc0_evt_loop terminating...
|
| 5574 |
+
[2024-11-07 14:44:27,756][04703] Stopping RolloutWorker_w1...
|
| 5575 |
+
[2024-11-07 14:44:27,756][04584] Component RolloutWorker_w1 stopped!
|
| 5576 |
+
[2024-11-07 14:44:27,759][04703] Loop rollout_proc1_evt_loop terminating...
|
| 5577 |
+
[2024-11-07 14:44:27,765][04707] Stopping RolloutWorker_w5...
|
| 5578 |
+
[2024-11-07 14:44:27,768][04707] Loop rollout_proc5_evt_loop terminating...
|
| 5579 |
+
[2024-11-07 14:44:27,772][04706] Stopping RolloutWorker_w4...
|
| 5580 |
+
[2024-11-07 14:44:27,774][04706] Loop rollout_proc4_evt_loop terminating...
|
| 5581 |
+
[2024-11-07 14:44:27,765][04584] Component RolloutWorker_w5 stopped!
|
| 5582 |
+
[2024-11-07 14:44:27,796][04584] Component RolloutWorker_w4 stopped!
|
| 5583 |
+
[2024-11-07 14:44:27,812][04708] Stopping RolloutWorker_w6...
|
| 5584 |
+
[2024-11-07 14:44:27,814][04708] Loop rollout_proc6_evt_loop terminating...
|
| 5585 |
+
[2024-11-07 14:44:27,812][04584] Component RolloutWorker_w6 stopped!
|
| 5586 |
+
[2024-11-07 14:44:27,845][04688] Stopping LearnerWorker_p0...
|
| 5587 |
+
[2024-11-07 14:44:27,846][04688] Loop learner_proc0_evt_loop terminating...
|
| 5588 |
+
[2024-11-07 14:44:27,846][04584] Component LearnerWorker_p0 stopped!
|
| 5589 |
+
[2024-11-07 14:44:27,888][04709] Stopping RolloutWorker_w7...
|
| 5590 |
+
[2024-11-07 14:44:27,889][04584] Component RolloutWorker_w7 stopped!
|
| 5591 |
+
[2024-11-07 14:44:27,908][04709] Loop rollout_proc7_evt_loop terminating...
|
| 5592 |
+
[2024-11-07 14:44:27,913][04704] Stopping RolloutWorker_w3...
|
| 5593 |
+
[2024-11-07 14:44:27,913][04584] Component RolloutWorker_w3 stopped!
|
| 5594 |
+
[2024-11-07 14:44:27,916][04704] Loop rollout_proc3_evt_loop terminating...
|
| 5595 |
+
[2024-11-07 14:44:28,016][04705] Stopping RolloutWorker_w2...
|
| 5596 |
+
[2024-11-07 14:44:28,016][04584] Component RolloutWorker_w2 stopped!
|
| 5597 |
+
[2024-11-07 14:44:28,019][04705] Loop rollout_proc2_evt_loop terminating...
|
| 5598 |
+
[2024-11-07 14:44:28,019][04584] Waiting for process learner_proc0 to stop...
|
| 5599 |
+
[2024-11-07 14:44:29,498][04584] Waiting for process inference_proc0-0 to join...
|
| 5600 |
+
[2024-11-07 14:44:29,500][04584] Waiting for process rollout_proc0 to join...
|
| 5601 |
+
[2024-11-07 14:44:29,501][04584] Waiting for process rollout_proc1 to join...
|
| 5602 |
+
[2024-11-07 14:44:29,504][04584] Waiting for process rollout_proc2 to join...
|
| 5603 |
+
[2024-11-07 14:44:29,653][04584] Waiting for process rollout_proc3 to join...
|
| 5604 |
+
[2024-11-07 14:44:29,655][04584] Waiting for process rollout_proc4 to join...
|
| 5605 |
+
[2024-11-07 14:44:29,656][04584] Waiting for process rollout_proc5 to join...
|
| 5606 |
+
[2024-11-07 14:44:29,657][04584] Waiting for process rollout_proc6 to join...
|
| 5607 |
+
[2024-11-07 14:44:29,660][04584] Waiting for process rollout_proc7 to join...
|
| 5608 |
+
[2024-11-07 14:44:29,661][04584] Batcher 0 profile tree view:
|
| 5609 |
+
batching: 27.4377, releasing_batches: 0.0459
|
| 5610 |
+
[2024-11-07 14:44:29,663][04584] InferenceWorker_p0-w0 profile tree view:
|
| 5611 |
+
wait_policy: 0.0000
|
| 5612 |
+
wait_policy_total: 6.5812
|
| 5613 |
+
update_model: 7.6567
|
| 5614 |
+
weight_update: 0.0027
|
| 5615 |
+
one_step: 0.0070
|
| 5616 |
+
handle_policy_step: 510.3365
|
| 5617 |
+
deserialize: 13.7673, stack: 2.4265, obs_to_device_normalize: 150.3995, forward: 227.4887, send_messages: 31.2471
|
| 5618 |
+
prepare_outputs: 68.6568
|
| 5619 |
+
to_cpu: 53.1126
|
| 5620 |
+
[2024-11-07 14:44:29,665][04584] Learner 0 profile tree view:
|
| 5621 |
+
misc: 0.0053, prepare_batch: 30.8301
|
| 5622 |
+
train: 106.7453
|
| 5623 |
+
epoch_init: 0.0079, minibatch_init: 0.0131, losses_postprocess: 0.8354, kl_divergence: 0.9434, after_optimizer: 4.0108
|
| 5624 |
+
calculate_losses: 31.3550
|
| 5625 |
+
losses_init: 0.0058, forward_head: 1.9126, bptt_initial: 21.8044, tail: 1.0207, advantages_returns: 0.3056, losses: 3.3075
|
| 5626 |
+
bptt: 2.6948
|
| 5627 |
+
bptt_forward_core: 2.5765
|
| 5628 |
+
update: 68.9694
|
| 5629 |
+
clip: 1.2501
|
| 5630 |
+
[2024-11-07 14:44:29,666][04584] RolloutWorker_w0 profile tree view:
|
| 5631 |
+
wait_for_trajectories: 0.2375, enqueue_policy_requests: 11.9428, env_step: 149.5260, overhead: 10.7908, complete_rollouts: 0.6476
|
| 5632 |
+
save_policy_outputs: 16.0346
|
| 5633 |
+
split_output_tensors: 5.4611
|
| 5634 |
+
[2024-11-07 14:44:29,670][04584] RolloutWorker_w7 profile tree view:
|
| 5635 |
+
wait_for_trajectories: 0.2022, enqueue_policy_requests: 12.7636, env_step: 249.0214, overhead: 10.6673, complete_rollouts: 0.3962
|
| 5636 |
+
save_policy_outputs: 18.4068
|
| 5637 |
+
split_output_tensors: 7.8369
|
| 5638 |
+
[2024-11-07 14:44:29,672][04584] Loop Runner_EvtLoop terminating...
|
| 5639 |
+
[2024-11-07 14:44:29,675][04584] Runner profile tree view:
|
| 5640 |
+
main_loop: 558.4363
|
| 5641 |
+
[2024-11-07 14:44:29,676][04584] Collected {0: 8007680}, FPS: 7107.4
|
| 5642 |
+
[2024-11-07 14:44:30,065][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
|
| 5643 |
+
[2024-11-07 14:44:30,066][04584] Overriding arg 'num_workers' with value 1 passed from command line
|
| 5644 |
+
[2024-11-07 14:44:30,067][04584] Adding new argument 'no_render'=True that is not in the saved config file!
|
| 5645 |
+
[2024-11-07 14:44:30,068][04584] Adding new argument 'save_video'=True that is not in the saved config file!
|
| 5646 |
+
[2024-11-07 14:44:30,071][04584] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
| 5647 |
+
[2024-11-07 14:44:30,072][04584] Adding new argument 'video_name'=None that is not in the saved config file!
|
| 5648 |
+
[2024-11-07 14:44:30,074][04584] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
|
| 5649 |
+
[2024-11-07 14:44:30,075][04584] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
| 5650 |
+
[2024-11-07 14:44:30,077][04584] Adding new argument 'push_to_hub'=False that is not in the saved config file!
|
| 5651 |
+
[2024-11-07 14:44:30,079][04584] Adding new argument 'hf_repository'=None that is not in the saved config file!
|
| 5652 |
+
[2024-11-07 14:44:30,081][04584] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
| 5653 |
+
[2024-11-07 14:44:30,082][04584] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
| 5654 |
+
[2024-11-07 14:44:30,084][04584] Adding new argument 'train_script'=None that is not in the saved config file!
|
| 5655 |
+
[2024-11-07 14:44:30,085][04584] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
| 5656 |
+
[2024-11-07 14:44:30,088][04584] Using frameskip 1 and render_action_repeat=4 for evaluation
|
| 5657 |
+
[2024-11-07 14:44:30,176][04584] Doom resolution: 160x120, resize resolution: (128, 72)
|
| 5658 |
+
[2024-11-07 14:44:30,189][04584] RunningMeanStd input shape: (3, 72, 128)
|
| 5659 |
+
[2024-11-07 14:44:30,193][04584] RunningMeanStd input shape: (1,)
|
| 5660 |
+
[2024-11-07 14:44:30,218][04584] ConvEncoder: input_channels=3
|
| 5661 |
+
[2024-11-07 14:44:30,407][04584] Conv encoder output size: 512
|
| 5662 |
+
[2024-11-07 14:44:30,408][04584] Policy head output size: 512
|
| 5663 |
+
[2024-11-07 14:44:31,399][04584] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
|
| 5664 |
+
[2024-11-07 14:44:32,337][04584] Num frames 100...
|
| 5665 |
+
[2024-11-07 14:44:32,546][04584] Num frames 200...
|
| 5666 |
+
[2024-11-07 14:44:32,757][04584] Num frames 300...
|
| 5667 |
+
[2024-11-07 14:44:32,967][04584] Num frames 400...
|
| 5668 |
+
[2024-11-07 14:44:33,113][04584] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
|
| 5669 |
+
[2024-11-07 14:44:33,114][04584] Avg episode reward: 5.480, avg true_objective: 4.480
|
| 5670 |
+
[2024-11-07 14:44:33,216][04584] Num frames 500...
|
| 5671 |
+
[2024-11-07 14:44:33,402][04584] Num frames 600...
|
| 5672 |
+
[2024-11-07 14:44:33,611][04584] Num frames 700...
|
| 5673 |
+
[2024-11-07 14:44:33,809][04584] Num frames 800...
|
| 5674 |
+
[2024-11-07 14:44:34,017][04584] Num frames 900...
|
| 5675 |
+
[2024-11-07 14:44:34,144][04584] Avg episode rewards: #0: 6.140, true rewards: #0: 4.640
|
| 5676 |
+
[2024-11-07 14:44:34,146][04584] Avg episode reward: 6.140, avg true_objective: 4.640
|
| 5677 |
+
[2024-11-07 14:44:34,289][04584] Num frames 1000...
|
| 5678 |
+
[2024-11-07 14:44:34,485][04584] Num frames 1100...
|
| 5679 |
+
[2024-11-07 14:44:34,693][04584] Num frames 1200...
|
| 5680 |
+
[2024-11-07 14:44:34,909][04584] Avg episode rewards: #0: 5.600, true rewards: #0: 4.267
|
| 5681 |
+
[2024-11-07 14:44:34,910][04584] Avg episode reward: 5.600, avg true_objective: 4.267
|
| 5682 |
+
[2024-11-07 14:44:34,955][04584] Num frames 1300...
|
| 5683 |
+
[2024-11-07 14:44:35,146][04584] Num frames 1400...
|
| 5684 |
+
[2024-11-07 14:44:35,332][04584] Num frames 1500...
|
| 5685 |
+
[2024-11-07 14:44:35,527][04584] Num frames 1600...
|
| 5686 |
+
[2024-11-07 14:44:35,702][04584] Avg episode rewards: #0: 5.160, true rewards: #0: 4.160
|
| 5687 |
+
[2024-11-07 14:44:35,706][04584] Avg episode reward: 5.160, avg true_objective: 4.160
|
| 5688 |
+
[2024-11-07 14:44:35,791][04584] Num frames 1700...
|
| 5689 |
+
[2024-11-07 14:44:35,959][04584] Num frames 1800...
|
| 5690 |
+
[2024-11-07 14:44:36,136][04584] Num frames 1900...
|
| 5691 |
+
[2024-11-07 14:44:36,333][04584] Num frames 2000...
|
| 5692 |
+
[2024-11-07 14:44:36,475][04584] Avg episode rewards: #0: 4.896, true rewards: #0: 4.096
|
| 5693 |
+
[2024-11-07 14:44:36,477][04584] Avg episode reward: 4.896, avg true_objective: 4.096
|
| 5694 |
+
[2024-11-07 14:44:36,590][04584] Num frames 2100...
|
| 5695 |
+
[2024-11-07 14:44:36,787][04584] Num frames 2200...
|
| 5696 |
+
[2024-11-07 14:44:36,980][04584] Num frames 2300...
|
| 5697 |
+
[2024-11-07 14:44:37,181][04584] Num frames 2400...
|
| 5698 |
+
[2024-11-07 14:44:37,439][04584] Avg episode rewards: #0: 4.993, true rewards: #0: 4.160
|
| 5699 |
+
[2024-11-07 14:44:37,440][04584] Avg episode reward: 4.993, avg true_objective: 4.160
|
| 5700 |
+
[2024-11-07 14:44:37,450][04584] Num frames 2500...
|
| 5701 |
+
[2024-11-07 14:44:37,658][04584] Num frames 2600...
|
| 5702 |
+
[2024-11-07 14:44:37,847][04584] Num frames 2700...
|
| 5703 |
+
[2024-11-07 14:44:38,030][04584] Num frames 2800...
|
| 5704 |
+
[2024-11-07 14:44:38,229][04584] Avg episode rewards: #0: 4.829, true rewards: #0: 4.114
|
| 5705 |
+
[2024-11-07 14:44:38,232][04584] Avg episode reward: 4.829, avg true_objective: 4.114
|
| 5706 |
+
[2024-11-07 14:44:38,291][04584] Num frames 2900...
|
| 5707 |
+
[2024-11-07 14:44:38,471][04584] Num frames 3000...
|
| 5708 |
+
[2024-11-07 14:44:38,681][04584] Num frames 3100...
|
| 5709 |
+
[2024-11-07 14:44:38,872][04584] Num frames 3200...
|
| 5710 |
+
[2024-11-07 14:44:39,064][04584] Avg episode rewards: #0: 4.705, true rewards: #0: 4.080
|
| 5711 |
+
[2024-11-07 14:44:39,066][04584] Avg episode reward: 4.705, avg true_objective: 4.080
|
| 5712 |
+
[2024-11-07 14:44:39,137][04584] Num frames 3300...
|
| 5713 |
+
[2024-11-07 14:44:39,304][04584] Num frames 3400...
|
| 5714 |
+
[2024-11-07 14:44:39,485][04584] Num frames 3500...
|
| 5715 |
+
[2024-11-07 14:44:39,725][04584] Num frames 3600...
|
| 5716 |
+
[2024-11-07 14:44:39,856][04584] Avg episode rewards: #0: 4.609, true rewards: #0: 4.053
|
| 5717 |
+
[2024-11-07 14:44:39,861][04584] Avg episode reward: 4.609, avg true_objective: 4.053
|
| 5718 |
+
[2024-11-07 14:44:39,965][04584] Num frames 3700...
|
| 5719 |
+
[2024-11-07 14:44:40,133][04584] Num frames 3800...
|
| 5720 |
+
[2024-11-07 14:44:40,308][04584] Num frames 3900...
|
| 5721 |
+
[2024-11-07 14:44:40,493][04584] Num frames 4000...
|
| 5722 |
+
[2024-11-07 14:44:40,602][04584] Avg episode rewards: #0: 4.532, true rewards: #0: 4.032
|
| 5723 |
+
[2024-11-07 14:44:40,606][04584] Avg episode reward: 4.532, avg true_objective: 4.032
|
| 5724 |
+
[2024-11-07 14:44:51,260][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
|
| 5725 |
+
[2024-11-07 14:44:53,903][04584] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
|
| 5726 |
+
[2024-11-07 14:44:53,904][04584] Overriding arg 'num_workers' with value 1 passed from command line
|
| 5727 |
+
[2024-11-07 14:44:53,906][04584] Adding new argument 'no_render'=True that is not in the saved config file!
|
| 5728 |
+
[2024-11-07 14:44:53,908][04584] Adding new argument 'save_video'=True that is not in the saved config file!
|
| 5729 |
+
[2024-11-07 14:44:53,909][04584] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
|
| 5730 |
+
[2024-11-07 14:44:53,910][04584] Adding new argument 'video_name'=None that is not in the saved config file!
|
| 5731 |
+
[2024-11-07 14:44:53,913][04584] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
|
| 5732 |
+
[2024-11-07 14:44:53,915][04584] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
|
| 5733 |
+
[2024-11-07 14:44:53,917][04584] Adding new argument 'push_to_hub'=True that is not in the saved config file!
|
| 5734 |
+
[2024-11-07 14:44:53,920][04584] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
|
| 5735 |
+
[2024-11-07 14:44:53,922][04584] Adding new argument 'policy_index'=0 that is not in the saved config file!
|
| 5736 |
+
[2024-11-07 14:44:53,923][04584] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
|
| 5737 |
+
[2024-11-07 14:44:53,926][04584] Adding new argument 'train_script'=None that is not in the saved config file!
|
| 5738 |
+
[2024-11-07 14:44:53,928][04584] Adding new argument 'enjoy_script'=None that is not in the saved config file!
|
| 5739 |
+
[2024-11-07 14:44:53,931][04584] Using frameskip 1 and render_action_repeat=4 for evaluation
|
| 5740 |
+
[2024-11-07 14:44:53,958][04584] RunningMeanStd input shape: (3, 72, 128)
|
| 5741 |
+
[2024-11-07 14:44:53,960][04584] RunningMeanStd input shape: (1,)
|
| 5742 |
+
[2024-11-07 14:44:53,973][04584] ConvEncoder: input_channels=3
|
| 5743 |
+
[2024-11-07 14:44:54,021][04584] Conv encoder output size: 512
|
| 5744 |
+
[2024-11-07 14:44:54,023][04584] Policy head output size: 512
|
| 5745 |
+
[2024-11-07 14:44:54,046][04584] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
|
| 5746 |
+
[2024-11-07 14:44:54,568][04584] Num frames 100...
|
| 5747 |
+
[2024-11-07 14:44:54,745][04584] Num frames 200...
|
| 5748 |
+
[2024-11-07 14:44:54,907][04584] Num frames 300...
|
| 5749 |
+
[2024-11-07 14:44:55,060][04584] Num frames 400...
|
| 5750 |
+
[2024-11-07 14:44:55,186][04584] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
|
| 5751 |
+
[2024-11-07 14:44:55,187][04584] Avg episode reward: 5.480, avg true_objective: 4.480
|
| 5752 |
+
[2024-11-07 14:44:55,276][04584] Num frames 500...
|
| 5753 |
+
[2024-11-07 14:44:55,445][04584] Num frames 600...
|
| 5754 |
+
[2024-11-07 14:44:55,620][04584] Num frames 700...
|
| 5755 |
+
[2024-11-07 14:44:55,759][04584] Num frames 800...
|
| 5756 |
+
[2024-11-07 14:44:55,862][04584] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
|
| 5757 |
+
[2024-11-07 14:44:55,863][04584] Avg episode reward: 4.660, avg true_objective: 4.160
|
| 5758 |
+
[2024-11-07 14:44:55,968][04584] Num frames 900...
|
| 5759 |
+
[2024-11-07 14:44:56,121][04584] Num frames 1000...
|
| 5760 |
+
[2024-11-07 14:44:56,271][04584] Num frames 1100...
|
| 5761 |
+
[2024-11-07 14:44:56,427][04584] Num frames 1200...
|
| 5762 |
+
[2024-11-07 14:44:56,511][04584] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
|
| 5763 |
+
[2024-11-07 14:44:56,512][04584] Avg episode reward: 4.387, avg true_objective: 4.053
|
| 5764 |
+
[2024-11-07 14:44:56,656][04584] Num frames 1300...
|
| 5765 |
+
[2024-11-07 14:44:56,815][04584] Num frames 1400...
|
| 5766 |
+
[2024-11-07 14:44:56,966][04584] Num frames 1500...
|
| 5767 |
+
[2024-11-07 14:44:57,111][04584] Num frames 1600...
|
| 5768 |
+
[2024-11-07 14:44:57,215][04584] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080
|
| 5769 |
+
[2024-11-07 14:44:57,215][04584] Avg episode reward: 4.580, avg true_objective: 4.080
|
| 5770 |
+
[2024-11-07 14:44:57,321][04584] Num frames 1700...
|
| 5771 |
+
[2024-11-07 14:44:57,471][04584] Num frames 1800...
|
| 5772 |
+
[2024-11-07 14:44:57,627][04584] Num frames 1900...
|
| 5773 |
+
[2024-11-07 14:44:57,776][04584] Num frames 2000...
|
| 5774 |
+
[2024-11-07 14:44:57,856][04584] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
|
| 5775 |
+
[2024-11-07 14:44:57,857][04584] Avg episode reward: 4.432, avg true_objective: 4.032
|
| 5776 |
+
[2024-11-07 14:44:57,981][04584] Num frames 2100...
|
| 5777 |
+
[2024-11-07 14:44:58,132][04584] Num frames 2200...
|
| 5778 |
+
[2024-11-07 14:44:58,301][04584] Num frames 2300...
|
| 5779 |
+
[2024-11-07 14:44:58,455][04584] Num frames 2400...
|
| 5780 |
+
[2024-11-07 14:44:58,615][04584] Avg episode rewards: #0: 4.607, true rewards: #0: 4.107
|
| 5781 |
+
[2024-11-07 14:44:58,616][04584] Avg episode reward: 4.607, avg true_objective: 4.107
|
| 5782 |
+
[2024-11-07 14:44:58,681][04584] Num frames 2500...
|
| 5783 |
+
[2024-11-07 14:44:58,837][04584] Num frames 2600...
|
| 5784 |
+
[2024-11-07 14:44:58,996][04584] Num frames 2700...
|
| 5785 |
+
[2024-11-07 14:44:59,149][04584] Num frames 2800...
|
| 5786 |
+
[2024-11-07 14:44:59,330][04584] Avg episode rewards: #0: 4.686, true rewards: #0: 4.114
|
| 5787 |
+
[2024-11-07 14:44:59,331][04584] Avg episode reward: 4.686, avg true_objective: 4.114
|
| 5788 |
+
[2024-11-07 14:44:59,363][04584] Num frames 2900...
|
| 5789 |
+
[2024-11-07 14:44:59,538][04584] Num frames 3000...
|
| 5790 |
+
[2024-11-07 14:44:59,703][04584] Num frames 3100...
|
| 5791 |
+
[2024-11-07 14:44:59,853][04584] Num frames 3200...
|
| 5792 |
+
[2024-11-07 14:45:00,003][04584] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080
|
| 5793 |
+
[2024-11-07 14:45:00,005][04584] Avg episode reward: 4.580, avg true_objective: 4.080
|
| 5794 |
+
[2024-11-07 14:45:00,066][04584] Num frames 3300...
|
| 5795 |
+
[2024-11-07 14:45:00,231][04584] Num frames 3400...
|
| 5796 |
+
[2024-11-07 14:45:00,389][04584] Num frames 3500...
|
| 5797 |
+
[2024-11-07 14:45:00,565][04584] Num frames 3600...
|
| 5798 |
+
[2024-11-07 14:45:00,743][04584] Num frames 3700...
|
| 5799 |
+
[2024-11-07 14:45:00,972][04584] Avg episode rewards: #0: 4.862, true rewards: #0: 4.196
|
| 5800 |
+
[2024-11-07 14:45:00,974][04584] Avg episode reward: 4.862, avg true_objective: 4.196
|
| 5801 |
+
[2024-11-07 14:45:01,026][04584] Num frames 3800...
|
| 5802 |
+
[2024-11-07 14:45:01,224][04584] Num frames 3900...
|
| 5803 |
+
[2024-11-07 14:45:01,414][04584] Num frames 4000...
|
| 5804 |
+
[2024-11-07 14:45:01,597][04584] Num frames 4100...
|
| 5805 |
+
[2024-11-07 14:45:01,777][04584] Avg episode rewards: #0: 4.760, true rewards: #0: 4.160
|
| 5806 |
+
[2024-11-07 14:45:01,778][04584] Avg episode reward: 4.760, avg true_objective: 4.160
|
| 5807 |
+
[2024-11-07 14:45:10,932][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
|