{
  "model_type": "vjepa2_ac",
  "architecture": {
    "encoder": {
      "model_name": "vit_large",
      "embed_dim": 1024,
      "num_patches": 196,
      "patch_size": 16,
      "img_size": 256,
      "weights": "vision_encoder.pt"
    },
    "predictor": {
      "action_dim": 6,
      "depth": 4,
      "heads": 8,
      "embed_dim": 384,
      "dropout": 0.1,
      "weights": "pytorch_model.bin"
    }
  },
  "data_config": {
    "camera": "observation.images.phone",
    "clip_len": 8,
    "frame_step": 2,
    "context_frames": 6,
    "target_frames": 2
  },
  "training_params": {
    "epochs": 300,
    "batch_size": 2,
    "lr": 0.0001,
    "weight_decay": 0.05,
    "warmup_epochs": 20
  },
  "dataset": "SO-100 Ball-Cup Robotics Dataset"
}