| data: |
| action_dim: 20 |
| action_fields: |
| - robot__action__poses__left::panda__xyz_relative |
| - robot__action__poses__right::panda__xyz_relative |
| - robot__action__poses__left::panda__rot_6d_relative |
| - robot__action__poses__right::panda__rot_6d_relative |
| - robot__action__grippers__left::panda_hand |
| - robot__action__grippers__right::panda_hand |
| allow_multiple_epochs: true |
| augmentation: |
| enabled: true |
| image: |
| color_jitter: |
| brightness: 0.2 |
| contrast: 0.4 |
| enabled: true |
| hue: |
| - -0.05 |
| - 0.05 |
| saturation: 0.2 |
| crop: |
| enabled: true |
| mode: random |
| shape: |
| - 224 |
| - 224 |
| point_cloud: |
| color_jitter: |
| brightness: 0.2 |
| contrast: 0.4 |
| enabled: false |
| hue: |
| - -0.05 |
| - 0.05 |
| saturation: 0.2 |
| camera_names: |
| - scene_right_0 |
| - scene_left_0 |
| - wrist_left_plus |
| - wrist_right_minus |
| dataset_manifest: |
| - null |
| dataset_modality: |
| - robotics |
| dataset_statistics: |
| - null |
| dataset_weighting: |
| - 1.0 |
| extrinsics_fields: |
| - extrinsics.scene_right_0 |
| - extrinsics.scene_left_0 |
| - extrinsics.wrist_left_minus |
| - extrinsics.wrist_left_plus |
| - extrinsics.wrist_right_minus |
| - extrinsics.wrist_right_plus |
| image_indices: |
| - -1 |
| - 0 |
| image_names: |
| - scene_right_0_t-1 |
| - scene_left_0_t-1 |
| - wrist_left_plus_t-1 |
| - wrist_right_minus_t-1 |
| - scene_right_0_t0 |
| - scene_left_0_t0 |
| - wrist_left_plus_t0 |
| - wrist_right_minus_t0 |
| image_size: 224 |
| img_num_tokens: 256 |
| intrinsics_fields: |
| - intrinsics.scene_right_0 |
| - intrinsics.scene_left_0 |
| - intrinsics.wrist_left_minus |
| - intrinsics.wrist_left_plus |
| - intrinsics.wrist_right_minus |
| - intrinsics.wrist_right_plus |
| language_instruction_types: |
| - original |
| - randomized |
| - verbose |
| - alternative |
| lowdim_future_timesteps: 8 |
| lowdim_past_timesteps: 1 |
| mask_padded_images: true |
| max_text_seq_len: null |
| normalization: |
| centered_norm: true |
| enabled: true |
| epsilon: 0.01 |
| field_configs: |
| robot__action__grippers__left::panda_hand: |
| enabled: true |
| epsilon: 0.01 |
| method: percentile_1_99 |
| scope: per_timestep |
| robot__action__grippers__right::panda_hand: |
| enabled: true |
| epsilon: 0.01 |
| method: percentile_1_99 |
| scope: per_timestep |
| robot__action__poses__left::panda__rot_6d_relative: |
| enabled: true |
| epsilon: 0.01 |
| method: percentile_1_99 |
| scope: per_timestep |
| robot__action__poses__left::panda__xyz_relative: |
| enabled: true |
| epsilon: 0.01 |
| method: percentile_1_99 |
| scope: per_timestep |
| robot__action__poses__right::panda__rot_6d_relative: |
| enabled: true |
| epsilon: 0.01 |
| method: percentile_1_99 |
| scope: per_timestep |
| robot__action__poses__right::panda__xyz_relative: |
| enabled: true |
| epsilon: 0.01 |
| method: percentile_1_99 |
| scope: per_timestep |
| include_fields: |
| - robot__action__poses__left::panda__xyz_relative |
| - robot__action__poses__right::panda__xyz_relative |
| - robot__action__poses__left::panda__rot_6d_relative |
| - robot__action__poses__right::panda__rot_6d_relative |
| - robot__action__grippers__left::panda_hand |
| - robot__action__grippers__right::panda_hand |
| lowdim_future_timesteps: 19 |
| lowdim_past_timesteps: 5 |
| method: percentile_1_99 |
| scope: per_timestep |
| num_workers: 24 |
| pad_missing_images: true |
| point_cloud_num_points: 4096 |
| pose_groups: |
| - name: left_panda_action |
| position_key: robot__action__poses__left::panda__xyz |
| rotation_key: robot__action__poses__left::panda__rot_6d |
| - name: right_panda_action |
| position_key: robot__action__poses__right::panda__xyz |
| rotation_key: robot__action__poses__right::panda__rot_6d |
| prefetch_factor: 2 |
| processor: Qwen/Qwen3-VL-2B-Thinking |
| processor_kwargs: |
| do_resize: false |
| proprioception_dim: 0 |
| proprioception_fields: [] |
| seed: 42 |
| seq_len: 2048 |
| shuffle: true |
| shuffle_buffer_size: 2000 |
| shuffle_initial: 500 |
| type: robotics |
| use_point_cloud: false |
| val_dataset_manifest: [] |
| val_dataset_statistics: [] |
| val_dataset_weighting: [] |
| db_logging: true |
| distributed: |
| ddp_static_graph: false |
| device: cuda:0 |
| dist_backend: nccl |
| dist_url: env:// |
| fsdp: true |
| fsdp_cpu_offload: false |
| fsdp_reshard_after_forward: false |
| local_rank: 0 |
| rank: 0 |
| use_distributed: true |
| world_size: 32 |
| ema: |
| alpha: 0.999 |
| enabled: false |
| inv_gamma: 1.0 |
| max_value: 0.9999 |
| min_value: 0.0 |
| power: 0.75 |
| type: ema |
| update_after_step: 0 |
| hparams: |
| beta1: 0.9 |
| beta2: 0.95 |
| decay: '0.3' |
| eps: 1.0e-08 |
| force_min_lr: 0.0 |
| global_batch_size: 1024 |
| grad_checkpointing: false |
| grad_clip_norm: 1.0 |
| loss_function: mse |
| lr: 5.0e-05 |
| lr_cooldown_end: 0.0 |
| lr_scheduler: cosine |
| optimizer: adamw |
| per_gpu_batch_size: 16 |
| precision: amp_bf16 |
| seed: 42 |
| torchcompile: false |
| warmup: '1000' |
| wd: 1.0e-08 |
| world_size: 32 |
| z_loss_coefficient: 0.0 |
| log_every_n_steps: 20 |
| log_level: INFO |
| max_checkpoint_limit: null |
| model: |
| action_dim: 20 |
| diffusion_step_conditioning: add |
| freeze: false |
| input_noise_std: 0.05 |
| noise_scheduler: |
| beta_end: 0.02 |
| beta_start: 0.0001 |
| clamp_range: |
| - -1.5 |
| - 1.5 |
| freeze: false |
| num_timesteps: 1000 |
| resume_from_checkpoint: null |
| resume_weights_only: false |
| type: noise_scheduler |
| num_action_head_repeats: 8 |
| proprioception_dim: 0 |
| resume_from_checkpoint: null |
| resume_weights_only: false |
| transformer: |
| attn_name: torch_attn |
| cast_output_to_float32: false |
| ffn_type: swiglu |
| freeze: false |
| hidden_dim: 1024 |
| is_causal: true |
| max_seq_len: 2048 |
| n_heads: 16 |
| n_layers: 24 |
| norm_eps: 1.0e-05 |
| norm_type: default_layer_norm |
| positional_embedding_type: rotary |
| post_embed_norm: false |
| qk_norm: false |
| resume_from_checkpoint: null |
| resume_weights_only: false |
| type: transformer |
| vocab_size: 50432 |
| weight_tying: false |
| type: diffusion_policy |
| use_diffusers_scheduler: false |
| use_flow_matching_scheduler: true |
| vision_language_backbone: |
| freeze: false |
| hf_pretrained: Qwen/Qwen3-VL-2B-Thinking |
| num_vlm_layers_to_use: 1 |
| resume_from_checkpoint: null |
| resume_weights_only: false |
| type: vlm_backbone |
| name: 2026_03_24-01_40_07-model_diffusion_policy-lr_5e-05-bsz_1024 |
| num_checkpoints: 10 |
| num_epochs: null |
| remote_sync: null |
| remote_sync_fixed_path: null |
| resolve_configs: false |
| resolve_configs_path: null |
| save_path: /tmp |
| total_train_samples: 100000000 |
| total_val_samples: null |
| val_every_n_checkpoints: 1 |
| wandb: true |
| wandb_entity: tri |
| wandb_project_name: vla_foundry |
| wandb_tags: [] |
|
|