# Data pipeline settings: which fields are loaded, how they are augmented and
# normalized, and how samples are shuffled and batched.
data:
  # NOTE(review): 20 would equal 2 arms x (3 xyz + 6 rot_6d + 1 gripper) if the
  # six action_fields below have those widths -- confirm against the loader.
  # model.action_dim repeats this value.
  action_dim: 20
  # Action channels. The same six names, in the same order, are listed under
  # normalization.include_fields.
  action_fields:
  - robot__action__poses__left::panda__xyz_relative
  - robot__action__poses__right::panda__xyz_relative
  - robot__action__poses__left::panda__rot_6d_relative
  - robot__action__poses__right::panda__rot_6d_relative
  - robot__action__grippers__left::panda_hand
  - robot__action__grippers__right::panda_hand
  allow_multiple_epochs: true
  # Augmentation settings, split by modality (image / point_cloud).
  augmentation:
    enabled: true
    image:
      # Color jitter is switched on for images.
      color_jitter:
        brightness: 0.2
        contrast: 0.4
        enabled: true
        # Two-element [low, high] pair, unlike the scalar settings around it.
        hue:
        - -0.05
        - 0.05
        saturation: 0.2
      crop:
        enabled: true
        mode: random
        # Same 224 as data.image_size.
        shape:
        - 224
        - 224
    point_cloud:
      # Same jitter values as the image section, but enabled is false here
      # (data.use_point_cloud is also false).
      color_jitter:
        brightness: 0.2
        contrast: 0.4
        enabled: false
        hue:
        - -0.05
        - 0.05
        saturation: 0.2
  # Four cameras; image_names is built from these names plus a time suffix.
  camera_names:
  - scene_right_0
  - scene_left_0
  - wrist_left_plus
  - wrist_right_minus
  # The four dataset_* lists below each hold exactly one entry.
  # NOTE(review): presumably they are parallel (entry i describes dataset i) --
  # confirm against the loader.
  # NOTE(review): the manifest and statistics entries are null in this file;
  # confirm they are supplied elsewhere before launching.
  dataset_manifest:
  - null
  dataset_modality:
  - robotics
  dataset_statistics:
  - null
  dataset_weighting:
  - 1.0
  # Lists six cameras, i.e. two more (wrist_left_minus, wrist_right_plus) than
  # camera_names above.
  extrinsics_fields:
  - extrinsics.scene_right_0
  - extrinsics.scene_left_0
  - extrinsics.wrist_left_minus
  - extrinsics.wrist_left_plus
  - extrinsics.wrist_right_minus
  - extrinsics.wrist_right_plus
  # Two indices; they match the _t-1 / _t0 suffixes in image_names below.
  image_indices:
  - -1
  - 0
  # One entry per (image index, camera) pair: the four camera_names with
  # suffix _t-1, then the same four with suffix _t0.
  image_names:
  - scene_right_0_t-1
  - scene_left_0_t-1
  - wrist_left_plus_t-1
  - wrist_right_minus_t-1
  - scene_right_0_t0
  - scene_left_0_t0
  - wrist_left_plus_t0
  - wrist_right_minus_t0
  # Same value as augmentation.image.crop.shape (224 x 224).
  image_size: 224
  img_num_tokens: 256
  # Same six cameras as extrinsics_fields (two more than camera_names).
  intrinsics_fields:
  - intrinsics.scene_right_0
  - intrinsics.scene_left_0
  - intrinsics.wrist_left_minus
  - intrinsics.wrist_left_plus
  - intrinsics.wrist_right_minus
  - intrinsics.wrist_right_plus
  # Four instruction variants are listed.
  language_instruction_types:
  - original
  - randomized
  - verbose
  - alternative
  # Data-level low-dim window. normalization has its own lowdim_* keys with
  # different values (19 / 5).
  lowdim_future_timesteps: 8
  lowdim_past_timesteps: 1
  mask_padded_images: true
  # Explicit null: no value is set in this file.
  max_text_seq_len: null
  # Normalization settings: section-level values (centered_norm, enabled,
  # epsilon, method, scope) plus one field_configs entry per normalized field.
  normalization:
    centered_norm: true
    enabled: true
    epsilon: 0.01
    # Every entry below carries the same four values (enabled: true,
    # epsilon: 0.01, method: percentile_1_99, scope: per_timestep), which also
    # equal the section-level values.
    field_configs:
      robot__action__grippers__left::panda_hand:
        enabled: true
        epsilon: 0.01
        method: percentile_1_99
        scope: per_timestep
      robot__action__grippers__right::panda_hand:
        enabled: true
        epsilon: 0.01
        method: percentile_1_99
        scope: per_timestep
      robot__action__poses__left::panda__rot_6d_relative:
        enabled: true
        epsilon: 0.01
        method: percentile_1_99
        scope: per_timestep
      robot__action__poses__left::panda__xyz_relative:
        enabled: true
        epsilon: 0.01
        method: percentile_1_99
        scope: per_timestep
      robot__action__poses__right::panda__rot_6d_relative:
        enabled: true
        epsilon: 0.01
        method: percentile_1_99
        scope: per_timestep
      robot__action__poses__right::panda__xyz_relative:
        enabled: true
        epsilon: 0.01
        method: percentile_1_99
        scope: per_timestep
    # Same six names, in the same order, as data.action_fields.
    include_fields:
    - robot__action__poses__left::panda__xyz_relative
    - robot__action__poses__right::panda__xyz_relative
    - robot__action__poses__left::panda__rot_6d_relative
    - robot__action__poses__right::panda__rot_6d_relative
    - robot__action__grippers__left::panda_hand
    - robot__action__grippers__right::panda_hand
    # NOTE(review): these differ from data.lowdim_future_timesteps (8) and
    # data.lowdim_past_timesteps (1) -- confirm the wider window is intended.
    lowdim_future_timesteps: 19
    lowdim_past_timesteps: 5
    method: percentile_1_99
    scope: per_timestep
  num_workers: 24
  pad_missing_images: true
  # Specified even though use_point_cloud below is false.
  point_cloud_num_points: 4096
  # Pairs a position key with a rotation key for each arm.
  # NOTE(review): these keys have no _relative suffix, unlike the names in
  # action_fields -- confirm that is intended.
  pose_groups:
  - name: left_panda_action
    position_key: robot__action__poses__left::panda__xyz
    rotation_key: robot__action__poses__left::panda__rot_6d
  - name: right_panda_action
    position_key: robot__action__poses__right::panda__xyz
    rotation_key: robot__action__poses__right::panda__rot_6d
  prefetch_factor: 2
  # Same identifier as model.vision_language_backbone.hf_pretrained.
  processor: Qwen/Qwen3-VL-2B-Thinking
  processor_kwargs:
    do_resize: false
  # No proprioception inputs: dimension 0 and an empty field list.
  proprioception_dim: 0
  proprioception_fields: []
  # Same value as hparams.seed.
  seed: 42
  # Same value as model.transformer.max_seq_len.
  seq_len: 2048
  shuffle: true
  shuffle_buffer_size: 2000
  shuffle_initial: 500
  type: robotics
  use_point_cloud: false
  # No validation datasets are configured: all three lists are empty.
  val_dataset_manifest: []
  val_dataset_statistics: []
  val_dataset_weighting: []
db_logging: true
# Distributed-training settings.
distributed:
  ddp_static_graph: false
  # NOTE(review): device, local_rank and rank hold single-process values
  # (cuda:0 / 0 / 0) while world_size is 32 -- presumably overwritten per
  # process at launch; confirm.
  device: cuda:0
  dist_backend: nccl
  dist_url: env://
  # FSDP is on; CPU offload and reshard-after-forward are both off.
  fsdp: true
  fsdp_cpu_offload: false
  fsdp_reshard_after_forward: false
  local_rank: 0
  rank: 0
  use_distributed: true
  # Same value as hparams.world_size.
  world_size: 32
# EMA settings. enabled is false in this file; the remaining values are still
# specified.
ema:
  alpha: 0.999
  enabled: false
  inv_gamma: 1.0
  max_value: 0.9999
  min_value: 0.0
  power: 0.75
  type: ema
  update_after_step: 0
# Optimizer, schedule and batch-size hyperparameters.
hparams:
  beta1: 0.9
  beta2: 0.95
  # Quoted, so this loads as the string '0.3' rather than a float.
  # NOTE(review): confirm the consumer expects a string here.
  decay: '0.3'
  eps: 1.0e-08
  force_min_lr: 0.0
  # NOTE(review): per_gpu_batch_size (16) x world_size (32) = 512, not 1024.
  # Presumably the difference is covered by gradient accumulation -- confirm.
  global_batch_size: 1024
  grad_checkpointing: false
  grad_clip_norm: 1.0
  loss_function: mse
  # The run name at the top level embeds this value as lr_5e-05.
  lr: 5.0e-05
  lr_cooldown_end: 0.0
  lr_scheduler: cosine
  optimizer: adamw
  per_gpu_batch_size: 16
  precision: amp_bf16
  # Same value as data.seed.
  seed: 42
  torchcompile: false
  # Quoted, so this loads as the string '1000' rather than an integer.
  # NOTE(review): confirm the consumer expects a string here.
  warmup: '1000'
  wd: 1.0e-08
  # Same value as distributed.world_size.
  world_size: 32
  z_loss_coefficient: 0.0
# Logging cadence and verbosity.
log_every_n_steps: 20
log_level: INFO
# Explicit null: no checkpoint limit is set in this file.
max_checkpoint_limit: null
# Model definition (type: diffusion_policy) with three sub-components:
# noise_scheduler, transformer and vision_language_backbone.
model:
  # Same value as data.action_dim.
  action_dim: 20
  diffusion_step_conditioning: add
  freeze: false
  input_noise_std: 0.05
  # NOTE(review): beta_start / beta_end / num_timesteps are set here while
  # use_flow_matching_scheduler below is true -- confirm which of these values
  # the flow-matching path actually reads.
  noise_scheduler:
    beta_end: 0.02
    beta_start: 0.0001
    # Two-element [min, max] pair.
    clamp_range:
    - -1.5
    - 1.5
    freeze: false
    num_timesteps: 1000
    resume_from_checkpoint: null
    resume_weights_only: false
    type: noise_scheduler
  num_action_head_repeats: 8
  # Same value as data.proprioception_dim.
  proprioception_dim: 0
  # No checkpoint to resume from at any level: this key is null here and in
  # each sub-component.
  resume_from_checkpoint: null
  resume_weights_only: false
  transformer:
    attn_name: torch_attn
    cast_output_to_float32: false
    ffn_type: swiglu
    freeze: false
    # 1024 hidden units over 16 heads, i.e. 64 per head.
    hidden_dim: 1024
    is_causal: true
    # Same value as data.seq_len.
    max_seq_len: 2048
    n_heads: 16
    n_layers: 24
    norm_eps: 1.0e-05
    norm_type: default_layer_norm
    positional_embedding_type: rotary
    post_embed_norm: false
    qk_norm: false
    resume_from_checkpoint: null
    resume_weights_only: false
    type: transformer
    vocab_size: 50432
    weight_tying: false
  type: diffusion_policy
  # Of the two scheduler switches, only flow matching is enabled.
  use_diffusers_scheduler: false
  use_flow_matching_scheduler: true
  vision_language_backbone:
    freeze: false
    # Same identifier as data.processor.
    hf_pretrained: Qwen/Qwen3-VL-2B-Thinking
    num_vlm_layers_to_use: 1
    resume_from_checkpoint: null
    resume_weights_only: false
    type: vlm_backbone
# Run-level settings: naming, checkpointing, sample budget and tracking.
# The name embeds a timestamp plus values that match model.type
# (diffusion_policy), hparams.lr (5e-05) and hparams.global_batch_size (1024).
name: 2026_03_24-01_40_07-model_diffusion_policy-lr_5e-05-bsz_1024
num_checkpoints: 10
# The run length is given by total_train_samples below; num_epochs is null.
num_epochs: null
remote_sync: null
remote_sync_fixed_path: null
resolve_configs: false
resolve_configs_path: null
# NOTE(review): outputs go under /tmp and remote_sync is null -- confirm that
# checkpoints are persisted somewhere durable.
save_path: /tmp
total_train_samples: 100000000
# Null, consistent with the empty data.val_dataset_* lists.
total_val_samples: null
val_every_n_checkpoints: 1
wandb: true
wandb_entity: tri
wandb_project_name: vla_foundry
wandb_tags: []