StarVLA
/

Qwen-GR00T-Bridge

Model card Files Files and versions

Qwen-GR00T-Bridge / config.json

Jinhuiye's picture

Update config.json

48b1db4 verified 7 months ago

history blame contribute delete

3.89 kB

	{
		"run_id": "1008_qwenLfm_briage",
		"run_root_dir": "./results/Checkpoints",
		"seed": 42,
		"trackers": ["jsonl", "wandb"],
		"wandb_entity": "jinhuiye",
		"wandb_project": "InternM1",
		"is_debug": false,
		"framework": {
			"name": "QwenGR00T",
			"qwenvl": {
				"base_vlm": "./playground/Pretrained_models/Qwen2.5-VL-3B-Instruct",
				"attn_implementation": "flash_attention_2",
				"vl_hidden_dim": 2048
			},
			"dino": {
				"dino_backbone": "dinov2_vits14"
			},
			"action_model": {
				"action_model_type": "DiT-L",
				"hidden_size": 1024,
				"add_pos_embed": true,
				"max_seq_len": 1024,
				"action_dim": 7,
				"state_dim": 7,
				"future_action_window_size": 15,
				"action_horizon": 16,
				"past_action_window_size": 0,
				"repeated_diffusion_steps": 8,
				"noise_beta_alpha": 1.5,
				"noise_beta_beta": 1.0,
				"noise_s": 0.999,
				"num_timestep_buckets": 1000,
				"num_inference_timesteps": 4,
				"num_target_vision_tokens": 32,
				"diffusion_model_cfg": {
					"cross_attention_dim": 2048,
					"dropout": 0.2,
					"final_dropout": true,
					"interleave_self_attention": true,
					"norm_type": "ada_norm",
					"num_layers": 16,
					"output_dim": 1024,
					"positional_embeddings": null
				},
				"action_hidden_dim": 2048
			}
		},
		"datasets": {
			"vlm_data": {
				"dataset_py": "vlm_datasets",
				"dataformat": "llava_json",
				"dataset_use": "aokvqa_cauldron_llava_format,sharegpt4v_coco,sharegpt4v_knowledge,sharegpt4v_llava,sharegpt4v_sam,asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
				"eval_dataset": "aokvqa_cauldron_llava_format",
				"data_flatten": false,
				"base_interval": 2,
				"max_pixels": 50176,
				"min_pixels": 784,
				"model_max_length": 2048,
				"model_type": "qwen2.5vl",
				"per_device_batch_size": 3
			},
			"vla_data": {
				"dataset_py": "lerobot_datasets",
				"data_root_dir": "playground/Datasets/OXE_LEROBOT",
				"data_mix": "bridge",
				"action_type": "delta_ee",
				"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
				"CoT_answer": "bbox",
				"default_image_resolution": [3, 224, 224],
				"per_device_batch_size": 16,
				"load_all_data_for_training": true,
				"obs": ["image_0"],
				"image_size": [224, 224]
			}
		},
		"trainer": {
			"epochs": 100,
			"max_train_steps": 100000,
			"num_warmup_steps": 10000,
			"save_interval": 5000,
			"eval_interval": 1000,
			"learning_rate": {
				"base": 3e-05,
				"qwen_vl_interface": 1e-05,
				"action_model": 0.0001
			},
			"lr_scheduler_type": "cosine_with_min_lr",
			"scheduler_specific_kwargs": {
				"min_lr": 5e-07
			},
			"freeze_modules": true,
			"loss_scale": {
				"vla": 1.0,
				"vlm": 0.1
			},
			"repeated_diffusion_steps": 4,
			"max_grad_norm": 1.0,
			"warmup_ratio": 0.1,
			"weight_decay": 0.0,
			"logging_frequency": 10,
			"gradient_clipping": 1.0,
			"gradient_accumulation_steps": 1,
			"optimizer": {
				"name": "AdamW",
				"betas": [0.9, 0.95],
				"eps": 1e-08,
				"weight_decay": 1e-08
			},
			"is_resume": false,
			"resume_epoch": null,
			"resume_step": null,
			"enable_gradient_checkpointing": true,
			"enable_mixed_precision_training": true
		},
		"is_resume": false,
		"output_dir": "./results/Checkpoints/1008_qwenLfm_briage"
	}