| train_old.py |
| deepspeed /workspace/llavaguard/configs/zero3.json |
| model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov |
| version qwen_1_5 |
| data_path /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json |
| data_path_eval /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json |
| image_folder /common-repos |
| mm_tunable_parts mm_language_model |
| mm_vision_tower_lr 2e-6 |
| vision_tower google/siglip-so400m-patch14-384 |
| mm_projector_type mlp2x_gelu |
| mm_vision_select_layer -2 |
| mm_use_im_start_end False |
| mm_use_im_patch_token False |
| group_by_modality_length True |
| image_aspect_ratio anyres_max_9 |
| image_grid_pinpoints (1x1),...,(6x6) |
| mm_patch_merge_type spatial_unpad |
| bf16 True |
| run_name LlavaGuard-v1.2-mini-ov-lmms-lab_llava-onevision-qwen2-0.5b-ov-LlavaGuard-DS-v24 |
| output_dir /common-repos/LlavaGuard/models/LlavaGuard-v1.2-mini-ov/v24 |
| num_train_epochs 3 |
| per_device_train_batch_size 1 |
| per_device_eval_batch_size 2 |
| gradient_accumulation_steps 25 |
| evaluation_strategy no |
| eval_steps 1 |
| save_strategy epoch |
| save_steps 1 |
| save_total_limit 1 |
| learning_rate 1e-5 |
| weight_decay 0. |
| warmup_ratio 0.03 |
| lr_scheduler_type cosine |
| logging_steps 1 |
| tf32 True |
| model_max_length 32768 |
| gradient_checkpointing True |
| dataloader_num_workers 4 |
| lazy_preprocess True |
| report_to wandb |
| torch_compile True |
| torch_compile_backend inductor |
| dataloader_drop_last True |
| Eval date: 22/11/2024 07:49:11 |