{
  "grpo": {
    "num_generations": 4,
    "temperature": 0.9,
    "max_completion_length": 512,
    "num_iterations": 1,
    "beta": 0.0,
    "epsilon": 0.2
  },
  "training": {
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "learning_rate": 5e-06,
    "num_train_epochs": 3,
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "cosine",
    "bf16": false,
    "logging_steps": 10,
    "save_steps": 500,
    "save_total_limit": 3,
    "fp16": true
  },
  "model": {
    "name_or_path": "kshitijthakkar/loggenix-moe-0.4B-0.2A-sft-s3.1",
    "trust_remote_code": true,
    "dtype": "float16"
  },
  "vllm_flags": [
    "--enforce-eager",
    "--dtype",
    "float16"
  ],
  "lora": {
    "r": 16,
    "alpha": 32,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ],
    "dropout": 0.05,
    "task_type": "CAUSAL_LM",
    "bias": "none"
  }
}