```json
{
  "grpo": {
    "num_generations": 4,
    "temperature": 0.9,
    "max_completion_length": 512,
    "num_iterations": 1,
    "beta": 0.0,
    "epsilon": 0.2
  },
  "training": {
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "learning_rate": 5e-06,
    "num_train_epochs": 3,
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "cosine",
    "bf16": false,
    "fp16": true,
    "logging_steps": 10,
    "save_steps": 500,
    "save_total_limit": 3
  },
  "model": {
    "name_or_path": "kshitijthakkar/loggenix-moe-0.4B-0.2A-sft-s3.1",
    "trust_remote_code": true,
    "dtype": "float16"
  },
  "vllm_flags": [
    "--enforce-eager",
    "--dtype",
    "float16"
  ],
  "lora": {
    "r": 16,
    "alpha": 32,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ],
    "dropout": 0.05,
    "task_type": "CAUSAL_LM",
    "bias": "none"
  }
}
```
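
For reference, here is how the sections above would plausibly map onto TRL and PEFT objects. This is a minimal sketch, assuming the training script uses `trl.GRPOConfig` (which subclasses `transformers.TrainingArguments`, so the `grpo` and `training` sections fold into one object) and `peft.LoraConfig`; the config path and `output_dir` are hypothetical, since neither appears in the JSON.

```python
import json

from peft import LoraConfig
from trl import GRPOConfig

# Load the JSON config shown above (path is hypothetical).
with open("grpo_config.json") as f:
    cfg = json.load(f)

# GRPOConfig subclasses transformers.TrainingArguments, so the "grpo"
# and "training" sections both land on the same object.
training_args = GRPOConfig(
    output_dir="loggenix-moe-grpo",  # assumed; not specified in the JSON
    use_vllm=True,                   # implied by the "vllm_flags" section
    # Forward the "model" section to from_pretrained when the trainer
    # is handed a model name instead of a model object.
    model_init_kwargs={"trust_remote_code": True, "torch_dtype": "float16"},
    **cfg["grpo"],
    **cfg["training"],
)

# The "lora" keys rename slightly on the PEFT side:
# alpha -> lora_alpha, dropout -> lora_dropout.
lora = cfg["lora"]
peft_config = LoraConfig(
    r=lora["r"],
    lora_alpha=lora["alpha"],
    target_modules=lora["target_modules"],
    lora_dropout=lora["dropout"],
    task_type=lora["task_type"],
    bias=lora["bias"],
)
```

With `beta` at 0.0 the KL penalty is disabled (TRL then skips loading a reference model), and `fp16: true` is consistent with the `float16` dtype the `vllm_flags` pass to the generation backend.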
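
Wiring this into a trainer would look roughly like the following. Everything here beyond what the config states is an assumption: the reward function is a toy placeholder (the actual reward used for this run is not given), and the one-row dataset merely illustrates the `prompt` column GRPO expects.

```python
from datasets import Dataset
from trl import GRPOTrainer

# Toy placeholder reward; the real reward for this run is not documented
# here. TRL calls it with the sampled completions and expects one float
# per completion.
def brevity_reward(completions, **kwargs):
    return [-len(c) / 512.0 for c in completions]

# Minimal illustrative dataset; GRPO expects a "prompt" column.
dataset = Dataset.from_dict({"prompt": ["Summarize the attached error log."]})

trainer = GRPOTrainer(
    model=cfg["model"]["name_or_path"],  # loaded with model_init_kwargs above
    reward_funcs=brevity_reward,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
)
trainer.train()
```

On the vLLM side, `--enforce-eager` disables CUDA-graph capture, trading some generation speed for lower memory use and better compatibility with custom modeling code (this checkpoint sets `trust_remote_code`), while `--dtype float16` keeps rollout weights in the same precision as training.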