{
  "global_step": 125,
  "step": 1000,
  "best_eval_hit_rate": -1.0,
  "last_metrics": {},
  "config": {
    "model_name": "mistralai/Ministral-3-8B-Instruct-2512-BF16",
    "learning_rate": 5e-06,
    "lora_r": 16,
    "grpo_beta": 0.0,
    "num_generations": 4,
    "max_steps": -1,
    "output_dir": "./outputs/ministral_duckhunt_grpo"
  }
}