{ "global_step": 125, "step": 1000, "best_eval_hit_rate": -1.0, "last_metrics": {}, "config": { "model_name": "mistralai/Ministral-3-8B-Instruct-2512-BF16", "learning_rate": 5e-06, "lora_r": 16, "grpo_beta": 0.0, "num_generations": 4, "max_steps": -1, "output_dir": "./outputs/ministral_duckhunt_grpo" } }