{
  "base_model": "Qwen/Qwen2.5-1.5B-Instruct",
  "training_method": "GRPO with Unsloth",
  "dataset_size": 1602,
  "training_steps": 360,
  "final_reward": 3.17,
  "improvement": "+67%",
  "key_achievements": [
    "Foreign language bias detection",
    "Structured reasoning output",
    "67% reward improvement",
    "46% reduction in output variance"
  ]
}