File size: 360 Bytes
5ec9df2
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
{
  "base_model": "Qwen/Qwen2.5-1.5B-Instruct",
  "training_method": "GRPO with Unsloth",
  "dataset_size": 1602,
  "training_steps": 360,
  "final_reward": 3.17,
  "improvement": "+67%",
  "key_achievements": [
    "Foreign language bias detection",
    "Structured reasoning output",
    "67% reward improvement",
    "46% reduction in output variance"
  ]
}