Thishyaketh committed on
Commit
7920d29
·
verified ·
1 Parent(s): b8f3b84

Update .eval_results/ngen4-official-benchmarks.yaml

Browse files
.eval_results/ngen4-official-benchmarks.yaml CHANGED
@@ -278,3 +278,37 @@
278
  source:
279
  url: https://tnsaai.com/models/ngen4
280
  name: TNSA NGen-4 Evaluations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  source:
279
  url: https://tnsaai.com/models/ngen4
280
  name: TNSA NGen-4 Evaluations
281
+
282
+ - dataset:
283
+ id: TIGER-Lab/MMLU-Pro
284
+ task_id: mmlu_pro
285
+ value: 89.6
286
+ date: '2026-04-06'
287
+ source:
288
+ url: https://tnsaai.com/models/ngen4
289
+ name: TNSA NGen-4 Evaluations
290
+ - dataset:
291
+ id: Idavidrein/gpqa
292
+ task_id: diamond
293
+ value: 90.1
294
+ date: '2026-04-06'
295
+ source:
296
+ url: https://tnsaai.com/models/ngen4
297
+ name: TNSA NGen-4 Evaluations
298
+ - dataset:
299
+ id: SWE-bench/SWE-bench_Verified
300
+ task_id: swe_bench_%_resolved
301
+ value: 72.7
302
+ date: '2026-04-06'
303
+ source:
304
+ url: https://tnsaai.com/models/ngen4
305
+ name: TNSA NGen-4 Evaluations
306
+ - dataset:
307
+ id: harborframework/terminal-bench-2.0
308
+ task_id: terminal_bench
309
+ value: 42.5
310
+ date: '2026-04-06'
311
+ source:
312
+ url: https://tnsaai.com/models/ngen4
313
+ name: TNSA NGen-4 Evaluations
314
+