Thishyaketh committed on
Commit
638bd23
·
verified ·
1 Parent(s): 7920d29

Update .eval_results/ngen4-official-benchmarks.yaml

Browse files
.eval_results/ngen4-official-benchmarks.yaml CHANGED
@@ -277,38 +277,4 @@
277
  date: '2026-04-05'
278
  source:
279
  url: https://tnsaai.com/models/ngen4
280
- name: TNSA NGen-4 Evaluations
281
-
282
- - dataset:
283
- id: TIGER-Lab/MMLU-Pro
284
- task_id: mmlu_pro
285
- value: 89.6
286
- date: '2026-04-06'
287
- source:
288
- url: https://tnsaai.com/models/ngen4
289
- name: TNSA NGen-4 Evaluations
290
- - dataset:
291
- id: Idavidrein/gpqa
292
- task_id: diamond
293
- value: 90.1
294
- date: '2026-04-06'
295
- source:
296
- url: https://tnsaai.com/models/ngen4
297
- name: TNSA NGen-4 Evaluations
298
- - dataset:
299
- id: SWE-bench/SWE-bench_Verified
300
- task_id: swe_bench_%_resolved
301
- value: 72.7
302
- date: '2026-04-06'
303
- source:
304
- url: https://tnsaai.com/models/ngen4
305
- name: TNSA NGen-4 Evaluations
306
- - dataset:
307
- id: harborframework/terminal-bench-2.0
308
- task_id: terminal_bench
309
- value: 42.5
310
- date: '2026-04-06'
311
- source:
312
- url: https://tnsaai.com/models/ngen4
313
- name: TNSA NGen-4 Evaluations
314
-
 
277
  date: '2026-04-05'
278
  source:
279
  url: https://tnsaai.com/models/ngen4
280
+ name: TNSA NGen-4 Evaluations