Upload 9 files

Browse files

Files changed (9) hide show

README.md +126 -0
config.json +36 -0
generation_config.json +6 -0
huggingface-metadata.txt +9 -0
model.safetensors +3 -0
quantization_config.json +0 -0
special_tokens_map.json +23 -0
tokenizer.json +0 -0
tokenizer_config.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,126 @@

+---
+license: cc-by-nc-4.0
+library_name: transformers
+base_model:
+- Sao10K/MN-12B-Lyra-v1
+datasets:
+- jondurbin/gutenberg-dpo-v0.1
+model-index:
+- name: Lyra-Gutenberg-mistral-nemo-12B
+  results:
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval (0-Shot)
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 34.95
+      name: strict accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH (3-Shot)
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: acc_norm
+      value: 36.99
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5 (4-Shot)
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
+      value: 8.31
+      name: exact match
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA (0-shot)
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 11.19
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR (0-shot)
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 14.76
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO (5-shot)
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 29.2
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+      name: Open LLM Leaderboard
+---
+# Lyra-Gutenberg-mistral-nemo-12B
+[Sao10K/MN-12B-Lyra-v1](https://huggingface.co/Sao10K/MN-12B-Lyra-v1) finetuned on [jondurbin/gutenberg-dpo-v0.1](https://huggingface.co/datasets/jondurbin/gutenberg-dpo-v0.1).
+### Method
+Finetuned using an A100 on Google Colab for 3 epochs.
+Reference guide: [Fine-tune Llama 3 with ORPO](https://mlabonne.github.io/blog/posts/2024-04-19_Fine_tune_Llama_3_with_ORPO.html).
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_nbeerbower__Lyra-Gutenberg-mistral-nemo-12B).
+|      Metric       |Value|
+|-------------------|----:|
+|Avg.               |22.57|
+|IFEval (0-Shot)    |34.95|
+|BBH (3-Shot)       |36.99|
+|MATH Lvl 5 (4-Shot)| 8.31|
+|GPQA (0-shot)      |11.19|
+|MuSR (0-shot)      |14.76|
+|MMLU-PRO (5-shot)  |29.20|

config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "_name_or_path": "Sao10K/MN-12B-Lyra-v1",
+    "architectures": [
+        "MistralForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "initializer_range": 0.02,
+    "intermediate_size": 14336,
+    "max_position_embeddings": 1024000,
+    "model_type": "mistral",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 40,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 1e-05,
+    "rope_theta": 1000000.0,
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.44.2",
+    "use_cache": true,
+    "vocab_size": 131072,
+    "quantization_config": {
+        "quant_method": "exl3",
+        "version": "0.0.1",
+        "bits": 4.0,
+        "calibration": {
+            "rows": 100,
+            "cols": 2048
+        }
+    }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.44.2"
+}

huggingface-metadata.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+url: https://huggingface.co/nbeerbower/Lyra-Gutenberg-mistral-nemo-12B
+branch: main
+download date: 2025-04-10 17:23:31
+sha256sum:
+    1d68d6a81aa65d8d4f7e4fa5fa0e4a8b6872bfeded80a40a54573a4b1f2d4f9b model-00001-of-00005.safetensors
+    8adf98d6eb3294de4b926b0fea150326ddfb043cce264814cdaeeada82d7aefe model-00002-of-00005.safetensors
+    932b0c7b5babd9a33f08f65af46e77a0e1d663ae63cacd0e4638e86c083c0d0a model-00003-of-00005.safetensors
+    417ebae31dc99c6557c2a04e60fe79107efe30fd2a5c86f73280e606bbf86b38 model-00004-of-00005.safetensors
+    93832dac9af7ea0cae80423a630fc5452a6b05b3e5e41ef361cd5e8e6bde4dc7 model-00005-of-00005.safetensors

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15e58ebf17034289b9212dbc304d5ea3dda3f828a0d8aea3123dd927bff41d15
+size 7306420360

quantization_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff