Model save
README.md
CHANGED
@@ -40,9 +40,9 @@ The following hyperparameters were used during training:
 - seed: 42
 - gradient_accumulation_steps: 8
 - total_train_batch_size: 32
-- optimizer: Use
+- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
-- num_epochs: 0.
+- num_epochs: 0.5
 - mixed_precision_training: Native AMP

 ### Training results
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
 ### Framework versions

 - PEFT 0.14.0
-- Transformers 4.
+- Transformers 4.48.3
 - Pytorch 2.5.1+cu124
-- Datasets 3.
+- Datasets 3.3.2
 - Tokenizers 0.21.0
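The hyperparameters listed above map directly onto a `transformers.TrainingArguments` setup. The sketch below is a reconstruction from the README values, not the repository's actual training script: the per-device batch size of 4 and the output directory are assumptions (4 * 8 accumulation steps gives the listed total of 32), and the learning rate sits outside the diff context, so it is omitted.

```python
# Minimal sketch, assuming single-GPU training and a placeholder output_dir.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="outputs",             # assumed output directory
    seed=42,
    per_device_train_batch_size=4,    # assumed: 4 * 8 accumulation steps = 32 total
    gradient_accumulation_steps=8,
    optim="adamw_torch",              # OptimizerNames.ADAMW_TORCH
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    num_train_epochs=0.5,
    fp16=True,                        # "Native AMP" mixed precision
    report_to="tensorboard",          # assumed; the commit adds a runs/ event file
)
```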
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1b47b4af34457806b0caf9903e15c5b7a20affd88c14ad3335df0cf005c38f07
 size 1442871112
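The safetensors weights are stored through git-lfs, so the diff only touches the pointer: the blob's SHA-256 (`oid`) and byte size. Both can be re-checked against a downloaded copy; a minimal sketch, assuming the file sits in the current directory:

```python
# Verify a downloaded file against the git-lfs pointer shown above.
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's byte size and SHA-256 match the LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

print(verify_lfs_object(
    "adapter_model.safetensors",
    "1b47b4af34457806b0caf9903e15c5b7a20affd88c14ad3335df0cf005c38f07",
    1442871112,
))
```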
final_model/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "o_proj",
-    "k_proj",
     "up_proj",
+    "v_proj",
+    "o_proj",
     "q_proj",
-    "
-    "
+    "gate_proj",
+    "k_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
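Every projection name visible on the old side reappears in the new list, so the change is likely just PEFT re-serializing the module set in a different order rather than a functional change. A minimal `peft.LoraConfig` sketch matching the updated `target_modules`; rank, alpha, and dropout are not visible in this diff, so those values are placeholders:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=[
        "up_proj", "v_proj", "o_proj", "q_proj",
        "gate_proj", "k_proj", "down_proj",
    ],
    r=16,               # placeholder rank, not taken from the diff
    lora_alpha=32,      # placeholder scaling
    lora_dropout=0.05,  # placeholder dropout
    use_dora=False,
)
```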
final_model/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1b47b4af34457806b0caf9903e15c5b7a20affd88c14ad3335df0cf005c38f07
 size 1442871112
final_model/tokenizer_config.json
CHANGED
@@ -2062,6 +2062,6 @@
   "model_max_length": 131072,
   "pad_token": "<|finetune_right_pad_id|>",
   "padding_side": "right",
-  "tokenizer_class": "
+  "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": null
 }
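With `"tokenizer_class"` now recorded as `PreTrainedTokenizerFast`, the saved tokenizer loads through the fast (Rust-backed) path. A quick loading check, assuming the folder is available locally as `final_model`:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("final_model")
print(type(tokenizer).__name__)  # expected: PreTrainedTokenizerFast
print(tokenizer.pad_token)       # "<|finetune_right_pad_id|>"
print(tokenizer.padding_side)    # "right"
```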
final_model/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4000f14248fd06aa19a74a6004f4267910a92ee75d9adceb5098f518c1ef0fd7
 size 5432
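`training_args.bin` is the pickled `TrainingArguments` object that the `Trainer` writes alongside the model, so it can be inspected directly. A small sketch, assuming a compatible `transformers` install is importable (unpickling needs the class definition):

```python
import torch

# weights_only=False because this is an arbitrary pickled object, not a tensor file.
args = torch.load("final_model/training_args.bin", weights_only=False)
print(args.num_train_epochs, args.lr_scheduler_type, args.seed)
```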
runs/Feb28_19-54-30_731f2cbee5ec/events.out.tfevents.1740772475.731f2cbee5ec.226.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:70d13628a05cea17d8b992a27311a6f00a9a2a7826c022baeb75c7b6ca6197e4
+size 6231
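The `runs/` file is the TensorBoard event log written during training and can be read offline without launching TensorBoard. A sketch using `EventAccumulator`; the `train/loss` tag is the usual `Trainer` naming and is assumed rather than confirmed here:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Feb28_19-54-30_731f2cbee5ec")
ea.Reload()
print(ea.Tags()["scalars"])             # list the available scalar tags
for event in ea.Scalars("train/loss"):  # assumed tag name
    print(event.step, event.value)
```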