| model: | |
| name: EleutherAI/pythia-14m | |
| alias: pythia-14m | |
| revision: null | |
| subfolder: null | |
| precision: bf16 | |
| set_eos_to_pad: true | |
| dataset: | |
| name: gsm8k | |
| alias: gsm8k | |
| text_field: question | |
| max_length: 1024 | |
| trainer: | |
| group_by_length: false | |
| remove_unused_columns: true | |
| neftune_noise_alpha: null | |
| eval_accumulation_steps: 1 | |
| per_device_train_batch_size: 32 | |
| per_device_eval_batch_size: 20 | |
| gradient_accumulation_steps: 1 | |
| dataloader_num_workers: 8 | |
| dataloader_drop_last: false | |
| optim: adamw_torch_fused | |
| adafactor: false | |
| learning_rate: 0.0001 | |
| weight_decay: 0 | |
| adam_beta1: 0.9 | |
| adam_beta2: 0.999 | |
| adam_epsilon: 1.0e-08 | |
| max_grad_norm: 1.0 | |
| lr_scheduler_type: linear | |
| warmup_ratio: 0.0 | |
| warmup_steps: 0 | |
| num_train_epochs: 1 | |
| max_steps: -1 | |
| eval_steps: 100 | |
| output_dir: ./ | |
| logging_strategy: steps | |
| logging_first_step: true | |
| logging_steps: 1 | |
| log_level: info | |
| report_to: tensorboard | |
| logging_dir: tb_logs | |
| disable_tqdm: false | |
| push_to_hub: true | |
| save_strategy: epoch | |
| save_steps: 100 | |
| save_only_model: true | |
| seed: 42 | |
| data_seed: 42 | |
| full_determinism: true | |
| tf32: true | |
| lora: | |
| r: 64 | |
| lora_alpha: 16 | |
| bias: none | |
| task_type: CAUSAL_LM | |
| target_modules: null | |
| use_peft: true | |
| global_seed: 42 | |
| experiment_group: training | |
| run_name: pythia-14m_2024-01-17T00-07-52 | |