defaults:
  - _self_
  - /callbacks: [checkpoint_every_n_steps, checkpoint_monitor, learning_rate_monitor]
  - /data: Korean_dataset
  - /model: tiny-ar
  - /strategy: ddp
  - /noise: loglinear
  - /lr_scheduler: constant_warmup

mode: sample_eval
diffusion: absorbing_state
backbone: ar
parameterization: ar
time_conditioning: False
T: 0
subs_masking: False

seed: 1

loader:
  global_batch_size: 32
  eval_global_batch_size: ${.global_batch_size}

  batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  eval_batch_size: 1

  num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
  pin_memory: True

sampling:
  predictor: ddpm_cache
  steps: 128
  noise_removal: True

  num_sample_batches: 1
  num_sample_log: 1
  semi_ar: False
  stride_length: 1
  num_strides: 1

training:
  ema: 0.9999
  antithetic_sampling: True
  importance_sampling: False
  sampling_eps: 1e-3
  change_of_variables: False

eval:
  checkpoint_path: /home/elicer/lhb01/mdlm/outputs/parkseongjun/psjkodata/2025.04.05/051927/checkpoints/best.ckpt
  disable_ema: False
  compute_generative_perplexity: True
  perplexity_batch_size: 8
  compute_perplexity_on_sanity: False
  gen_ppl_eval_model_name_or_path: gpt2-large
  generate_samples: True

optim:
  weight_decay: 0.01
  lr: 5e-5
  beta1: 0.9
  beta2: 0.999
  eps: 1e-8

trainer:
  _target_: lightning.Trainer
  accelerator: cuda
  num_nodes: 1
  devices: ${device_count:}
  accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
  gradient_clip_val: 1.0
  precision: 'bf16'
  num_sanity_val_steps: 0
  max_steps: 50000
  log_every_n_steps: 10
  limit_train_batches: 1.0
  limit_val_batches: 1.0
  val_check_interval: 0.5

wandb:
  project: test-ar
  mode: online
  notes: Mulan for text
  resume: must
  group: null
  job_type: null
  name: ar
  id: f12b7c5e-07c9-48ae-96fa-4798823b8492
  tags:
    - ${noise.type}
    - ${data.train}
    - ${data.valid}

hydra:
  run:
    dir: ./outputs/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}
  job:
    chdir: true

checkpointing:

  save_dir: ${cwd:}

  resume_from_ckpt: true
  resume_ckpt_path: /home/elicer/lhb01/mdlm/outputs/parkseongjun/psjkodata/2025.04.05/045928/checkpoints/last.ckpt
|