Buckets:

rtrm's picture
download
raw
12.9 kB
/**
 * Page module for the "Neuron TRL Trainers" documentation page.
 *
 * This appears to be compiler-generated, minified Svelte output (note the
 * `svelte-…` hash strings and `data-svelte-h` attributes); prefer regenerating
 * it from its source over editing it by hand.
 *
 * Layout of the module:
 *   - imports : runtime helpers and child components, all under short aliases.
 *               Their implementations live in `../chunks/*` and are not visible
 *               here, so comments below describe only how they are called.
 *   - qe      : fragment factory; returns an object with the lifecycle methods
 *               c / l / h / m / p / i / o / d.
 *   - Re      : JSON string written to the `hf:doc:metadata` meta tag.
 *   - Ve      : instance function; registers one callback and returns [].
 *   - Ge      : component class, exported under the name `component`.
 */
import{s as Me,n as De,o as Ae}from"../chunks/scheduler.56725da7.js";
import{S as Ee,i as Ie,e as l,s as r,c,h as Oe,a as i,d as n,b as o,f as A,g,j as f,k as E,l as t,m as p,n as d,t as b,o as h,p as N}from"../chunks/index.18a26576.js";
import{C as He}from"../chunks/CopyLLMTxtMenu.3134fcef.js";
import{D as J}from"../chunks/Docstring.69b6e7bf.js";
import{H as me}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.868449a1.js";

/**
 * Fragment factory for the page.
 *
 * @param {*} Te - Not referenced anywhere in the body.
 * @returns {object} Fragment object exposing c, l, h, m, p, i, o and d.
 */
function qe(Te){
  // Handles for every node and child component, interleaved with the static
  // text/HTML snippets that are later assigned to those nodes.
  let $,Q,W,X,C,Y,F,Z,x,
    ye='<a href="https://huggingface.co/docs/trl/en/index" rel="nofollow">TRL</a>-compatible trainers for AWS Trainium accelerators.',
    ee,S,te,w,ne,s,k,pe,I,
    Ce="Configuration class for Neuron-optimized SFT training.",
    ue,O,
    // Template literal: the line break below is part of the string value.
    Fe=`Inherits from both NeuronTrainingArguments (for Trainium-specific settings) and
trl’s SFTConfig (for SFT-specific settings).`,
    _e,H,
    xe="Key Neuron-specific behavior:",
    ve,q,
    Se="<li>padding_free is always set to False to avoid recompilation on Trainium devices</li> <li>All other SFT parameters from trl 0.24.0+ are supported</li>",
    ae,z,re,m,L,ce,R,
    we="<code>SFTTrainer</code> adapted for Neuron (Trainium) devices.",
    ge,T,P,fe,V,
    ke="Compute training loss for Neuron-optimized training.",
    de,u,M,be,j,
    ze="Override SFTTrainer’s log method to use NeuronTrainer’s implementation.",
    he,U,
    Le="SFTTrainer has custom metrics tracking that we don’t use for Neuron training.",
    Ne,y,D,$e,B,
    Pe="Perform a training step for Neuron-optimized training.",
    oe,G,le;

  // Child components are constructed first (comma sequence); the fragment
  // object is the last operand and therefore the value that is returned.
  // `return` must stay on the same line as the first operand.
  return C=new He({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),
  // Headings (h1 / h2 / h3).
  F=new me({props:{title:"Neuron TRL Trainers",local:"neuron-trl-trainers",headingTag:"h1"}}),
  S=new me({props:{title:"NeuronSFTTrainer",local:"neuronsfttrainer",headingTag:"h2"}}),
  w=new me({props:{title:"NeuronSFTConfig",local:"optimum.neuron.NeuronSFTConfig",headingTag:"h3"}}),
  // Signature block for NeuronSFTConfig; `val` strings are displayed text, not code.
  k=new J({props:{name:"class optimum.neuron.NeuronSFTConfig",anchor:"optimum.neuron.NeuronSFTConfig",parameters:[{name:"output_dir",val:": str | None = None"},{name:"overwrite_output_dir",val:": bool = False"},{name:"do_train",val:": bool = False"},{name:"do_eval",val:": bool = False"},{name:"eval_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'no'"},{name:"per_device_train_batch_size",val:": int = 1"},{name:"per_device_eval_batch_size",val:": int = 1"},{name:"gradient_accumulation_steps",val:": int = 1"},{name:"learning_rate",val:": float = 5e-05"},{name:"weight_decay",val:": float = 0.0"},{name:"adam_beta1",val:": float = 0.9"},{name:"adam_beta2",val:": float = 0.999"},{name:"adam_epsilon",val:": float = 1e-08"},{name:"max_grad_norm",val:": float = 1.0"},{name:"num_train_epochs",val:": float = 3.0"},{name:"max_steps",val:": int = -1"},{name:"lr_scheduler_type",val:": transformers.trainer_utils.SchedulerType | str = 'linear'"},{name:"lr_scheduler_kwargs",val:": dict[str, typing.Any] | str | None = <factory>"},{name:"warmup_ratio",val:": float = 0.0"},{name:"warmup_steps",val:": int = 0"},{name:"log_level",val:": str = 'info'"},{name:"log_level_replica",val:": str = 'silent'"},{name:"logging_dir",val:": str | None = None"},{name:"logging_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'steps'"},{name:"logging_first_step",val:": bool = False"},{name:"logging_steps",val:": float = 500"},{name:"save_strategy",val:": transformers.trainer_utils.SaveStrategy | str = 'steps'"},{name:"save_steps",val:": float = 500"},{name:"save_total_limit",val:": int | None = None"},{name:"save_only_model",val:": bool = False"},{name:"restore_callback_states_from_checkpoint",val:": bool = False"},{name:"seed",val:": int = 42"},{name:"bf16",val:": bool = False"},{name:"dataloader_drop_last",val:": bool = False"},{name:"eval_steps",val:": float | None = None"},{name:"dataloader_num_workers",val:": int = 0"},{name:"dataloader_prefetch_factor",val:": int | None = None"},{name:"run_name",val:": str | None = None"},{name:"disable_tqdm",val:": bool | None = None"},{name:"remove_unused_columns",val:": bool | None = True"},{name:"label_names",val:": list[str] | None = None"},{name:"accelerator_config",val:": dict | str | None = None"},{name:"label_smoothing_factor",val:": float = 0.0"},{name:"optim",val:": transformers.training_args.OptimizerNames | str = 'adamw_torch'"},{name:"optim_args",val:": str | None = None"},{name:"report_to",val:": None | str | list[str] = None"},{name:"resume_from_checkpoint",val:": str | None = None"},{name:"gradient_checkpointing",val:": bool = False"},{name:"gradient_checkpointing_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"use_liger_kernel",val:": bool | None = False"},{name:"average_tokens_across_devices",val:": bool | None = False"},{name:"dataloader_prefetch_size",val:": int = None"},{name:"skip_cache_push",val:": bool = False"},{name:"use_autocast",val:": bool = False"},{name:"zero_1",val:": bool = True"},{name:"stochastic_rounding_enabled",val:": bool = True"},{name:"optimizer_use_master_weights",val:": bool = True"},{name:"optimizer_use_fp32_grad_acc",val:": bool = True"},{name:"optimizer_save_master_weights_in_ckpt",val:": bool = False"},{name:"tensor_parallel_size",val:": int = 1"},{name:"disable_sequence_parallel",val:": bool = False"},{name:"pipeline_parallel_size",val:": int = 1"},{name:"pipeline_parallel_num_microbatches",val:": int = -1"},{name:"kv_size_multiplier",val:": int | None = None"},{name:"num_local_ranks_per_step",val:": int = 8"},{name:"use_xser",val:": bool = True"},{name:"async_save",val:": bool = False"},{name:"fuse_qkv",val:": bool = False"},{name:"recompute_causal_mask",val:": bool = True"},{name:"enable_throughput_metrics",val:": bool = True"},{name:"enable_mfu_metrics",val:": bool = True"},{name:"enable_efficiency_metrics",val:": bool = True"},{name:"metrics_window_size",val:": int = 50"}],source:"https://github.com/huggingface/optimum-neuron/blob/v0.4.3/optimum/neuron/trainers/sft_config.py#L34"}}),
  z=new me({props:{title:"NeuronSFTTrainer",local:"optimum.neuron.NeuronSFTTrainer",headingTag:"h3"}}),
  // Signature blocks for NeuronSFTTrainer and its compute_loss / log / training_step methods.
  L=new J({props:{name:"class optimum.neuron.NeuronSFTTrainer",anchor:"optimum.neuron.NeuronSFTTrainer",parameters:[{name:"model",val:": transformers.modeling_utils.PreTrainedModel | torch.nn.modules.module.Module | str"},{name:"args",val:": optimum.neuron.trainers.sft_trainer.SFTConfig | None = None"},{name:"data_collator",val:": typing.Optional[typing.Callable[[list[typing.Any]], dict[str, typing.Any]]] = None"},{name:"train_dataset",val:": Dataset | IterableDataset | datasets.Dataset | None = None"},{name:"eval_dataset",val:": Dataset | dict[str, Dataset] | datasets.Dataset | None = None"},{name:"processing_class",val:": transformers.tokenization_utils_base.PreTrainedTokenizerBase | transformers.processing_utils.ProcessorMixin | None = None"},{name:"compute_loss_func",val:": typing.Optional[typing.Callable] = None"},{name:"compute_metrics",val:": typing.Optional[typing.Callable] = None"},{name:"callbacks",val:": list[transformers.trainer_callback.TrainerCallback] | None = None"},{name:"optimizers",val:": tuple[torch.optim.optimizer.Optimizer | None, torch.optim.lr_scheduler.LambdaLR | None] = (None, None)"},{name:"optimizer_cls_and_kwargs",val:": tuple[type[torch.optim.optimizer.Optimizer], dict[str, typing.Any]] | None = None"},{name:"preprocess_logits_for_metrics",val:": typing.Optional[typing.Callable] = None"},{name:"peft_config",val:": peft.config.PeftConfig | None = None"},{name:"formatting_func",val:": typing.Optional[typing.Callable] = None"}],source:"https://github.com/huggingface/optimum-neuron/blob/v0.4.3/optimum/neuron/trainers/sft_trainer.py#L132"}}),
  P=new J({props:{name:"compute_loss",anchor:"optimum.neuron.NeuronSFTTrainer.compute_loss",parameters:[{name:"model",val:""},{name:"inputs",val:""},{name:"return_outputs",val:" = False"},{name:"num_items_in_batch",val:" = None"}],source:"https://github.com/huggingface/optimum-neuron/blob/v0.4.3/optimum/neuron/trainers/sft_trainer.py#L403"}}),
  M=new J({props:{name:"log",anchor:"optimum.neuron.NeuronSFTTrainer.log",parameters:[{name:"logs",val:": dict[str, float]"}],source:"https://github.com/huggingface/optimum-neuron/blob/v0.4.3/optimum/neuron/trainers/sft_trainer.py#L387"}}),
  D=new J({props:{name:"training_step",anchor:"optimum.neuron.NeuronSFTTrainer.training_step",parameters:[{name:"model",val:": Module"},{name:"inputs",val:": dict[str, typing.Any]"},{name:"num_items_in_batch",val:": int | None = None"}],source:"https://github.com/huggingface/optimum-neuron/blob/v0.4.3/optimum/neuron/trainers/sft_trainer.py#L413"}}),
  {
    // c: build every node from scratch via l(tag) and r(), forward to each
    // child's fragment with the imported c(...), assign the static text/HTML,
    // then apply attributes through this.h(). Inside this method `c` still
    // resolves to the imported helper (method shorthand creates no binding).
    c(){$=l("meta"),Q=r(),W=l("p"),X=r(),c(C.$$.fragment),Y=r(),c(F.$$.fragment),Z=r(),x=l("p"),x.innerHTML=ye,ee=r(),c(S.$$.fragment),te=r(),c(w.$$.fragment),ne=r(),s=l("div"),c(k.$$.fragment),pe=r(),I=l("p"),I.textContent=Ce,ue=r(),O=l("p"),O.textContent=Fe,_e=r(),H=l("p"),H.textContent=xe,ve=r(),q=l("ul"),q.innerHTML=Se,ae=r(),c(z.$$.fragment),re=r(),m=l("div"),c(L.$$.fragment),ce=r(),R=l("p"),R.innerHTML=we,ge=r(),T=l("div"),c(P.$$.fragment),fe=r(),V=l("p"),V.textContent=ke,de=r(),u=l("div"),c(M.$$.fragment),be=r(),j=l("p"),j.textContent=ze,he=r(),U=l("p"),U.textContent=Le,Ne=r(),y=l("div"),c(D.$$.fragment),$e=r(),B=l("p"),B.textContent=Pe,oe=r(),G=l("p"),this.h()},
    // l: same node tree as c(), but each node is obtained from the existing
    // parent `e` through i(parent, TAG, attrs) / o(parent) — presumably the
    // hydration path; confirm against the index chunk. Static content is only
    // rewritten when f(node) differs from the expected hash string. The call
    // order mirrors the DOM order and must not be rearranged.
    l(e){const a=Oe("svelte-u9bgzb",document.head);$=i(a,"META",{name:!0,content:!0}),a.forEach(n),Q=o(e),W=i(e,"P",{}),A(W).forEach(n),X=o(e),g(C.$$.fragment,e),Y=o(e),g(F.$$.fragment,e),Z=o(e),x=i(e,"P",{"data-svelte-h":!0}),f(x)!=="svelte-15pg36"&&(x.innerHTML=ye),ee=o(e),g(S.$$.fragment,e),te=o(e),g(w.$$.fragment,e),ne=o(e),s=i(e,"DIV",{class:!0});var _=A(s);g(k.$$.fragment,_),pe=o(_),I=i(_,"P",{"data-svelte-h":!0}),f(I)!=="svelte-1kqlri5"&&(I.textContent=Ce),ue=o(_),O=i(_,"P",{"data-svelte-h":!0}),f(O)!=="svelte-12kieyk"&&(O.textContent=Fe),_e=o(_),H=i(_,"P",{"data-svelte-h":!0}),f(H)!=="svelte-l9y7i1"&&(H.textContent=xe),ve=o(_),q=i(_,"UL",{"data-svelte-h":!0}),f(q)!=="svelte-1b8e36h"&&(q.innerHTML=Se),_.forEach(n),ae=o(e),g(z.$$.fragment,e),re=o(e),m=i(e,"DIV",{class:!0});var v=A(m);g(L.$$.fragment,v),ce=o(v),R=i(v,"P",{"data-svelte-h":!0}),f(R)!=="svelte-1hij1rp"&&(R.innerHTML=we),ge=o(v),T=i(v,"DIV",{class:!0});var ie=A(T);g(P.$$.fragment,ie),fe=o(ie),V=i(ie,"P",{"data-svelte-h":!0}),f(V)!=="svelte-1jjbfym"&&(V.textContent=ke),ie.forEach(n),de=o(v),u=i(v,"DIV",{class:!0});var K=A(u);g(M.$$.fragment,K),be=o(K),j=i(K,"P",{"data-svelte-h":!0}),f(j)!=="svelte-bpxeyn"&&(j.textContent=ze),he=o(K),U=i(K,"P",{"data-svelte-h":!0}),f(U)!=="svelte-pbquzb"&&(U.textContent=Le),K.forEach(n),Ne=o(v),y=i(v,"DIV",{class:!0});var se=A(y);g(D.$$.fragment,se),$e=o(se),B=i(se,"P",{"data-svelte-h":!0}),f(B)!=="svelte-1kvi7o"&&(B.textContent=Pe),se.forEach(n),v.forEach(n),oe=o(e),G=i(e,"P",{}),A(G).forEach(n),this.h()},
    // h: set attributes — the meta tag's name/content (content is the Re JSON
    // string declared further down) and the class list of the docstring divs.
    h(){E($,"name","hf:doc:metadata"),E($,"content",Re),E(s,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(u,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),E(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},
    // m: the meta node goes to document.head; top-level nodes and components
    // are passed to p(e, node, a) / d(component, e, a); nested children are
    // attached with t(parent, child) / d(component, parent, null). Finally
    // the `le` flag is set.
    m(e,a){t(document.head,$),p(e,Q,a),p(e,W,a),p(e,X,a),d(C,e,a),p(e,Y,a),d(F,e,a),p(e,Z,a),p(e,x,a),p(e,ee,a),d(S,e,a),p(e,te,a),d(w,e,a),p(e,ne,a),p(e,s,a),d(k,s,null),t(s,pe),t(s,I),t(s,ue),t(s,O),t(s,_e),t(s,H),t(s,ve),t(s,q),p(e,ae,a),d(z,e,a),p(e,re,a),p(e,m,a),d(L,m,null),t(m,ce),t(m,R),t(m,ge),t(m,T),d(P,T,null),t(T,fe),t(T,V),t(m,de),t(m,u),d(M,u,null),t(u,be),t(u,j),t(u,he),t(u,U),t(m,Ne),t(m,y),d(D,y,null),t(y,$e),t(y,B),p(e,oe,a),p(e,G,a),le=!0},
    // p: update hook is the imported De — presumably a no-op because nothing
    // in this fragment depends on changing state; confirm in the scheduler chunk.
    p:De,
    // i: unless `le` is already set, call b() on every child fragment and set `le`.
    i(e){le||(b(C.$$.fragment,e),b(F.$$.fragment,e),b(S.$$.fragment,e),b(w.$$.fragment,e),b(k.$$.fragment,e),b(z.$$.fragment,e),b(L.$$.fragment,e),b(P.$$.fragment,e),b(M.$$.fragment,e),b(D.$$.fragment,e),le=!0)},
    // o: call h() on every child fragment and clear `le`.
    o(e){h(C.$$.fragment,e),h(F.$$.fragment,e),h(S.$$.fragment,e),h(w.$$.fragment,e),h(k.$$.fragment,e),h(z.$$.fragment,e),h(L.$$.fragment,e),h(P.$$.fragment,e),h(M.$$.fragment,e),h(D.$$.fragment,e),le=!1},
    // d: when `e` is truthy, n() is called on each top-level node; the meta
    // node is always passed to n(); N() is then called for every child component.
    d(e){e&&(n(Q),n(W),n(X),n(Y),n(Z),n(x),n(ee),n(te),n(ne),n(s),n(ae),n(re),n(m),n(oe),n(G)),n($),N(C,e),N(F,e),N(S,e),N(w,e),N(k),N(z,e),N(L),N(P),N(M),N(D)}
  }
}

// Serialized table of contents for the page (title, anchor and nested
// sections with their depth); used as the `content` of the hf:doc:metadata tag.
const Re='{"title":"Neuron TRL Trainers","local":"neuron-trl-trainers","sections":[{"title":"NeuronSFTTrainer","local":"neuronsfttrainer","sections":[{"title":"NeuronSFTConfig","local":"optimum.neuron.NeuronSFTConfig","sections":[],"depth":3},{"title":"NeuronSFTTrainer","local":"optimum.neuron.NeuronSFTTrainer","sections":[],"depth":3}],"depth":2}],"depth":1}';

/**
 * Instance function handed to Ie() by the component constructor.
 *
 * Registers a callback through Ae() that reads the `fw` query parameter from
 * the current URL; the value is discarded, so the callback has no visible
 * effect in this module. The comma expression makes the function return [].
 *
 * @param {*} Te - Not referenced in the body.
 * @returns {Array} Always an empty array.
 */
function Ve(Te){return Ae(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}

/**
 * Page component: wires the instance function (Ve) and fragment factory (qe)
 * together through Ie(), with Me and an empty object as the remaining arguments.
 */
class Ge extends Ee{constructor($){super(),Ie(this,$,Ve,qe,Me,{})}}

export{Ge as component};

Xet Storage Details

Size:
12.9 kB
·
Xet hash:
0da03807ff33575112940d82fcdb5eda06c1bd97d2c1d166556510084b066fb2

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.