Buckets:

download
raw
38.1 kB
import{s as ge,n as Ne,o as We}from"../chunks/scheduler.85c25b89.js";import{S as Se,i as ve,g as M,s as n,r as o,A as He,h as i,f as t,c as a,j as Qe,u as U,x as p,k as Xe,y as ke,a as s,v as y,d as r,t as T,w as m}from"../chunks/index.c9bcf812.js";import{C as c}from"../chunks/CodeBlock.c004bd26.js";import{H as pl}from"../chunks/getInferenceSnippets.5ea0a804.js";function xe(ae){let u,yl,ol,rl,d,Tl,j,Me="This tutorial shows how to fine-tune the Qwen3 model on AWS Trainium accelerators using optimum-neuron.",ml,J,ie='<strong>This is based on the <a href="https://github.com/huggingface/optimum-neuron/tree/main/examples/training/qwen3" rel="nofollow">Qwen3 fine-tuning example script</a>.</strong>',cl,w,ul,h,pe="We’ll use a <code>trn1.32xlarge</code> instance with 16 Trainium Accelerators (32 Neuron Cores) and the Hugging Face Neuron Deep Learning AMI.",dl,b,oe="The Hugging Face AMI includes all required libraries pre-installed:",jl,f,Ue="<li><code>datasets</code>, <code>transformers</code>, <code>optimum-neuron</code></li> <li>Neuron SDK packages</li> <li>No additional environment setup needed</li>",Jl,Z,ye='To create your instance, follow the guide <a href="https://huggingface.co/docs/optimum-neuron/guides/setup_aws_instance" rel="nofollow">here</a>.',wl,I,hl,C,re='We’ll use the <a href="https://huggingface.co/datasets/tengomucho/simple_recipes" rel="nofollow">simple recipes dataset</a> to fine-tune our model for recipe generation.',bl,E,fl,B,Te="To load the dataset we use the <code>load_dataset()</code> method from the <code>datasets</code> library.",Zl,R,Il,V,me="To tune our model we need to convert our structured examples into a collection of quotes with a given context, so we define our tokenization function that we will be able to map on the dataset.",Cl,_,ce=`The dataset should be structured with input-output pairs, where each input is a prompt and the output is the expected response from the model.
We will make use of the model’s tokenizer chat template and preprocess the dataset to be fed to the trainer.`,El,A,Bl,F,ue='Note: these functions make references of <code>eos_token</code> and <code>tokenizer</code>, they are well-defined in the <a href="https://github.com/huggingface/optimum-neuron/blob/main/examples/training/qwen3/finetune_qwen3.py" rel="nofollow">Python script</a> to run this tutorial.',Rl,G,Vl,Q,de='For standard PyTorch fine-tuning, you’d typically use <a href="https://github.com/huggingface/peft" rel="nofollow">PEFT</a> with LoRA adapters and the <a href="https://huggingface.co/docs/trl/en/sft_trainer" rel="nofollow"><code>SFTTrainer</code></a>.',_l,X,je="On AWS Trainium, <code>optimum-neuron</code> provides <code>NeuronSFTTrainer</code> as a drop-in replacement.",Al,g,Je=`<strong>Distributed Training on Trainium:</strong>
Since Qwen3 doesn’t fit on a single accelerator, we use distributed training techniques:`,Fl,N,we="<li>Data Parallel (DDP)</li> <li>Tensor Parallelism</li> <li>Pipeline Parallelism</li>",Gl,W,he="Model loading and LoRA configuration work similarly to other accelerators.",Ql,S,be="Combining all the pieces together, and assuming the dataset has already been loaded, we can write the following code to fine-tune Qwen3 on AWS Trainium:",Xl,v,gl,H,fe='📝 <strong>Complete script available:</strong> All steps above are combined in a ready-to-use script <a href="https://github.com/huggingface/optimum-neuron/blob/main/examples/training/qwen3/finetune_qwen3.py" rel="nofollow">finetune_qwen3.py</a>.',Nl,k,Ze="To launch training, just run the following command in your AWS Trainium instance:",Wl,x,Sl,z,Ie='🔧 <strong>Single command execution:</strong> The complete bash training script <a href="https://github.com/huggingface/optimum-neuron/blob/main/examples/training/qwen3/finetune_qwen3.sh" rel="nofollow">finetune_qwen3.sh</a> is available:',vl,Y,Hl,$,kl,L,Ce="Optimum Neuron saves model shards separately during distributed training. These need to be consolidated before use.",xl,q,Ee="Use the Optimum CLI to consolidate:",zl,P,Yl,D,Be="This will create an <code>adapter_model.safetensors</code> file, the LoRA adapter weights that we trained in the previous step. We can now reload the model and merge it, so it can be loaded for evaluation:",$l,O,Ll,K,Re="Once this step is done, it is possible to test the model with a new prompt.",ql,ll,Ve="You have successfully created a fine-tuned model from Qwen3!",Pl,el,Dl,tl,_e="Share your fine-tuned model with the community by uploading it to the Hugging Face Hub.",Ol,sl,Ae="<strong>Step 1: Authentication</strong>",Kl,nl,le,al,Fe="<strong>Step 2: Upload your model</strong>",ee,Ml,te,il,Ge="🎉 <strong>Your fine-tuned Qwen3 model is now available on the Hub for others to use!</strong>",se,Ul,ne;return d=new pl({props:{title:"🚀 Fine-Tune Qwen3 on AWS Trainium",local:"-fine-tune-qwen3-on-aws-trainium",headingTag:"h1"}}),w=new pl({props:{title:"1. 🛠️ Setup AWS Environment",local:"1--setup-aws-environment",headingTag:"h2"}}),I=new pl({props:{title:"2. 📊 Load and Prepare the Dataset",local:"2--load-and-prepare-the-dataset",headingTag:"h2"}}),E=new c({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJ3JlY2lwZXMnJTNBJTIwJTIyLSUyMFByZWhlYXQlMjBvdmVuJTIwdG8lMjAzNTAlMjBkZWdyZWVzJTVDbi0lMjBCdXR0ZXIlMjB0d28lMjA5eDUnJTIwbG9hZiUyMHBhbnMlNUNuLSUyMENyZWFtJTIwdGhlJTIwc3VnYXIlMjBhbmQlMjB0aGUlMjBidXR0ZXIlMjB1bnRpbCUyMGxpZ2h0JTIwYW5kJTIwd2hpcHBlZCU1Q24tJTIwQWRkJTIwdGhlJTIwYmFuYW5hcyUyQyUyMGVnZ3MlMkMlMjBsZW1vbiUyMGp1aWNlJTJDJTIwb3JhbmdlJTIwcmluZCU1Q24tJTIwQmVhdCUyMHVudGlsJTIwYmxlbmRlZCUyMHVuaWZvcm1seSU1Q24tJTIwQmUlMjBwYXRpZW50JTJDJTIwYW5kJTIwYmVhdCUyMHVudGlsJTIwdGhlJTIwYmFuYW5hJTIwbHVtcHMlMjBhcmUlMjBnb25lJTVDbi0lMjBTaWZ0JTIwdGhlJTIwZHJ5JTIwaW5ncmVkaWVudHMlMjB0b2dldGhlciU1Q24tJTIwRm9sZCUyMGxpZ2h0bHklMjBhbmQlMjB0aG9yb3VnaGx5JTIwaW50byUyMHRoZSUyMGJhbmFuYSUyMG1peHR1cmUlNUNuLSUyMFBvdXIlMjB0aGUlMjBiYXR0ZXIlMjBpbnRvJTIwcHJlcGFyZWQlMjBsb2FmJTIwcGFucyU1Q24tJTIwQmFrZSUyMGZvciUyMDQ1JTIwdG8lMjA1NSUyMG1pbnV0ZXMlMkMlMjB1bnRpbCUyMHRoZSUyMGxvYXZlcyUyMGFyZSUyMGZpcm0lMjBpbiUyMHRoZSUyMG1pZGRsZSUyMGFuZCUyMHRoZSUyMGVkZ2VzJTIwYmVnaW4lMjB0byUyMHB1bGwlMjBhd2F5JTIwZnJvbSUyMHRoZSUyMHBhbnMlNUNuLSUyMENvb2wlMjB0aGUlMjBsb2F2ZXMlMjBvbiUyMHJhY2tzJTIwZm9yJTIwMzAlMjBtaW51dGVzJTIwYmVmb3JlJTIwcmVtb3ZpbmclMjBmcm9tJTIwdGhlJTIwcGFucyU1Q24tJTIwRnJlZXplcyUyMHdlbGwlMjIlMkMlMEElMjAlMjAlMjAlMjAnbmFtZXMnJTNBJTIwJ0JlYXQlMjB0aGlzJTIwYmFuYW5hJTIwYnJlYWQnJTBBJTdE",highlighted:`{
<span class="hljs-string">&#x27;recipes&#x27;</span>: <span class="hljs-comment">&quot;- Preheat oven to 350 degrees\\n- Butter two 9x5&#x27; loaf pans\\n- Cream the sugar and the butter until light and whipped\\n- Add the bananas, eggs, lemon juice, orange rind\\n- Beat until blended uniformly\\n- Be patient, and beat until the banana lumps are gone\\n- Sift the dry ingredients together\\n- Fold lightly and thoroughly into the banana mixture\\n- Pour the batter into prepared loaf pans\\n- Bake for 45 to 55 minutes, until the loaves are firm in the middle and the edges begin to pull away from the pans\\n- Cool the loaves on racks for 30 minutes before removing from the pans\\n- Freezes well&quot;</span>,
<span class="hljs-string">&#x27;names&#x27;</span>: <span class="hljs-string">&#x27;Beat this banana bread&#x27;</span>
}`,wrap:!1}}),R=new c({props:{code:"ZnJvbSUyMHJhbmRvbSUyMGltcG9ydCUyMHJhbmRyYW5nZSUwQSUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQSUwQSUwQSUyMyUyMExvYWQlMjBkYXRhc2V0JTIwZnJvbSUyMHRoZSUyMGh1YiUwQWRhdGFzZXRfaWQlMjAlM0QlMjAlMjJ0ZW5nb211Y2hvJTJGc2ltcGxlX3JlY2lwZXMlMjIlMEFyZWNpcGVzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KGRhdGFzZXRfaWQlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQSUwQWRhdGFzZXRfc2l6ZSUyMCUzRCUyMGxlbihyZWNpcGVzKSUwQXByaW50KGYlMjJkYXRhc2V0JTIwc2l6ZSUzQSUyMCU3QmRhdGFzZXRfc2l6ZSU3RCUyMiklMEFwcmludChyZWNpcGVzJTVCcmFuZHJhbmdlKGRhdGFzZXRfc2l6ZSklNUQpJTBBJTIzJTIwZGF0YXNldCUyMHNpemUlM0ElMjAyMDAwMA==",highlighted:`<span class="hljs-keyword">from</span> random <span class="hljs-keyword">import</span> randrange
<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-comment"># Load dataset from the hub</span>
dataset_id = <span class="hljs-string">&quot;tengomucho/simple_recipes&quot;</span>
recipes = load_dataset(dataset_id, split=<span class="hljs-string">&quot;train&quot;</span>)
dataset_size = <span class="hljs-built_in">len</span>(recipes)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;dataset size: <span class="hljs-subst">{dataset_size}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(recipes[randrange(dataset_size)])
<span class="hljs-comment"># dataset size: 20000</span>`,wrap:!1}}),A=new c({props:{code:"JTIzJTIwUHJlcHJvY2Vzc2VzJTIwdGhlJTIwZGF0YXNldCUwQWRlZiUyMHByZXByb2Nlc3NfZGF0YXNldF93aXRoX2Vvcyhlb3NfdG9rZW4pJTNBJTBBJTIwJTIwJTIwJTIwZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZWNpcGVzJTIwJTNEJTIwZXhhbXBsZXMlNUIlMjJyZWNpcGVzJTIyJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZXMlMjAlM0QlMjBleGFtcGxlcyU1QiUyMm5hbWVzJTIyJTVEJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hhdHMlMjAlM0QlMjAlNUIlNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjByZWNpcGUlMkMlMjBuYW1lJTIwaW4lMjB6aXAocmVjaXBlcyUyQyUyMG5hbWVzKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMEFwcGVuZCUyMHRoZSUyMEVPUyUyMHRva2VuJTIwdG8lMjB0aGUlMjByZXNwb25zZSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlY2lwZSUyMCUyQiUzRCUyMGVvc190b2tlbiUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNoYXQlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBmJTIySG93JTIwY2FuJTIwSSUyMG1ha2UlMjAlN0JuYW1lJTdEJTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjByZWNpcGUlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUQlMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjaGF0cy5hcHBlbmQoY2hhdCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZXR1cm4lMjAlN0IlMjJtZXNzYWdlcyUyMiUzQSUyMGNoYXRzJTdEJTBBJTBBJTIwJTIwJTIwJTIwZGF0YXNldCUyMCUzRCUyMHJlY2lwZXMubWFwKHByZXByb2Nlc3NfZnVuY3Rpb24lMkMlMjBiYXRjaGVkJTNEVHJ1ZSUyQyUyMHJlbW92ZV9jb2x1bW5zJTNEcmVjaXBlcy5jb2x1bW5fbmFtZXMpJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwZGF0YXNldCUwQSUwQSUyMyUyMFN0cnVjdHVyZXMlMjB0aGUlMjBkYXRhc2V0JTIwaW50byUyMHByb21wdC1leHBlY3RlZCUyMG91dHB1dCUyMHBhaXJzLiUwQWRlZiUyMGZvcm1hdHRpbmdfZnVuY3Rpb24oZXhhbXBsZXMpJTNBJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwdG9rZW5pemVyLmFwcGx5X2NoYXRfdGVtcGxhdGUoZXhhbXBsZXMlNUIlMjJtZXNzYWdlcyUyMiU1RCUyQyUyMHRva2VuaXplJTNERmFsc2UlMkMlMjBhZGRfZ2VuZXJhdGlvbl9wcm9tcHQlM0RGYWxzZSk=",highlighted:`<span class="hljs-comment"># Preprocesses the dataset</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_dataset_with_eos</span>(<span class="hljs-params">eos_token</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>):
recipes = examples[<span class="hljs-string">&quot;recipes&quot;</span>]
names = examples[<span class="hljs-string">&quot;names&quot;</span>]
chats = []
<span class="hljs-keyword">for</span> recipe, name <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(recipes, names):
<span class="hljs-comment"># Append the EOS token to the response</span>
recipe += eos_token
chat = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">f&quot;How can I make <span class="hljs-subst">{name}</span>?&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: recipe},
]
chats.append(chat)
<span class="hljs-keyword">return</span> {<span class="hljs-string">&quot;messages&quot;</span>: chats}
dataset = recipes.<span class="hljs-built_in">map</span>(preprocess_function, batched=<span class="hljs-literal">True</span>, remove_columns=recipes.column_names)
<span class="hljs-keyword">return</span> dataset
<span class="hljs-comment"># Structures the dataset into prompt-expected output pairs.</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">formatting_function</span>(<span class="hljs-params">examples</span>):
<span class="hljs-keyword">return</span> tokenizer.apply_chat_template(examples[<span class="hljs-string">&quot;messages&quot;</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)`,wrap:!1}}),G=new pl({props:{title:"3. 🎯 Fine-tune Qwen3 with NeuronSFTTrainer and PEFT",local:"3--fine-tune-qwen3-with-neuronsfttrainer-and-peft",headingTag:"h2"}}),v=new c({props:{code:"bW9kZWxfaWQlMjAlM0QlMjAlMjJRd2VuJTJGUXdlbjMtOEIlMjIlMEElMEElMjMlMjBEZWZpbmUlMjB0aGUlMjB0cmFpbmluZyUyMGFyZ3VtZW50cyUwQW91dHB1dF9kaXIlMjAlM0QlMjAlMjJxd2VuMy1maW5ldHVuZWQtcmVjaXBlcyUyMiUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBOZXVyb25UcmFpbmluZ0FyZ3VtZW50cyglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEb3V0cHV0X2RpciUyQyUwQSUyMCUyMCUyMCUyMG51bV90cmFpbl9lcG9jaHMlM0QzJTJDJTBBJTIwJTIwJTIwJTIwZG9fdHJhaW4lM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3N0ZXBzJTNELTElMkMlMjAlMjAlMjMlMjAtMSUyMG1lYW5zJTIwdHJhaW4lMjB1bnRpbCUyMHRoZSUyMGVuZCUyMG9mJTIwdGhlJTIwZGF0YXNldCUwQSUyMCUyMCUyMCUyMHBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUzRDElMkMlMEElMjAlMjAlMjAlMjBncmFkaWVudF9hY2N1bXVsYXRpb25fc3RlcHMlM0Q4JTJDJTBBJTIwJTIwJTIwJTIwbGVhcm5pbmdfcmF0ZSUzRDVlLTQlMkMlMEElMjAlMjAlMjAlMjBiZjE2JTNEVHJ1ZSUyQyUyMCUyMCUwQSUyMCUyMCUyMCUyMHRlbnNvcl9wYXJhbGxlbF9zaXplJTNEOCUyQyUwQSUyMCUyMCUyMCUyMGxvZ2dpbmdfc3RlcHMlM0QyJTJDJTBBJTIwJTIwJTIwJTIwbHJfc2NoZWR1bGVyX3R5cGUlM0QlMjJjb3NpbmUlMjIlMkMlMEElMjAlMjAlMjAlMjBvdmVyd3JpdGVfb3V0cHV0X2RpciUzRFRydWUlMkMlMEEpJTBBJTBBJTIzJTIwTG9hZCUyMHRoZSUyMG1vZGVsJTIwd2l0aCUyMHRoZSUyME5ldXJvbk1vZGVsRm9yQ2F1c2FsTE0lMjBjbGFzcy4lMEElMjMlMjBJdCUyMHdpbGwlMjBsb2FkJTIwdGhlJTIwbW9kZWwlMjB3aXRoJTIwYSUyMGN1c3RvbSUyMG1vZGVsaW5nJTIwc3BlZmljaWNhbGx5JTIwZGVzaWduZWQlMjBmb3IlMjBBV1MlMjBUcmFpbml1bS4lMEF0cm5fY29uZmlnJTIwJTNEJTIwdHJhaW5pbmdfYXJncy50cm5fY29uZmlnJTBBZHR5cGUlMjAlM0QlMjB0b3JjaC5iZmxvYXQxNiUyMGlmJTIwdHJhaW5pbmdfYXJncy5iZjE2JTIwZWxzZSUyMHRvcmNoLmZsb2F0MzIlMEFtb2RlbCUyMCUzRCUyME5ldXJvbk1vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTBBJTIwJTIwJTIwJTIwdHJuX2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEZHR5cGUlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjBVc2UlMjBGbGFzaEF0dGVudGlvbjIlMjBmb3IlMjBiZXR0ZXIlMjBwZXJmb3JtYW5jZSUyMGFuZCUyMHRvJTIwYmUlMjBhYmxlJTIwdG8lMjB1c2UlMjBsYXJnZXIlMjBzZXF1ZW5jZSUyMGxlbmd0aHMuJTBBJTIwJTIwJTIwJTIwdXNlX2ZsYXNoX2F0dGVudGlvbl8yJTNEVHJ1ZSUyQyUwQSklMEElMEFsb3JhX2NvbmZpZyUyMCUzRCUyMExvcmFDb25maWcoJTBBJTIwJTIwJTIwJTIwciUzRDY0JTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9hbHBoYSUzRDEyOCUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjB0YXJnZXRfbW9kdWxlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVtYmVkX3Rva2VucyUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnFfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnZfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm9fcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmtfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnVwX3Byb2olMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJkb3duX3Byb2olMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJnYXRlX3Byb2olMjIlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSklMEElMEElMjMlMjBDb252ZXJ0aW5nJTIwdGhlJTIwTmV1cm9uVHJhaW5pbmdBcmd1bWVudHMlMjB0byUyMGElMjBkaWN0aW9uYXJ5JTIwdG8lMjBmZWVkJTIwdGhlbSUyMHRvJTIwdGhlJTIwTmV1cm9uU0ZUQ29uZmlnLiUwQWFyZ3MlMjAlM0QlMjB0cmFpbmluZ19hcmdzLnRvX2RpY3QoKSUwQSUwQXNmdF9jb25maWclMjAlM0QlMjBOZXVyb25TRlRDb25maWcoJTBBJTIwJTIwJTIwJTIwbWF4X3NlcV9sZW5ndGglM0Q0MDk2JTJDJTBBJTIwJTIwJTIwJTIwcGFja2luZyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAqKmFyZ3MlMkMlMEEpJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpJTBBZGF0YXNldCUyMCUzRCUyMHByZXByb2Nlc3NfZGF0YXNldF93aXRoX2Vvcyh0b2tlbml6ZXIuZW9zX3Rva2VuKSUwQSUwQSUyMGRlZiUyMGZvcm1hdHRpbmdfZnVuY3Rpb24oZXhhbXBsZXMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwdG9rZW5pemVyLmFwcGx5X2NoYXRfdGVtcGxhdGUoZXhhbXBsZXMlNUIlMjJtZXNzYWdlcyUyMiU1RCUyQyUyMHRva2VuaXplJTNERmFsc2UlMkMlMjBhZGRfZ2VuZXJhdGlvbl9wcm9tcHQlM0RGYWxzZSklMEElMEElMjAlMjMlMjBUaGUlMjBOZXVyb25TRlRUcmFpbmVyJTIwd2lsbCUyMHVzZSUyMCU2MGZvcm1hdHRpbmdfZnVuY3Rpb24lNjAlMjB0byUyMGZvcm1hdCUyMHRoZSUyMGRhdGFzZXQlMjBhbmQlMjAlNjBsb3JhX2NvbmZpZyU2MCUyMHRvJTIwYXBwbHklMjBMb1JBJTIwb24lMjB0aGUlMEElMjAlMjMlMjBtb2RlbC4lMEElMjB0cmFpbmVyJTIwJTNEJTIwTmV1cm9uU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjAlMjBhcmdzJTNEc2Z0X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMCUyMG1vZGVsJTNEbW9kZWwlMkMlMEElMjAlMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRGxvcmFfY29uZmlnJTJDJTBBJTIwJTIwJTIwJTIwJTIwdG9rZW5pemVyJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjBmb3JtYXR0aW5nX2Z1bmMlM0Rmb3JtYXR0aW5nX2Z1bmN0aW9uJTJDJTBBJTIwKSUwQSUyMHRyYWluZXIudHJhaW4oKQ==",highlighted:`model_id = <span class="hljs-string">&quot;Qwen/Qwen3-8B&quot;</span>
<span class="hljs-comment"># Define the training arguments</span>
output_dir = <span class="hljs-string">&quot;qwen3-finetuned-recipes&quot;</span>
training_args = NeuronTrainingArguments(
output_dir=output_dir,
num_train_epochs=<span class="hljs-number">3</span>,
do_train=<span class="hljs-literal">True</span>,
max_steps=-<span class="hljs-number">1</span>, <span class="hljs-comment"># -1 means train until the end of the dataset</span>
per_device_train_batch_size=<span class="hljs-number">1</span>,
gradient_accumulation_steps=<span class="hljs-number">8</span>,
learning_rate=<span class="hljs-number">5e-4</span>,
bf16=<span class="hljs-literal">True</span>,
tensor_parallel_size=<span class="hljs-number">8</span>,
logging_steps=<span class="hljs-number">2</span>,
lr_scheduler_type=<span class="hljs-string">&quot;cosine&quot;</span>,
overwrite_output_dir=<span class="hljs-literal">True</span>,
)
<span class="hljs-comment"># Load the model with the NeuronModelForCausalLM class.</span>
<span class="hljs-comment"># It will load the model with a custom modeling speficically designed for AWS Trainium.</span>
trn_config = training_args.trn_config
dtype = torch.bfloat16 <span class="hljs-keyword">if</span> training_args.bf16 <span class="hljs-keyword">else</span> torch.float32
model = NeuronModelForCausalLM.from_pretrained(
model_id,
trn_config,
torch_dtype=dtype,
<span class="hljs-comment"># Use FlashAttention2 for better performance and to be able to use larger sequence lengths.</span>
use_flash_attention_2=<span class="hljs-literal">True</span>,
)
lora_config = LoraConfig(
r=<span class="hljs-number">64</span>,
lora_alpha=<span class="hljs-number">128</span>,
lora_dropout=<span class="hljs-number">0.05</span>,
target_modules=[
<span class="hljs-string">&quot;embed_tokens&quot;</span>,
<span class="hljs-string">&quot;q_proj&quot;</span>,
<span class="hljs-string">&quot;v_proj&quot;</span>,
<span class="hljs-string">&quot;o_proj&quot;</span>,
<span class="hljs-string">&quot;k_proj&quot;</span>,
<span class="hljs-string">&quot;up_proj&quot;</span>,
<span class="hljs-string">&quot;down_proj&quot;</span>,
<span class="hljs-string">&quot;gate_proj&quot;</span>,
],
bias=<span class="hljs-string">&quot;none&quot;</span>,
task_type=<span class="hljs-string">&quot;CAUSAL_LM&quot;</span>,
)
<span class="hljs-comment"># Converting the NeuronTrainingArguments to a dictionary to feed them to the NeuronSFTConfig.</span>
args = training_args.to_dict()
sft_config = NeuronSFTConfig(
max_seq_length=<span class="hljs-number">4096</span>,
packing=<span class="hljs-literal">True</span>,
**args,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = preprocess_dataset_with_eos(tokenizer.eos_token)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">formatting_function</span>(<span class="hljs-params">examples</span>):
<span class="hljs-keyword">return</span> tokenizer.apply_chat_template(examples[<span class="hljs-string">&quot;messages&quot;</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)
<span class="hljs-comment"># The NeuronSFTTrainer will use \`formatting_function\` to format the dataset and \`lora_config\` to apply LoRA on the</span>
<span class="hljs-comment"># model.</span>
trainer = NeuronSFTTrainer(
args=sft_config,
model=model,
peft_config=lora_config,
tokenizer=tokenizer,
train_dataset=dataset,
formatting_func=formatting_function,
)
trainer.train()`,wrap:!1}}),x=new c({props:{code:"JTIzJTIwRmxhZ3MlMjBmb3IlMjBOZXVyb24lMjBjb21waWxhdGlvbiUwQWV4cG9ydCUyME5FVVJPTl9DQ19GTEFHUyUzRCUyMi0tbW9kZWwtdHlwZSUyMHRyYW5zZm9ybWVyJTIwLS1yZXRyeV9mYWlsZWRfY29tcGlsYXRpb24lMjIlMEFleHBvcnQlMjBORVVST05fRlVTRV9TT0ZUTUFYJTNEMSUwQWV4cG9ydCUyME5FVVJPTl9SVF9BU1lOQ19FWEVDX01BWF9JTkZMSUdIVF9SRVFVRVNUUyUzRDMlMjAlMjMlMjBBc3luYyUyMFJ1bnRpbWUlMEFleHBvcnQlMjBNQUxMT0NfQVJFTkFfTUFYJTNENjQlMjAlMjMlMjBIb3N0JTIwT09NJTIwbWl0aWdhdGlvbiUwQSUwQSUyMyUyMFZhcmlhYmxlcyUyMGZvciUyMHRyYWluaW5nJTBBUFJPQ0VTU0VTX1BFUl9OT0RFJTNEMzIlMEFOVU1fRVBPQ0hTJTNEMyUwQVRQX0RFR1JFRSUzRDglMEFCUyUzRDElMEFHUkFESUVOVF9BQ0NVTVVMQVRJT05fU1RFUFMlM0Q4JTBBTE9HR0lOR19TVEVQUyUzRDIlMEFNT0RFTF9OQU1FJTNEJTIyUXdlbiUyRlF3ZW4zLThCJTIyJTIwJTIzJTIwQ2hhbmdlJTIwdGhpcyUyMHRvJTIwdGhlJTIwZGVzaXJlZCUyMG1vZGVsJTIwbmFtZSUwQU9VVFBVVF9ESVIlM0QlMjIlMjQoZWNobyUyMCUyNE1PREVMX05BTUUlMjAlN0MlMjBjdXQlMjAtZCclMkYnJTIwLWYyKS1maW5ldHVuZWQlMjIlMEFESVNUUklCVVRFRF9BUkdTJTNEJTIyLS1ucHJvY19wZXJfbm9kZSUyMCUyNFBST0NFU1NFU19QRVJfTk9ERSUyMiUwQVNDUklQVF9ESVIlM0QlMjQoJTIwY2QlMjAtLSUyMCUyMiUyNCglMjBkaXJuYW1lJTIwLS0lMjAlMjIlMjQlN0JCQVNIX1NPVVJDRSU1QjAlNUQlN0QlMjIlMjApJTIyJTIwJTI2JTNFJTIwJTJGZGV2JTJGbnVsbCUyMCUyNiUyNiUyMHB3ZCUyMCklMEElMEFpZiUyMCU1QiUyMCUyMiUyNE5FVVJPTl9FWFRSQUNUX0dSQVBIU19PTkxZJTIyJTIwJTNEJTIwJTIyMSUyMiUyMCU1RCUzQiUyMHRoZW4lMEElMjAlMjAlMjAlMjBNQVhfU1RFUFMlM0Q1JTBBZWxzZSUwQSUyMCUyMCUyMCUyME1BWF9TVEVQUyUzRC0xJTBBZmklMEElMEF0b3JjaHJ1biUyMC0tbnByb2NfcGVyX25vZGUlMjAlMjRQUk9DRVNTRVNfUEVSX05PREUlMjBmaW5ldHVuZV9xd2VuMy5weSUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfaWQlMjAlMjRNT0RFTF9OQU1FJTIwJTVDJTBBJTIwJTIwLS1udW1fdHJhaW5fZXBvY2hzJTIwJTI0TlVNX0VQT0NIUyUyMCU1QyUwQSUyMCUyMC0tZG9fdHJhaW4lMjAlNUMlMEElMjAlMjAtLW1heF9zdGVwcyUyMCUyNE1BWF9TVEVQUyUyMCU1QyUwQSUyMCUyMC0tcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTIwJTI0QlMlMjAlNUMlMEElMjAlMjAtLWdyYWRpZW50X2FjY3VtdWxhdGlvbl9zdGVwcyUyMCUyNEdSQURJRU5UX0FDQ1VNVUxBVElPTl9TVEVQUyUyMCU1QyUwQSUyMCUyMC0tbGVhcm5pbmdfcmF0ZSUyMDhlLTQlMjAlNUMlMEElMjAlMjAtLWJmMTYlMjAlNUMlMEElMjAlMjAtLXRlbnNvcl9wYXJhbGxlbF9zaXplJTIwJTI0VFBfREVHUkVFJTIwJTVDJTBBJTIwJTIwLS16ZXJvXzElMjAlNUMlMEElMjAlMjAtLWFzeW5jX3NhdmUlMjAlNUMlMEElMjAlMjAtLWxvZ2dpbmdfc3RlcHMlMjAlMjRMT0dHSU5HX1NURVBTJTIwJTVDJTBBJTIwJTIwLS1vdXRwdXRfZGlyJTIwJTI0T1VUUFVUX0RJUiUyMCU1QyUwQSUyMCUyMC0tbHJfc2NoZWR1bGVyX3R5cGUlMjAlMjJjb3NpbmUlMjIlMjAlNUMlMEElMjAlMjAtLW92ZXJ3cml0ZV9vdXRwdXRfZGly",highlighted:`<span class="hljs-comment"># Flags for Neuron compilation</span>
<span class="hljs-built_in">export</span> NEURON_CC_FLAGS=<span class="hljs-string">&quot;--model-type transformer --retry_failed_compilation&quot;</span>
<span class="hljs-built_in">export</span> NEURON_FUSE_SOFTMAX=1
<span class="hljs-built_in">export</span> NEURON_RT_ASYNC_EXEC_MAX_INFLIGHT_REQUESTS=3 <span class="hljs-comment"># Async Runtime</span>
<span class="hljs-built_in">export</span> MALLOC_ARENA_MAX=64 <span class="hljs-comment"># Host OOM mitigation</span>
<span class="hljs-comment"># Variables for training</span>
PROCESSES_PER_NODE=32
NUM_EPOCHS=3
TP_DEGREE=8
BS=1
GRADIENT_ACCUMULATION_STEPS=8
LOGGING_STEPS=2
MODEL_NAME=<span class="hljs-string">&quot;Qwen/Qwen3-8B&quot;</span> <span class="hljs-comment"># Change this to the desired model name</span>
OUTPUT_DIR=<span class="hljs-string">&quot;<span class="hljs-subst">$(echo $MODEL_NAME | cut -d&#x27;/&#x27; -f2)</span>-finetuned&quot;</span>
DISTRIBUTED_ARGS=<span class="hljs-string">&quot;--nproc_per_node <span class="hljs-variable">$PROCESSES_PER_NODE</span>&quot;</span>
SCRIPT_DIR=$( <span class="hljs-built_in">cd</span> -- <span class="hljs-string">&quot;<span class="hljs-subst">$( dirname -- <span class="hljs-string">&quot;<span class="hljs-variable">\${BASH_SOURCE[0]}</span>&quot;</span> )</span>&quot;</span> &amp;&gt; /dev/null &amp;&amp; <span class="hljs-built_in">pwd</span> )
<span class="hljs-keyword">if</span> [ <span class="hljs-string">&quot;<span class="hljs-variable">$NEURON_EXTRACT_GRAPHS_ONLY</span>&quot;</span> = <span class="hljs-string">&quot;1&quot;</span> ]; <span class="hljs-keyword">then</span>
MAX_STEPS=5
<span class="hljs-keyword">else</span>
MAX_STEPS=-1
<span class="hljs-keyword">fi</span>
torchrun --nproc_per_node <span class="hljs-variable">$PROCESSES_PER_NODE</span> finetune_qwen3.py \\
--model_id <span class="hljs-variable">$MODEL_NAME</span> \\
--num_train_epochs <span class="hljs-variable">$NUM_EPOCHS</span> \\
--do_train \\
--max_steps <span class="hljs-variable">$MAX_STEPS</span> \\
--per_device_train_batch_size <span class="hljs-variable">$BS</span> \\
--gradient_accumulation_steps <span class="hljs-variable">$GRADIENT_ACCUMULATION_STEPS</span> \\
--learning_rate 8e-4 \\
--bf16 \\
--tensor_parallel_size <span class="hljs-variable">$TP_DEGREE</span> \\
--zero_1 \\
--async_save \\
--logging_steps <span class="hljs-variable">$LOGGING_STEPS</span> \\
--output_dir <span class="hljs-variable">$OUTPUT_DIR</span> \\
--lr_scheduler_type <span class="hljs-string">&quot;cosine&quot;</span> \\
--overwrite_output_dir`,wrap:!1}}),Y=new c({props:{code:"LiUyRmZpbmV0dW5lX3F3ZW4zLnNo",highlighted:"./finetune_qwen3.sh",wrap:!1}}),$=new pl({props:{title:"4. 🔄 Consolidate and Test the Fine-Tuned Model",local:"4--consolidate-and-test-the-fine-tuned-model",headingTag:"h2"}}),P=new c({props:{code:"b3B0aW11bS1jbGklMjBuZXVyb24lMjBjb25zb2xpZGF0ZSUyMFF3ZW4zLThCLWZpbmV0dW5lZCUyMFF3ZW4zLThCLWZpbmV0dW5lZCUyRmFkYXB0ZXJfZGVmYXVsdA==",highlighted:"optimum-cli neuron consolidate Qwen3-8B-finetuned Qwen3-8B-finetuned/adapter_default",wrap:!1}}),O=new c({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwUGVmdE1vZGVsJTJDJTIwUGVmdENvbmZpZyUwQSUwQSUwQU1PREVMX05BTUUlMjAlM0QlMjAlMjJRd2VuJTJGUXdlbjMtOEIlMjIlMEFBREFQVEVSX1BBVEglMjAlM0QlMjAlMjJRd2VuMy04Qi1maW5ldHVuZWQlMkZhZGFwdGVyX2RlZmF1bHQlMjIlMEFNRVJHRURfTU9ERUxfUEFUSCUyMCUzRCUyMCUyMlF3ZW4zLThCLXJlY2lwZXMlMjIlMEElMEElMjMlMjBMb2FkJTIwYmFzZSUyMG1vZGVsJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoTU9ERUxfTkFNRSklMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChNT0RFTF9OQU1FKSUwQSUwQSUyMyUyMExvYWQlMjBhZGFwdGVyJTIwY29uZmlndXJhdGlvbiUyMGFuZCUyMG1vZGVsJTBBYWRhcHRlcl9jb25maWclMjAlM0QlMjBQZWZ0Q29uZmlnLmZyb21fcHJldHJhaW5lZChBREFQVEVSX1BBVEgpJTBBZmluZXR1bmVkX21vZGVsJTIwJTNEJTIwUGVmdE1vZGVsLmZyb21fcHJldHJhaW5lZChtb2RlbCUyQyUyMEFEQVBURVJfUEFUSCUyQyUyMGNvbmZpZyUzRGFkYXB0ZXJfY29uZmlnKSUwQSUwQXByaW50KCUyMlNhdmluZyUyMHRva2VuaXplciUyMiklMEF0b2tlbml6ZXIuc2F2ZV9wcmV0cmFpbmVkKE1FUkdFRF9NT0RFTF9QQVRIKSUwQXByaW50KCUyMlNhdmluZyUyMG1vZGVsJTIyKSUwQWZpbmV0dW5lZF9tb2RlbCUyMCUzRCUyMGZpbmV0dW5lZF9tb2RlbC5tZXJnZV9hbmRfdW5sb2FkKCklMEFmaW5ldHVuZWRfbW9kZWwuc2F2ZV9wcmV0cmFpbmVkKE1FUkdFRF9NT0RFTF9QQVRIKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel, PeftConfig
MODEL_NAME = <span class="hljs-string">&quot;Qwen/Qwen3-8B&quot;</span>
ADAPTER_PATH = <span class="hljs-string">&quot;Qwen3-8B-finetuned/adapter_default&quot;</span>
MERGED_MODEL_PATH = <span class="hljs-string">&quot;Qwen3-8B-recipes&quot;</span>
<span class="hljs-comment"># Load base model</span>
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
<span class="hljs-comment"># Load adapter configuration and model</span>
adapter_config = PeftConfig.from_pretrained(ADAPTER_PATH)
finetuned_model = PeftModel.from_pretrained(model, ADAPTER_PATH, config=adapter_config)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Saving tokenizer&quot;</span>)
tokenizer.save_pretrained(MERGED_MODEL_PATH)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Saving model&quot;</span>)
finetuned_model = finetuned_model.merge_and_unload()
finetuned_model.save_pretrained(MERGED_MODEL_PATH)`,wrap:!1}}),el=new pl({props:{title:"5. 🤗 Push to Hugging Face Hub",local:"5--push-to-hugging-face-hub",headingTag:"h2"}}),nl=new c({props:{code:"aHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4=",highlighted:"huggingface-cli login",wrap:!1}}),Ml=new c({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQU1FUkdFRF9NT0RFTF9QQVRIJTIwJTNEJTIwJTIyUXdlbjMtOEItcmVjaXBlcyUyMiUwQUhVQl9NT0RFTF9OQU1FJTIwJTNEJTIwJTIyeW91ci11c2VybmFtZSUyRnF3ZW4zLThiLXJlY2lwZXMlMjIlMEElMEElMjMlMjBMb2FkJTIwYW5kJTIwcHVzaCUyMHRva2VuaXplciUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKE1FUkdFRF9NT0RFTF9QQVRIKSUwQXRva2VuaXplci5wdXNoX3RvX2h1YihIVUJfTU9ERUxfTkFNRSklMEElMEElMjMlMjBMb2FkJTIwYW5kJTIwcHVzaCUyMG1vZGVsJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoTUVSR0VEX01PREVMX1BBVEgpJTBBbW9kZWwucHVzaF90b19odWIoSFVCX01PREVMX05BTUUp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
MERGED_MODEL_PATH = <span class="hljs-string">&quot;Qwen3-8B-recipes&quot;</span>
HUB_MODEL_NAME = <span class="hljs-string">&quot;your-username/qwen3-8b-recipes&quot;</span>
<span class="hljs-comment"># Load and push tokenizer</span>
tokenizer = AutoTokenizer.from_pretrained(MERGED_MODEL_PATH)
tokenizer.push_to_hub(HUB_MODEL_NAME)
<span class="hljs-comment"># Load and push model</span>
model = AutoModelForCausalLM.from_pretrained(MERGED_MODEL_PATH)
model.push_to_hub(HUB_MODEL_NAME)`,wrap:!1}}),{c(){u=M("meta"),yl=n(),ol=M("p"),rl=n(),o(d.$$.fragment),Tl=n(),j=M("p"),j.textContent=Me,ml=n(),J=M("p"),J.innerHTML=ie,cl=n(),o(w.$$.fragment),ul=n(),h=M("p"),h.innerHTML=pe,dl=n(),b=M("p"),b.textContent=oe,jl=n(),f=M("ul"),f.innerHTML=Ue,Jl=n(),Z=M("p"),Z.innerHTML=ye,wl=n(),o(I.$$.fragment),hl=n(),C=M("p"),C.innerHTML=re,bl=n(),o(E.$$.fragment),fl=n(),B=M("p"),B.innerHTML=Te,Zl=n(),o(R.$$.fragment),Il=n(),V=M("p"),V.textContent=me,Cl=n(),_=M("p"),_.textContent=ce,El=n(),o(A.$$.fragment),Bl=n(),F=M("p"),F.innerHTML=ue,Rl=n(),o(G.$$.fragment),Vl=n(),Q=M("p"),Q.innerHTML=de,_l=n(),X=M("p"),X.innerHTML=je,Al=n(),g=M("p"),g.innerHTML=Je,Fl=n(),N=M("ul"),N.innerHTML=we,Gl=n(),W=M("p"),W.textContent=he,Ql=n(),S=M("p"),S.textContent=be,Xl=n(),o(v.$$.fragment),gl=n(),H=M("p"),H.innerHTML=fe,Nl=n(),k=M("p"),k.textContent=Ze,Wl=n(),o(x.$$.fragment),Sl=n(),z=M("p"),z.innerHTML=Ie,vl=n(),o(Y.$$.fragment),Hl=n(),o($.$$.fragment),kl=n(),L=M("p"),L.textContent=Ce,xl=n(),q=M("p"),q.textContent=Ee,zl=n(),o(P.$$.fragment),Yl=n(),D=M("p"),D.innerHTML=Be,$l=n(),o(O.$$.fragment),Ll=n(),K=M("p"),K.textContent=Re,ql=n(),ll=M("p"),ll.textContent=Ve,Pl=n(),o(el.$$.fragment),Dl=n(),tl=M("p"),tl.textContent=_e,Ol=n(),sl=M("p"),sl.innerHTML=Ae,Kl=n(),o(nl.$$.fragment),le=n(),al=M("p"),al.innerHTML=Fe,ee=n(),o(Ml.$$.fragment),te=n(),il=M("p"),il.innerHTML=Ge,se=n(),Ul=M("p"),this.h()},l(l){const e=He("svelte-u9bgzb",document.head);u=i(e,"META",{name:!0,content:!0}),e.forEach(t),yl=a(l),ol=i(l,"P",{}),Qe(ol).forEach(t),rl=a(l),U(d.$$.fragment,l),Tl=a(l),j=i(l,"P",{"data-svelte-h":!0}),p(j)!=="svelte-61i5q2"&&(j.textContent=Me),ml=a(l),J=i(l,"P",{"data-svelte-h":!0}),p(J)!=="svelte-1vtxnz9"&&(J.innerHTML=ie),cl=a(l),U(w.$$.fragment,l),ul=a(l),h=i(l,"P",{"data-svelte-h":!0}),p(h)!=="svelte-43r1bv"&&(h.innerHTML=pe),dl=a(l),b=i(l,"P",{"data-svelte-h":!0}),p(b)!=="svelte-1ktungo"&&(b.textContent=oe),jl=a(l),f=i(l,"UL",{"data-svelte-h":!0}),p(f)!=="svelte-1efvabb"&&(f.innerHTML=Ue),Jl=a(l),Z=i(l,"P",{"data-svelte-h":!0}),p(Z)!=="svelte-ea5rfz"&&(Z.innerHTML=ye),wl=a(l),U(I.$$.fragment,l),hl=a(l),C=i(l,"P",{"data-svelte-h":!0}),p(C)!=="svelte-1uki1hp"&&(C.innerHTML=re),bl=a(l),U(E.$$.fragment,l),fl=a(l),B=i(l,"P",{"data-svelte-h":!0}),p(B)!=="svelte-25b86y"&&(B.innerHTML=Te),Zl=a(l),U(R.$$.fragment,l),Il=a(l),V=i(l,"P",{"data-svelte-h":!0}),p(V)!=="svelte-1e4z25x"&&(V.textContent=me),Cl=a(l),_=i(l,"P",{"data-svelte-h":!0}),p(_)!=="svelte-13ur3kw"&&(_.textContent=ce),El=a(l),U(A.$$.fragment,l),Bl=a(l),F=i(l,"P",{"data-svelte-h":!0}),p(F)!=="svelte-rgxks7"&&(F.innerHTML=ue),Rl=a(l),U(G.$$.fragment,l),Vl=a(l),Q=i(l,"P",{"data-svelte-h":!0}),p(Q)!=="svelte-1iw1mwz"&&(Q.innerHTML=de),_l=a(l),X=i(l,"P",{"data-svelte-h":!0}),p(X)!=="svelte-dh3p5f"&&(X.innerHTML=je),Al=a(l),g=i(l,"P",{"data-svelte-h":!0}),p(g)!=="svelte-1l7hprp"&&(g.innerHTML=Je),Fl=a(l),N=i(l,"UL",{"data-svelte-h":!0}),p(N)!=="svelte-qjcfm2"&&(N.innerHTML=we),Gl=a(l),W=i(l,"P",{"data-svelte-h":!0}),p(W)!=="svelte-1xxdciy"&&(W.textContent=he),Ql=a(l),S=i(l,"P",{"data-svelte-h":!0}),p(S)!=="svelte-1v29c1g"&&(S.textContent=be),Xl=a(l),U(v.$$.fragment,l),gl=a(l),H=i(l,"P",{"data-svelte-h":!0}),p(H)!=="svelte-1loxsby"&&(H.innerHTML=fe),Nl=a(l),k=i(l,"P",{"data-svelte-h":!0}),p(k)!=="svelte-pvqso5"&&(k.textContent=Ze),Wl=a(l),U(x.$$.fragment,l),Sl=a(l),z=i(l,"P",{"data-svelte-h":!0}),p(z)!=="svelte-2cag06"&&(z.innerHTML=Ie),vl=a(l),U(Y.$$.fragment,l),Hl=a(l),U($.$$.fragment,l),kl=a(l),L=i(l,"P",{"data-svelte-h":!0}),p(L)!=="svelte-46jexq"&&(L.textContent=Ce),xl=a(l),q=i(l,"P",{"data-svelte-h":!0}),p(q)!=="svelte-9a9g75"&&(q.textContent=Ee),zl=a(l),U(P.$$.fragment,l),Yl=a(l),D=i(l,"P",{"data-svelte-h":!0}),p(D)!=="svelte-1cigzjn"&&(D.innerHTML=Be),$l=a(l),U(O.$$.fragment,l),Ll=a(l),K=i(l,"P",{"data-svelte-h":!0}),p(K)!=="svelte-vght15"&&(K.textContent=Re),ql=a(l),ll=i(l,"P",{"data-svelte-h":!0}),p(ll)!=="svelte-1sz4goc"&&(ll.textContent=Ve),Pl=a(l),U(el.$$.fragment,l),Dl=a(l),tl=i(l,"P",{"data-svelte-h":!0}),p(tl)!=="svelte-a60mly"&&(tl.textContent=_e),Ol=a(l),sl=i(l,"P",{"data-svelte-h":!0}),p(sl)!=="svelte-z9yml0"&&(sl.innerHTML=Ae),Kl=a(l),U(nl.$$.fragment,l),le=a(l),al=i(l,"P",{"data-svelte-h":!0}),p(al)!=="svelte-h2brza"&&(al.innerHTML=Fe),ee=a(l),U(Ml.$$.fragment,l),te=a(l),il=i(l,"P",{"data-svelte-h":!0}),p(il)!=="svelte-fnz57k"&&(il.innerHTML=Ge),se=a(l),Ul=i(l,"P",{}),Qe(Ul).forEach(t),this.h()},h(){Xe(u,"name","hf:doc:metadata"),Xe(u,"content",ze)},m(l,e){ke(document.head,u),s(l,yl,e),s(l,ol,e),s(l,rl,e),y(d,l,e),s(l,Tl,e),s(l,j,e),s(l,ml,e),s(l,J,e),s(l,cl,e),y(w,l,e),s(l,ul,e),s(l,h,e),s(l,dl,e),s(l,b,e),s(l,jl,e),s(l,f,e),s(l,Jl,e),s(l,Z,e),s(l,wl,e),y(I,l,e),s(l,hl,e),s(l,C,e),s(l,bl,e),y(E,l,e),s(l,fl,e),s(l,B,e),s(l,Zl,e),y(R,l,e),s(l,Il,e),s(l,V,e),s(l,Cl,e),s(l,_,e),s(l,El,e),y(A,l,e),s(l,Bl,e),s(l,F,e),s(l,Rl,e),y(G,l,e),s(l,Vl,e),s(l,Q,e),s(l,_l,e),s(l,X,e),s(l,Al,e),s(l,g,e),s(l,Fl,e),s(l,N,e),s(l,Gl,e),s(l,W,e),s(l,Ql,e),s(l,S,e),s(l,Xl,e),y(v,l,e),s(l,gl,e),s(l,H,e),s(l,Nl,e),s(l,k,e),s(l,Wl,e),y(x,l,e),s(l,Sl,e),s(l,z,e),s(l,vl,e),y(Y,l,e),s(l,Hl,e),y($,l,e),s(l,kl,e),s(l,L,e),s(l,xl,e),s(l,q,e),s(l,zl,e),y(P,l,e),s(l,Yl,e),s(l,D,e),s(l,$l,e),y(O,l,e),s(l,Ll,e),s(l,K,e),s(l,ql,e),s(l,ll,e),s(l,Pl,e),y(el,l,e),s(l,Dl,e),s(l,tl,e),s(l,Ol,e),s(l,sl,e),s(l,Kl,e),y(nl,l,e),s(l,le,e),s(l,al,e),s(l,ee,e),y(Ml,l,e),s(l,te,e),s(l,il,e),s(l,se,e),s(l,Ul,e),ne=!0},p:Ne,i(l){ne||(r(d.$$.fragment,l),r(w.$$.fragment,l),r(I.$$.fragment,l),r(E.$$.fragment,l),r(R.$$.fragment,l),r(A.$$.fragment,l),r(G.$$.fragment,l),r(v.$$.fragment,l),r(x.$$.fragment,l),r(Y.$$.fragment,l),r($.$$.fragment,l),r(P.$$.fragment,l),r(O.$$.fragment,l),r(el.$$.fragment,l),r(nl.$$.fragment,l),r(Ml.$$.fragment,l),ne=!0)},o(l){T(d.$$.fragment,l),T(w.$$.fragment,l),T(I.$$.fragment,l),T(E.$$.fragment,l),T(R.$$.fragment,l),T(A.$$.fragment,l),T(G.$$.fragment,l),T(v.$$.fragment,l),T(x.$$.fragment,l),T(Y.$$.fragment,l),T($.$$.fragment,l),T(P.$$.fragment,l),T(O.$$.fragment,l),T(el.$$.fragment,l),T(nl.$$.fragment,l),T(Ml.$$.fragment,l),ne=!1},d(l){l&&(t(yl),t(ol),t(rl),t(Tl),t(j),t(ml),t(J),t(cl),t(ul),t(h),t(dl),t(b),t(jl),t(f),t(Jl),t(Z),t(wl),t(hl),t(C),t(bl),t(fl),t(B),t(Zl),t(Il),t(V),t(Cl),t(_),t(El),t(Bl),t(F),t(Rl),t(Vl),t(Q),t(_l),t(X),t(Al),t(g),t(Fl),t(N),t(Gl),t(W),t(Ql),t(S),t(Xl),t(gl),t(H),t(Nl),t(k),t(Wl),t(Sl),t(z),t(vl),t(Hl),t(kl),t(L),t(xl),t(q),t(zl),t(Yl),t(D),t($l),t(Ll),t(K),t(ql),t(ll),t(Pl),t(Dl),t(tl),t(Ol),t(sl),t(Kl),t(le),t(al),t(ee),t(te),t(il),t(se),t(Ul)),t(u),m(d,l),m(w,l),m(I,l),m(E,l),m(R,l),m(A,l),m(G,l),m(v,l),m(x,l),m(Y,l),m($,l),m(P,l),m(O,l),m(el,l),m(nl,l),m(Ml,l)}}}const ze='{"title":"🚀 Fine-Tune Qwen3 on AWS Trainium","local":"-fine-tune-qwen3-on-aws-trainium","sections":[{"title":"1. 🛠️ Setup AWS Environment","local":"1--setup-aws-environment","sections":[],"depth":2},{"title":"2. 📊 Load and Prepare the Dataset","local":"2--load-and-prepare-the-dataset","sections":[],"depth":2},{"title":"3. 🎯 Fine-tune Qwen3 with NeuronSFTTrainer and PEFT","local":"3--fine-tune-qwen3-with-neuronsfttrainer-and-peft","sections":[],"depth":2},{"title":"4. 🔄 Consolidate and Test the Fine-Tuned Model","local":"4--consolidate-and-test-the-fine-tuned-model","sections":[],"depth":2},{"title":"5. 🤗 Push to Hugging Face Hub","local":"5--push-to-hugging-face-hub","sections":[],"depth":2}],"depth":1}';function Ye(ae){return We(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class De extends Se{constructor(u){super(),ve(this,u,Ye,xe,ge,{})}}export{De as component};

Xet Storage Details

Size:
38.1 kB
·
Xet hash:
70702169cb7ee01750a536ff29a44d23f847daa7270f7b4661f57562869b511d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.