Buckets:
| import{s as ge,n as Ne,o as We}from"../chunks/scheduler.85c25b89.js";import{S as Se,i as ve,g as M,s as n,r as o,A as He,h as i,f as t,c as a,j as Qe,u as U,x as p,k as Xe,y as ke,a as s,v as y,d as r,t as T,w as m}from"../chunks/index.c9bcf812.js";import{C as c}from"../chunks/CodeBlock.c004bd26.js";import{H as pl}from"../chunks/getInferenceSnippets.5ea0a804.js";function xe(ae){let u,yl,ol,rl,d,Tl,j,Me="This tutorial shows how to fine-tune the Qwen3 model on AWS Trainium accelerators using optimum-neuron.",ml,J,ie='<strong>This is based on the <a href="https://github.com/huggingface/optimum-neuron/tree/main/examples/training/qwen3" rel="nofollow">Qwen3 fine-tuning example script</a>.</strong>',cl,w,ul,h,pe="We’ll use a <code>trn1.32xlarge</code> instance with 16 Trainium Accelerators (32 Neuron Cores) and the Hugging Face Neuron Deep Learning AMI.",dl,b,oe="The Hugging Face AMI includes all required libraries pre-installed:",jl,f,Ue="<li><code>datasets</code>, <code>transformers</code>, <code>optimum-neuron</code></li> <li>Neuron SDK packages</li> <li>No additional environment setup needed</li>",Jl,Z,ye='To create your instance, follow the guide <a href="https://huggingface.co/docs/optimum-neuron/guides/setup_aws_instance" rel="nofollow">here</a>.',wl,I,hl,C,re='We’ll use the <a href="https://huggingface.co/datasets/tengomucho/simple_recipes" rel="nofollow">simple recipes dataset</a> to fine-tune our model for recipe generation.',bl,E,fl,B,Te="To load the dataset we use the <code>load_dataset()</code> method from the <code>datasets</code> library.",Zl,R,Il,V,me="To tune our model we need to convert our structured examples into a collection of quotes with a given context, so we define our tokenization function that we will be able to map on the dataset.",Cl,_,ce=`The dataset should be structured with input-output pairs, where each input is a prompt and the output is the expected response from the model. | |
| We will make use of the model’s tokenizer chat template and preprocess the dataset to be fed to the trainer.`,El,A,Bl,F,ue='Note: these functions make references of <code>eos_token</code> and <code>tokenizer</code>, they are well-defined in the <a href="https://github.com/huggingface/optimum-neuron/blob/main/examples/training/qwen3/finetune_qwen3.py" rel="nofollow">Python script</a> to run this tutorial.',Rl,G,Vl,Q,de='For standard PyTorch fine-tuning, you’d typically use <a href="https://github.com/huggingface/peft" rel="nofollow">PEFT</a> with LoRA adapters and the <a href="https://huggingface.co/docs/trl/en/sft_trainer" rel="nofollow"><code>SFTTrainer</code></a>.',_l,X,je="On AWS Trainium, <code>optimum-neuron</code> provides <code>NeuronSFTTrainer</code> as a drop-in replacement.",Al,g,Je=`<strong>Distributed Training on Trainium:</strong> | |
| Since Qwen3 doesn’t fit on a single accelerator, we use distributed training techniques:`,Fl,N,we="<li>Data Parallel (DDP)</li> <li>Tensor Parallelism</li> <li>Pipeline Parallelism</li>",Gl,W,he="Model loading and LoRA configuration work similarly to other accelerators.",Ql,S,be="Combining all the pieces together, and assuming the dataset has already been loaded, we can write the following code to fine-tune Qwen3 on AWS Trainium:",Xl,v,gl,H,fe='📝 <strong>Complete script available:</strong> All steps above are combined in a ready-to-use script <a href="https://github.com/huggingface/optimum-neuron/blob/main/examples/training/qwen3/finetune_qwen3.py" rel="nofollow">finetune_qwen3.py</a>.',Nl,k,Ze="To launch training, just run the following command in your AWS Trainium instance:",Wl,x,Sl,z,Ie='🔧 <strong>Single command execution:</strong> The complete bash training script <a href="https://github.com/huggingface/optimum-neuron/blob/main/examples/training/qwen3/finetune_qwen3.sh" rel="nofollow">finetune_qwen3.sh</a> is available:',vl,Y,Hl,$,kl,L,Ce="Optimum Neuron saves model shards separately during distributed training. These need to be consolidated before use.",xl,q,Ee="Use the Optimum CLI to consolidate:",zl,P,Yl,D,Be="This will create an <code>adapter_model.safetensors</code> file, the LoRA adapter weights that we trained in the previous step. 
We can now reload the model and merge it, so it can be loaded for evaluation:",$l,O,Ll,K,Re="Once this step is done, it is possible to test the model with a new prompt.",ql,ll,Ve="You have successfully created a fine-tuned model from Qwen3!",Pl,el,Dl,tl,_e="Share your fine-tuned model with the community by uploading it to the Hugging Face Hub.",Ol,sl,Ae="<strong>Step 1: Authentication</strong>",Kl,nl,le,al,Fe="<strong>Step 2: Upload your model</strong>",ee,Ml,te,il,Ge="🎉 <strong>Your fine-tuned Qwen3 model is now available on the Hub for others to use!</strong>",se,Ul,ne;return d=new pl({props:{title:"🚀 Fine-Tune Qwen3 on AWS Trainium",local:"-fine-tune-qwen3-on-aws-trainium",headingTag:"h1"}}),w=new pl({props:{title:"1. 🛠️ Setup AWS Environment",local:"1--setup-aws-environment",headingTag:"h2"}}),I=new pl({props:{title:"2. 📊 Load and Prepare the Dataset",local:"2--load-and-prepare-the-dataset",headingTag:"h2"}}),E=new c({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJ3JlY2lwZXMnJTNBJTIwJTIyLSUyMFByZWhlYXQlMjBvdmVuJTIwdG8lMjAzNTAlMjBkZWdyZWVzJTVDbi0lMjBCdXR0ZXIlMjB0d28lMjA5eDUnJTIwbG9hZiUyMHBhbnMlNUNuLSUyMENyZWFtJTIwdGhlJTIwc3VnYXIlMjBhbmQlMjB0aGUlMjBidXR0ZXIlMjB1bnRpbCUyMGxpZ2h0JTIwYW5kJTIwd2hpcHBlZCU1Q24tJTIwQWRkJTIwdGhlJTIwYmFuYW5hcyUyQyUyMGVnZ3MlMkMlMjBsZW1vbiUyMGp1aWNlJTJDJTIwb3JhbmdlJTIwcmluZCU1Q24tJTIwQmVhdCUyMHVudGlsJTIwYmxlbmRlZCUyMHVuaWZvcm1seSU1Q24tJTIwQmUlMjBwYXRpZW50JTJDJTIwYW5kJTIwYmVhdCUyMHVudGlsJTIwdGhlJTIwYmFuYW5hJTIwbHVtcHMlMjBhcmUlMjBnb25lJTVDbi0lMjBTaWZ0JTIwdGhlJTIwZHJ5JTIwaW5ncmVkaWVudHMlMjB0b2dldGhlciU1Q24tJTIwRm9sZCUyMGxpZ2h0bHklMjBhbmQlMjB0aG9yb3VnaGx5JTIwaW50byUyMHRoZSUyMGJhbmFuYSUyMG1peHR1cmUlNUNuLSUyMFBvdXIlMjB0aGUlMjBiYXR0ZXIlMjBpbnRvJTIwcHJlcGFyZWQlMjBsb2FmJTIwcGFucyU1Q24tJTIwQmFrZSUyMGZvciUyMDQ1JTIwdG8lMjA1NSUyMG1pbnV0ZXMlMkMlMjB1bnRpbCUyMHRoZSUyMGxvYXZlcyUyMGFyZSUyMGZpcm0lMjBpbiUyMHRoZSUyMG1pZGRsZSUyMGFuZCUyMHRoZSUyMGVkZ2VzJTIwYmVnaW4lMjB0byUyMHB1bGwlMjBhd2F5JTIwZnJvbSUyMHRoZSUyMHBhbnMlNUNuLSUyMENvb2wlMjB0aGUlMjBsb2F2ZXMlMjBvbiUyMHJhY2tzJTIwZm9
yJTIwMzAlMjBtaW51dGVzJTIwYmVmb3JlJTIwcmVtb3ZpbmclMjBmcm9tJTIwdGhlJTIwcGFucyU1Q24tJTIwRnJlZXplcyUyMHdlbGwlMjIlMkMlMEElMjAlMjAlMjAlMjAnbmFtZXMnJTNBJTIwJ0JlYXQlMjB0aGlzJTIwYmFuYW5hJTIwYnJlYWQnJTBBJTdE",highlighted:`{ | |
| <span class="hljs-string">'recipes'</span>: <span class="hljs-comment">"- Preheat oven to 350 degrees\\n- Butter two 9x5' loaf pans\\n- Cream the sugar and the butter until light and whipped\\n- Add the bananas, eggs, lemon juice, orange rind\\n- Beat until blended uniformly\\n- Be patient, and beat until the banana lumps are gone\\n- Sift the dry ingredients together\\n- Fold lightly and thoroughly into the banana mixture\\n- Pour the batter into prepared loaf pans\\n- Bake for 45 to 55 minutes, until the loaves are firm in the middle and the edges begin to pull away from the pans\\n- Cool the loaves on racks for 30 minutes before removing from the pans\\n- Freezes well"</span>, | |
| <span class="hljs-string">'names'</span>: <span class="hljs-string">'Beat this banana bread'</span> | |
| }`,wrap:!1}}),R=new c({props:{code:"ZnJvbSUyMHJhbmRvbSUyMGltcG9ydCUyMHJhbmRyYW5nZSUwQSUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQSUwQSUwQSUyMyUyMExvYWQlMjBkYXRhc2V0JTIwZnJvbSUyMHRoZSUyMGh1YiUwQWRhdGFzZXRfaWQlMjAlM0QlMjAlMjJ0ZW5nb211Y2hvJTJGc2ltcGxlX3JlY2lwZXMlMjIlMEFyZWNpcGVzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KGRhdGFzZXRfaWQlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQSUwQWRhdGFzZXRfc2l6ZSUyMCUzRCUyMGxlbihyZWNpcGVzKSUwQXByaW50KGYlMjJkYXRhc2V0JTIwc2l6ZSUzQSUyMCU3QmRhdGFzZXRfc2l6ZSU3RCUyMiklMEFwcmludChyZWNpcGVzJTVCcmFuZHJhbmdlKGRhdGFzZXRfc2l6ZSklNUQpJTBBJTIzJTIwZGF0YXNldCUyMHNpemUlM0ElMjAyMDAwMA==",highlighted:`<span class="hljs-keyword">from</span> random <span class="hljs-keyword">import</span> randrange | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-comment"># Load dataset from the hub</span> | |
| dataset_id = <span class="hljs-string">"tengomucho/simple_recipes"</span> | |
| recipes = load_dataset(dataset_id, split=<span class="hljs-string">"train"</span>) | |
| dataset_size = <span class="hljs-built_in">len</span>(recipes) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"dataset size: <span class="hljs-subst">{dataset_size}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(recipes[randrange(dataset_size)]) | |
| <span class="hljs-comment"># dataset size: 20000</span>`,wrap:!1}}),A=new c({props:{code:"JTIzJTIwUHJlcHJvY2Vzc2VzJTIwdGhlJTIwZGF0YXNldCUwQWRlZiUyMHByZXByb2Nlc3NfZGF0YXNldF93aXRoX2Vvcyhlb3NfdG9rZW4pJTNBJTBBJTIwJTIwJTIwJTIwZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZWNpcGVzJTIwJTNEJTIwZXhhbXBsZXMlNUIlMjJyZWNpcGVzJTIyJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmFtZXMlMjAlM0QlMjBleGFtcGxlcyU1QiUyMm5hbWVzJTIyJTVEJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hhdHMlMjAlM0QlMjAlNUIlNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjByZWNpcGUlMkMlMjBuYW1lJTIwaW4lMjB6aXAocmVjaXBlcyUyQyUyMG5hbWVzKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMEFwcGVuZCUyMHRoZSUyMEVPUyUyMHRva2VuJTIwdG8lMjB0aGUlMjByZXNwb25zZSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlY2lwZSUyMCUyQiUzRCUyMGVvc190b2tlbiUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNoYXQlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBmJTIySG93JTIwY2FuJTIwSSUyMG1ha2UlMjAlN0JuYW1lJTdEJTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjByZWNpcGUlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUQlMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjaGF0cy5hcHBlbmQoY2hhdCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZXR1cm4lMjAlN0IlMjJtZXNzYWdlcyUyMiUzQSUyMGNoYXRzJTdEJTBBJTBBJTIwJTIwJTIwJTIwZGF0YXNldCUyMCUzRCUyMHJlY2lwZXMubWFwKHByZXByb2Nlc3NfZnVuY3Rpb24lMkMlMjBiYXRjaGVkJTNEVHJ1ZSUyQyUyMHJlbW92ZV9jb2x1bW5zJTNEcmVjaXBlcy5jb2x1bW5fbmFtZXMpJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwZGF0YXNldCUwQSUwQSUyMyUyMFN0cnVjdHVyZXMlMjB0aGUlMjBkYXRhc2V0JTIwaW50byUyMHByb21wdC1leHBlY3RlZCUyMG91dHB1dCUyMHBhaXJzLiUwQWRlZiUyMGZvcm1hdHRpbmdfZnVuY3Rpb24oZXhhbXBsZXMpJTNBJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwdG9rZW5pemVyLmFwcGx5X2NoYXRfdGVtcGxhdGUoZXhhbXBsZXMlNUIlMjJtZXNzYWdlcyUyMiU1
RCUyQyUyMHRva2VuaXplJTNERmFsc2UlMkMlMjBhZGRfZ2VuZXJhdGlvbl9wcm9tcHQlM0RGYWxzZSk=",highlighted:`<span class="hljs-comment"># Preprocesses the dataset</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_dataset_with_eos</span>(<span class="hljs-params">eos_token</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>): | |
| recipes = examples[<span class="hljs-string">"recipes"</span>] | |
| names = examples[<span class="hljs-string">"names"</span>] | |
| chats = [] | |
| <span class="hljs-keyword">for</span> recipe, name <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(recipes, names): | |
| <span class="hljs-comment"># Append the EOS token to the response</span> | |
| recipe += eos_token | |
| chat = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">f"How can I make <span class="hljs-subst">{name}</span>?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: recipe}, | |
| ] | |
| chats.append(chat) | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"messages"</span>: chats} | |
| dataset = recipes.<span class="hljs-built_in">map</span>(preprocess_function, batched=<span class="hljs-literal">True</span>, remove_columns=recipes.column_names) | |
| <span class="hljs-keyword">return</span> dataset | |
| <span class="hljs-comment"># Structures the dataset into prompt-expected output pairs.</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">formatting_function</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-keyword">return</span> tokenizer.apply_chat_template(examples[<span class="hljs-string">"messages"</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)`,wrap:!1}}),G=new pl({props:{title:"3. 🎯 Fine-tune Qwen3 with NeuronSFTTrainer and PEFT",local:"3--fine-tune-qwen3-with-neuronsfttrainer-and-peft",headingTag:"h2"}}),v=new c({props:{code:"bW9kZWxfaWQlMjAlM0QlMjAlMjJRd2VuJTJGUXdlbjMtOEIlMjIlMEElMEElMjMlMjBEZWZpbmUlMjB0aGUlMjB0cmFpbmluZyUyMGFyZ3VtZW50cyUwQW91dHB1dF9kaXIlMjAlM0QlMjAlMjJxd2VuMy1maW5ldHVuZWQtcmVjaXBlcyUyMiUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBOZXVyb25UcmFpbmluZ0FyZ3VtZW50cyglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEb3V0cHV0X2RpciUyQyUwQSUyMCUyMCUyMCUyMG51bV90cmFpbl9lcG9jaHMlM0QzJTJDJTBBJTIwJTIwJTIwJTIwZG9fdHJhaW4lM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3N0ZXBzJTNELTElMkMlMjAlMjAlMjMlMjAtMSUyMG1lYW5zJTIwdHJhaW4lMjB1bnRpbCUyMHRoZSUyMGVuZCUyMG9mJTIwdGhlJTIwZGF0YXNldCUwQSUyMCUyMCUyMCUyMHBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUzRDElMkMlMEElMjAlMjAlMjAlMjBncmFkaWVudF9hY2N1bXVsYXRpb25fc3RlcHMlM0Q4JTJDJTBBJTIwJTIwJTIwJTIwbGVhcm5pbmdfcmF0ZSUzRDVlLTQlMkMlMEElMjAlMjAlMjAlMjBiZjE2JTNEVHJ1ZSUyQyUyMCUyMCUwQSUyMCUyMCUyMCUyMHRlbnNvcl9wYXJhbGxlbF9zaXplJTNEOCUyQyUwQSUyMCUyMCUyMCUyMGxvZ2dpbmdfc3RlcHMlM0QyJTJDJTBBJTIwJTIwJTIwJTIwbHJfc2NoZWR1bGVyX3R5cGUlM0QlMjJjb3NpbmUlMjIlMkMlMEElMjAlMjAlMjAlMjBvdmVyd3JpdGVfb3V0cHV0X2RpciUzRFRydWUlMkMlMEEpJTBBJTBBJTIzJTIwTG9hZCUyMHRoZSUyMG1vZGVsJTIwd2l0aCUyMHRoZSUyME5ldXJvbk1vZGVsRm9yQ2F1c2FsTE0lMjBjbGFzcy4lMEElMjMlMjBJdCUyMHdpbGwlMjBsb2FkJTIwdGhlJTIwbW9kZWwlMjB3aXRoJTIwYSUyMGN1c3RvbSUyMG1vZGVsaW5nJTIwc3BlZmljaWNhbGx5JTIwZGVzaWduZWQlMjBmb3IlMjBBV1MlMjBUcmFpbml1bS4lMEF0cm5fY29uZmlnJTIwJTNEJTIwdHJhaW5pbmdfYXJncy50cm5fY29uZmlnJTBBZHR5cGUlMjAlM0QlMjB0b3JjaC5iZmxvYXQxNiUyMGlmJTIwdHJhaW5pbmdfYXJncy5iZjE2JTIwZWxzZSUyMHRvcmNoLmZsb2F0MzIlMEFtb2RlbCUyMCUzRCUyME5ldXJvbk1vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTBBJTIwJTIwJTIwJTIwdHJuX2NvbmZpZyU
yQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEZHR5cGUlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjBVc2UlMjBGbGFzaEF0dGVudGlvbjIlMjBmb3IlMjBiZXR0ZXIlMjBwZXJmb3JtYW5jZSUyMGFuZCUyMHRvJTIwYmUlMjBhYmxlJTIwdG8lMjB1c2UlMjBsYXJnZXIlMjBzZXF1ZW5jZSUyMGxlbmd0aHMuJTBBJTIwJTIwJTIwJTIwdXNlX2ZsYXNoX2F0dGVudGlvbl8yJTNEVHJ1ZSUyQyUwQSklMEElMEFsb3JhX2NvbmZpZyUyMCUzRCUyMExvcmFDb25maWcoJTBBJTIwJTIwJTIwJTIwciUzRDY0JTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9hbHBoYSUzRDEyOCUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjB0YXJnZXRfbW9kdWxlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVtYmVkX3Rva2VucyUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnFfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnZfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm9fcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmtfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnVwX3Byb2olMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJkb3duX3Byb2olMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJnYXRlX3Byb2olMjIlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSklMEElMEElMjMlMjBDb252ZXJ0aW5nJTIwdGhlJTIwTmV1cm9uVHJhaW5pbmdBcmd1bWVudHMlMjB0byUyMGElMjBkaWN0aW9uYXJ5JTIwdG8lMjBmZWVkJTIwdGhlbSUyMHRvJTIwdGhlJTIwTmV1cm9uU0ZUQ29uZmlnLiUwQWFyZ3MlMjAlM0QlMjB0cmFpbmluZ19hcmdzLnRvX2RpY3QoKSUwQSUwQXNmdF9jb25maWclMjAlM0QlMjBOZXVyb25TRlRDb25maWcoJTBBJTIwJTIwJTIwJTIwbWF4X3NlcV9sZW5ndGglM0Q0MDk2JTJDJTBBJTIwJTIwJTIwJTIwcGFja2luZyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAqKmFyZ3MlMkMlMEEpJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpJTBBZGF0YXNldCUyMCUzRCUyMHByZXByb2Nlc3NfZGF0YXNldF93aXRoX2Vvcyh0b2tlbml6ZXIuZW9zX3Rva2VuKSUwQSUwQSUyMGRlZiUyMGZvcm1hdHRpbmdfZnVuY3Rpb24oZXhhbXBsZXMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwdG9rZW5pemVyLmFwcGx5X2NoYXRfdGVtcGxhdGUoZXhhbXBsZXMlNUIlMjJtZXNzYWdlcyUyMiU1RCUyQyUyMHRva2VuaXplJTNERmFsc2UlMkMlMjBhZGRfZ2VuZXJhdGlvbl9wcm9tcHQlM0RGYWxzZSklMEElMEElMjAlMjMlMjBUaGUlMjBOZXVyb25TRlR
UcmFpbmVyJTIwd2lsbCUyMHVzZSUyMCU2MGZvcm1hdHRpbmdfZnVuY3Rpb24lNjAlMjB0byUyMGZvcm1hdCUyMHRoZSUyMGRhdGFzZXQlMjBhbmQlMjAlNjBsb3JhX2NvbmZpZyU2MCUyMHRvJTIwYXBwbHklMjBMb1JBJTIwb24lMjB0aGUlMEElMjAlMjMlMjBtb2RlbC4lMEElMjB0cmFpbmVyJTIwJTNEJTIwTmV1cm9uU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjAlMjBhcmdzJTNEc2Z0X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMCUyMG1vZGVsJTNEbW9kZWwlMkMlMEElMjAlMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRGxvcmFfY29uZmlnJTJDJTBBJTIwJTIwJTIwJTIwJTIwdG9rZW5pemVyJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjBmb3JtYXR0aW5nX2Z1bmMlM0Rmb3JtYXR0aW5nX2Z1bmN0aW9uJTJDJTBBJTIwKSUwQSUyMHRyYWluZXIudHJhaW4oKQ==",highlighted:`model_id = <span class="hljs-string">"Qwen/Qwen3-8B"</span> | |
| <span class="hljs-comment"># Define the training arguments</span> | |
| output_dir = <span class="hljs-string">"qwen3-finetuned-recipes"</span> | |
| training_args = NeuronTrainingArguments( | |
| output_dir=output_dir, | |
| num_train_epochs=<span class="hljs-number">3</span>, | |
| do_train=<span class="hljs-literal">True</span>, | |
| max_steps=-<span class="hljs-number">1</span>, <span class="hljs-comment"># -1 means train until the end of the dataset</span> | |
| per_device_train_batch_size=<span class="hljs-number">1</span>, | |
| gradient_accumulation_steps=<span class="hljs-number">8</span>, | |
| learning_rate=<span class="hljs-number">5e-4</span>, | |
| bf16=<span class="hljs-literal">True</span>, | |
| tensor_parallel_size=<span class="hljs-number">8</span>, | |
| logging_steps=<span class="hljs-number">2</span>, | |
| lr_scheduler_type=<span class="hljs-string">"cosine"</span>, | |
| overwrite_output_dir=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-comment"># Load the model with the NeuronModelForCausalLM class.</span> | |
| <span class="hljs-comment"># It will load the model with a custom modeling speficically designed for AWS Trainium.</span> | |
| trn_config = training_args.trn_config | |
| dtype = torch.bfloat16 <span class="hljs-keyword">if</span> training_args.bf16 <span class="hljs-keyword">else</span> torch.float32 | |
| model = NeuronModelForCausalLM.from_pretrained( | |
| model_id, | |
| trn_config, | |
| torch_dtype=dtype, | |
| <span class="hljs-comment"># Use FlashAttention2 for better performance and to be able to use larger sequence lengths.</span> | |
| use_flash_attention_2=<span class="hljs-literal">True</span>, | |
| ) | |
| lora_config = LoraConfig( | |
| r=<span class="hljs-number">64</span>, | |
| lora_alpha=<span class="hljs-number">128</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| target_modules=[ | |
| <span class="hljs-string">"embed_tokens"</span>, | |
| <span class="hljs-string">"q_proj"</span>, | |
| <span class="hljs-string">"v_proj"</span>, | |
| <span class="hljs-string">"o_proj"</span>, | |
| <span class="hljs-string">"k_proj"</span>, | |
| <span class="hljs-string">"up_proj"</span>, | |
| <span class="hljs-string">"down_proj"</span>, | |
| <span class="hljs-string">"gate_proj"</span>, | |
| ], | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| <span class="hljs-comment"># Converting the NeuronTrainingArguments to a dictionary to feed them to the NeuronSFTConfig.</span> | |
| args = training_args.to_dict() | |
| sft_config = NeuronSFTConfig( | |
| max_seq_length=<span class="hljs-number">4096</span>, | |
| packing=<span class="hljs-literal">True</span>, | |
| **args, | |
| ) | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| dataset = preprocess_dataset_with_eos(tokenizer.eos_token) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">formatting_function</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-keyword">return</span> tokenizer.apply_chat_template(examples[<span class="hljs-string">"messages"</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>) | |
| <span class="hljs-comment"># The NeuronSFTTrainer will use \`formatting_function\` to format the dataset and \`lora_config\` to apply LoRA on the</span> | |
| <span class="hljs-comment"># model.</span> | |
| trainer = NeuronSFTTrainer( | |
| args=sft_config, | |
| model=model, | |
| peft_config=lora_config, | |
| tokenizer=tokenizer, | |
| train_dataset=dataset, | |
| formatting_func=formatting_function, | |
| ) | |
| trainer.train()`,wrap:!1}}),x=new c({props:{code:"JTIzJTIwRmxhZ3MlMjBmb3IlMjBOZXVyb24lMjBjb21waWxhdGlvbiUwQWV4cG9ydCUyME5FVVJPTl9DQ19GTEFHUyUzRCUyMi0tbW9kZWwtdHlwZSUyMHRyYW5zZm9ybWVyJTIwLS1yZXRyeV9mYWlsZWRfY29tcGlsYXRpb24lMjIlMEFleHBvcnQlMjBORVVST05fRlVTRV9TT0ZUTUFYJTNEMSUwQWV4cG9ydCUyME5FVVJPTl9SVF9BU1lOQ19FWEVDX01BWF9JTkZMSUdIVF9SRVFVRVNUUyUzRDMlMjAlMjMlMjBBc3luYyUyMFJ1bnRpbWUlMEFleHBvcnQlMjBNQUxMT0NfQVJFTkFfTUFYJTNENjQlMjAlMjMlMjBIb3N0JTIwT09NJTIwbWl0aWdhdGlvbiUwQSUwQSUyMyUyMFZhcmlhYmxlcyUyMGZvciUyMHRyYWluaW5nJTBBUFJPQ0VTU0VTX1BFUl9OT0RFJTNEMzIlMEFOVU1fRVBPQ0hTJTNEMyUwQVRQX0RFR1JFRSUzRDglMEFCUyUzRDElMEFHUkFESUVOVF9BQ0NVTVVMQVRJT05fU1RFUFMlM0Q4JTBBTE9HR0lOR19TVEVQUyUzRDIlMEFNT0RFTF9OQU1FJTNEJTIyUXdlbiUyRlF3ZW4zLThCJTIyJTIwJTIzJTIwQ2hhbmdlJTIwdGhpcyUyMHRvJTIwdGhlJTIwZGVzaXJlZCUyMG1vZGVsJTIwbmFtZSUwQU9VVFBVVF9ESVIlM0QlMjIlMjQoZWNobyUyMCUyNE1PREVMX05BTUUlMjAlN0MlMjBjdXQlMjAtZCclMkYnJTIwLWYyKS1maW5ldHVuZWQlMjIlMEFESVNUUklCVVRFRF9BUkdTJTNEJTIyLS1ucHJvY19wZXJfbm9kZSUyMCUyNFBST0NFU1NFU19QRVJfTk9ERSUyMiUwQVNDUklQVF9ESVIlM0QlMjQoJTIwY2QlMjAtLSUyMCUyMiUyNCglMjBkaXJuYW1lJTIwLS0lMjAlMjIlMjQlN0JCQVNIX1NPVVJDRSU1QjAlNUQlN0QlMjIlMjApJTIyJTIwJTI2JTNFJTIwJTJGZGV2JTJGbnVsbCUyMCUyNiUyNiUyMHB3ZCUyMCklMEElMEFpZiUyMCU1QiUyMCUyMiUyNE5FVVJPTl9FWFRSQUNUX0dSQVBIU19PTkxZJTIyJTIwJTNEJTIwJTIyMSUyMiUyMCU1RCUzQiUyMHRoZW4lMEElMjAlMjAlMjAlMjBNQVhfU1RFUFMlM0Q1JTBBZWxzZSUwQSUyMCUyMCUyMCUyME1BWF9TVEVQUyUzRC0xJTBBZmklMEElMEF0b3JjaHJ1biUyMC0tbnByb2NfcGVyX25vZGUlMjAlMjRQUk9DRVNTRVNfUEVSX05PREUlMjBmaW5ldHVuZV9xd2VuMy5weSUyMCU1QyUwQSUyMCUyMC0tbW9kZWxfaWQlMjAlMjRNT0RFTF9OQU1FJTIwJTVDJTBBJTIwJTIwLS1udW1fdHJhaW5fZXBvY2hzJTIwJTI0TlVNX0VQT0NIUyUyMCU1QyUwQSUyMCUyMC0tZG9fdHJhaW4lMjAlNUMlMEElMjAlMjAtLW1heF9zdGVwcyUyMCUyNE1BWF9TVEVQUyUyMCU1QyUwQSUyMCUyMC0tcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTIwJTI0QlMlMjAlNUMlMEElMjAlMjAtLWdyYWRpZW50X2FjY3VtdWxhdGlvbl9zdGVwcyUyMCUyNEdSQURJRU5UX0FDQ1VNVUxBVElPTl9TVEVQUyUyMCU1QyUwQSUyMCUyMC0tbGVhcm5pbmdfcmF0ZSUyMDhlLTQlMjAlNUMlMEElMjAlMjAtLWJmMTYlMjAlNUMlMEElMjAlMjAtLXRlbnNvcl9w
YXJhbGxlbF9zaXplJTIwJTI0VFBfREVHUkVFJTIwJTVDJTBBJTIwJTIwLS16ZXJvXzElMjAlNUMlMEElMjAlMjAtLWFzeW5jX3NhdmUlMjAlNUMlMEElMjAlMjAtLWxvZ2dpbmdfc3RlcHMlMjAlMjRMT0dHSU5HX1NURVBTJTIwJTVDJTBBJTIwJTIwLS1vdXRwdXRfZGlyJTIwJTI0T1VUUFVUX0RJUiUyMCU1QyUwQSUyMCUyMC0tbHJfc2NoZWR1bGVyX3R5cGUlMjAlMjJjb3NpbmUlMjIlMjAlNUMlMEElMjAlMjAtLW92ZXJ3cml0ZV9vdXRwdXRfZGly",highlighted:`<span class="hljs-comment"># Flags for Neuron compilation</span> | |
| <span class="hljs-built_in">export</span> NEURON_CC_FLAGS=<span class="hljs-string">"--model-type transformer --retry_failed_compilation"</span> | |
| <span class="hljs-built_in">export</span> NEURON_FUSE_SOFTMAX=1 | |
| <span class="hljs-built_in">export</span> NEURON_RT_ASYNC_EXEC_MAX_INFLIGHT_REQUESTS=3 <span class="hljs-comment"># Async Runtime</span> | |
| <span class="hljs-built_in">export</span> MALLOC_ARENA_MAX=64 <span class="hljs-comment"># Host OOM mitigation</span> | |
| <span class="hljs-comment"># Variables for training</span> | |
| PROCESSES_PER_NODE=32 | |
| NUM_EPOCHS=3 | |
| TP_DEGREE=8 | |
| BS=1 | |
| GRADIENT_ACCUMULATION_STEPS=8 | |
| LOGGING_STEPS=2 | |
| MODEL_NAME=<span class="hljs-string">"Qwen/Qwen3-8B"</span> <span class="hljs-comment"># Change this to the desired model name</span> | |
| OUTPUT_DIR=<span class="hljs-string">"<span class="hljs-subst">$(echo $MODEL_NAME | cut -d'/' -f2)</span>-finetuned"</span> | |
| DISTRIBUTED_ARGS=<span class="hljs-string">"--nproc_per_node <span class="hljs-variable">$PROCESSES_PER_NODE</span>"</span> | |
| SCRIPT_DIR=$( <span class="hljs-built_in">cd</span> -- <span class="hljs-string">"<span class="hljs-subst">$( dirname -- <span class="hljs-string">"<span class="hljs-variable">\${BASH_SOURCE[0]}</span>"</span> )</span>"</span> &> /dev/null && <span class="hljs-built_in">pwd</span> ) | |
| <span class="hljs-keyword">if</span> [ <span class="hljs-string">"<span class="hljs-variable">$NEURON_EXTRACT_GRAPHS_ONLY</span>"</span> = <span class="hljs-string">"1"</span> ]; <span class="hljs-keyword">then</span> | |
| MAX_STEPS=5 | |
| <span class="hljs-keyword">else</span> | |
| MAX_STEPS=-1 | |
| <span class="hljs-keyword">fi</span> | |
| torchrun --nproc_per_node <span class="hljs-variable">$PROCESSES_PER_NODE</span> finetune_qwen3.py \\ | |
| --model_id <span class="hljs-variable">$MODEL_NAME</span> \\ | |
| --num_train_epochs <span class="hljs-variable">$NUM_EPOCHS</span> \\ | |
| --do_train \\ | |
| --max_steps <span class="hljs-variable">$MAX_STEPS</span> \\ | |
| --per_device_train_batch_size <span class="hljs-variable">$BS</span> \\ | |
| --gradient_accumulation_steps <span class="hljs-variable">$GRADIENT_ACCUMULATION_STEPS</span> \\ | |
| --learning_rate 8e-4 \\ | |
| --bf16 \\ | |
| --tensor_parallel_size <span class="hljs-variable">$TP_DEGREE</span> \\ | |
| --zero_1 \\ | |
| --async_save \\ | |
| --logging_steps <span class="hljs-variable">$LOGGING_STEPS</span> \\ | |
| --output_dir <span class="hljs-variable">$OUTPUT_DIR</span> \\ | |
| --lr_scheduler_type <span class="hljs-string">"cosine"</span> \\ | |
| --overwrite_output_dir`,wrap:!1}}),Y=new c({props:{code:"LiUyRmZpbmV0dW5lX3F3ZW4zLnNo",highlighted:"./finetune_qwen3.sh",wrap:!1}}),$=new pl({props:{title:"4. 🔄 Consolidate and Test the Fine-Tuned Model",local:"4--consolidate-and-test-the-fine-tuned-model",headingTag:"h2"}}),P=new c({props:{code:"b3B0aW11bS1jbGklMjBuZXVyb24lMjBjb25zb2xpZGF0ZSUyMFF3ZW4zLThCLWZpbmV0dW5lZCUyMFF3ZW4zLThCLWZpbmV0dW5lZCUyRmFkYXB0ZXJfZGVmYXVsdA==",highlighted:"optimum-cli neuron consolidate Qwen3-8B-finetuned Qwen3-8B-finetuned/adapter_default",wrap:!1}}),O=new c({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwUGVmdE1vZGVsJTJDJTIwUGVmdENvbmZpZyUwQSUwQSUwQU1PREVMX05BTUUlMjAlM0QlMjAlMjJRd2VuJTJGUXdlbjMtOEIlMjIlMEFBREFQVEVSX1BBVEglMjAlM0QlMjAlMjJRd2VuMy04Qi1maW5ldHVuZWQlMkZhZGFwdGVyX2RlZmF1bHQlMjIlMEFNRVJHRURfTU9ERUxfUEFUSCUyMCUzRCUyMCUyMlF3ZW4zLThCLXJlY2lwZXMlMjIlMEElMEElMjMlMjBMb2FkJTIwYmFzZSUyMG1vZGVsJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoTU9ERUxfTkFNRSklMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChNT0RFTF9OQU1FKSUwQSUwQSUyMyUyMExvYWQlMjBhZGFwdGVyJTIwY29uZmlndXJhdGlvbiUyMGFuZCUyMG1vZGVsJTBBYWRhcHRlcl9jb25maWclMjAlM0QlMjBQZWZ0Q29uZmlnLmZyb21fcHJldHJhaW5lZChBREFQVEVSX1BBVEgpJTBBZmluZXR1bmVkX21vZGVsJTIwJTNEJTIwUGVmdE1vZGVsLmZyb21fcHJldHJhaW5lZChtb2RlbCUyQyUyMEFEQVBURVJfUEFUSCUyQyUyMGNvbmZpZyUzRGFkYXB0ZXJfY29uZmlnKSUwQSUwQXByaW50KCUyMlNhdmluZyUyMHRva2VuaXplciUyMiklMEF0b2tlbml6ZXIuc2F2ZV9wcmV0cmFpbmVkKE1FUkdFRF9NT0RFTF9QQVRIKSUwQXByaW50KCUyMlNhdmluZyUyMG1vZGVsJTIyKSUwQWZpbmV0dW5lZF9tb2RlbCUyMCUzRCUyMGZpbmV0dW5lZF9tb2RlbC5tZXJnZV9hbmRfdW5sb2FkKCklMEFmaW5ldHVuZWRfbW9kZWwuc2F2ZV9wcmV0cmFpbmVkKE1FUkdFRF9NT0RFTF9QQVRIKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel, PeftConfig | |
| MODEL_NAME = <span class="hljs-string">"Qwen/Qwen3-8B"</span> | |
| ADAPTER_PATH = <span class="hljs-string">"Qwen3-8B-finetuned/adapter_default"</span> | |
| MERGED_MODEL_PATH = <span class="hljs-string">"Qwen3-8B-recipes"</span> | |
| <span class="hljs-comment"># Load base model</span> | |
| model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) | |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) | |
| <span class="hljs-comment"># Load adapter configuration and model</span> | |
| adapter_config = PeftConfig.from_pretrained(ADAPTER_PATH) | |
| finetuned_model = PeftModel.from_pretrained(model, ADAPTER_PATH, config=adapter_config) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"Saving tokenizer"</span>) | |
| tokenizer.save_pretrained(MERGED_MODEL_PATH) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"Saving model"</span>) | |
| finetuned_model = finetuned_model.merge_and_unload() | |
| finetuned_model.save_pretrained(MERGED_MODEL_PATH)`,wrap:!1}}),el=new pl({props:{title:"5. 🤗 Push to Hugging Face Hub",local:"5--push-to-hugging-face-hub",headingTag:"h2"}}),nl=new c({props:{code:"aHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4=",highlighted:"huggingface-cli login",wrap:!1}}),Ml=new c({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQU1FUkdFRF9NT0RFTF9QQVRIJTIwJTNEJTIwJTIyUXdlbjMtOEItcmVjaXBlcyUyMiUwQUhVQl9NT0RFTF9OQU1FJTIwJTNEJTIwJTIyeW91ci11c2VybmFtZSUyRnF3ZW4zLThiLXJlY2lwZXMlMjIlMEElMEElMjMlMjBMb2FkJTIwYW5kJTIwcHVzaCUyMHRva2VuaXplciUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKE1FUkdFRF9NT0RFTF9QQVRIKSUwQXRva2VuaXplci5wdXNoX3RvX2h1YihIVUJfTU9ERUxfTkFNRSklMEElMEElMjMlMjBMb2FkJTIwYW5kJTIwcHVzaCUyMG1vZGVsJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoTUVSR0VEX01PREVMX1BBVEgpJTBBbW9kZWwucHVzaF90b19odWIoSFVCX01PREVMX05BTUUp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| MERGED_MODEL_PATH = <span class="hljs-string">"Qwen3-8B-recipes"</span> | |
| HUB_MODEL_NAME = <span class="hljs-string">"your-username/qwen3-8b-recipes"</span> | |
| <span class="hljs-comment"># Load and push tokenizer</span> | |
| tokenizer = AutoTokenizer.from_pretrained(MERGED_MODEL_PATH) | |
| tokenizer.push_to_hub(HUB_MODEL_NAME) | |
| <span class="hljs-comment"># Load and push model</span> | |
| model = AutoModelForCausalLM.from_pretrained(MERGED_MODEL_PATH) | |
| model.push_to_hub(HUB_MODEL_NAME)`,wrap:!1}}),{c(){u=M("meta"),yl=n(),ol=M("p"),rl=n(),o(d.$$.fragment),Tl=n(),j=M("p"),j.textContent=Me,ml=n(),J=M("p"),J.innerHTML=ie,cl=n(),o(w.$$.fragment),ul=n(),h=M("p"),h.innerHTML=pe,dl=n(),b=M("p"),b.textContent=oe,jl=n(),f=M("ul"),f.innerHTML=Ue,Jl=n(),Z=M("p"),Z.innerHTML=ye,wl=n(),o(I.$$.fragment),hl=n(),C=M("p"),C.innerHTML=re,bl=n(),o(E.$$.fragment),fl=n(),B=M("p"),B.innerHTML=Te,Zl=n(),o(R.$$.fragment),Il=n(),V=M("p"),V.textContent=me,Cl=n(),_=M("p"),_.textContent=ce,El=n(),o(A.$$.fragment),Bl=n(),F=M("p"),F.innerHTML=ue,Rl=n(),o(G.$$.fragment),Vl=n(),Q=M("p"),Q.innerHTML=de,_l=n(),X=M("p"),X.innerHTML=je,Al=n(),g=M("p"),g.innerHTML=Je,Fl=n(),N=M("ul"),N.innerHTML=we,Gl=n(),W=M("p"),W.textContent=he,Ql=n(),S=M("p"),S.textContent=be,Xl=n(),o(v.$$.fragment),gl=n(),H=M("p"),H.innerHTML=fe,Nl=n(),k=M("p"),k.textContent=Ze,Wl=n(),o(x.$$.fragment),Sl=n(),z=M("p"),z.innerHTML=Ie,vl=n(),o(Y.$$.fragment),Hl=n(),o($.$$.fragment),kl=n(),L=M("p"),L.textContent=Ce,xl=n(),q=M("p"),q.textContent=Ee,zl=n(),o(P.$$.fragment),Yl=n(),D=M("p"),D.innerHTML=Be,$l=n(),o(O.$$.fragment),Ll=n(),K=M("p"),K.textContent=Re,ql=n(),ll=M("p"),ll.textContent=Ve,Pl=n(),o(el.$$.fragment),Dl=n(),tl=M("p"),tl.textContent=_e,Ol=n(),sl=M("p"),sl.innerHTML=Ae,Kl=n(),o(nl.$$.fragment),le=n(),al=M("p"),al.innerHTML=Fe,ee=n(),o(Ml.$$.fragment),te=n(),il=M("p"),il.innerHTML=Ge,se=n(),Ul=M("p"),this.h()},l(l){const 
e=He("svelte-u9bgzb",document.head);u=i(e,"META",{name:!0,content:!0}),e.forEach(t),yl=a(l),ol=i(l,"P",{}),Qe(ol).forEach(t),rl=a(l),U(d.$$.fragment,l),Tl=a(l),j=i(l,"P",{"data-svelte-h":!0}),p(j)!=="svelte-61i5q2"&&(j.textContent=Me),ml=a(l),J=i(l,"P",{"data-svelte-h":!0}),p(J)!=="svelte-1vtxnz9"&&(J.innerHTML=ie),cl=a(l),U(w.$$.fragment,l),ul=a(l),h=i(l,"P",{"data-svelte-h":!0}),p(h)!=="svelte-43r1bv"&&(h.innerHTML=pe),dl=a(l),b=i(l,"P",{"data-svelte-h":!0}),p(b)!=="svelte-1ktungo"&&(b.textContent=oe),jl=a(l),f=i(l,"UL",{"data-svelte-h":!0}),p(f)!=="svelte-1efvabb"&&(f.innerHTML=Ue),Jl=a(l),Z=i(l,"P",{"data-svelte-h":!0}),p(Z)!=="svelte-ea5rfz"&&(Z.innerHTML=ye),wl=a(l),U(I.$$.fragment,l),hl=a(l),C=i(l,"P",{"data-svelte-h":!0}),p(C)!=="svelte-1uki1hp"&&(C.innerHTML=re),bl=a(l),U(E.$$.fragment,l),fl=a(l),B=i(l,"P",{"data-svelte-h":!0}),p(B)!=="svelte-25b86y"&&(B.innerHTML=Te),Zl=a(l),U(R.$$.fragment,l),Il=a(l),V=i(l,"P",{"data-svelte-h":!0}),p(V)!=="svelte-1e4z25x"&&(V.textContent=me),Cl=a(l),_=i(l,"P",{"data-svelte-h":!0}),p(_)!=="svelte-13ur3kw"&&(_.textContent=ce),El=a(l),U(A.$$.fragment,l),Bl=a(l),F=i(l,"P",{"data-svelte-h":!0}),p(F)!=="svelte-rgxks7"&&(F.innerHTML=ue),Rl=a(l),U(G.$$.fragment,l),Vl=a(l),Q=i(l,"P",{"data-svelte-h":!0}),p(Q)!=="svelte-1iw1mwz"&&(Q.innerHTML=de),_l=a(l),X=i(l,"P",{"data-svelte-h":!0}),p(X)!=="svelte-dh3p5f"&&(X.innerHTML=je),Al=a(l),g=i(l,"P",{"data-svelte-h":!0}),p(g)!=="svelte-1l7hprp"&&(g.innerHTML=Je),Fl=a(l),N=i(l,"UL",{"data-svelte-h":!0}),p(N)!=="svelte-qjcfm2"&&(N.innerHTML=we),Gl=a(l),W=i(l,"P",{"data-svelte-h":!0}),p(W)!=="svelte-1xxdciy"&&(W.textContent=he),Ql=a(l),S=i(l,"P",{"data-svelte-h":!0}),p(S)!=="svelte-1v29c1g"&&(S.textContent=be),Xl=a(l),U(v.$$.fragment,l),gl=a(l),H=i(l,"P",{"data-svelte-h":!0}),p(H)!=="svelte-1loxsby"&&(H.innerHTML=fe),Nl=a(l),k=i(l,"P",{"data-svelte-h":!0}),p(k)!=="svelte-pvqso5"&&(k.textContent=Ze),Wl=a(l),U(x.$$.fragment,l),Sl=a(l),z=i(l,"P",{"data-svelte-h":!0}),p(z)!=="svelte-2cag06"&&(z
.innerHTML=Ie),vl=a(l),U(Y.$$.fragment,l),Hl=a(l),U($.$$.fragment,l),kl=a(l),L=i(l,"P",{"data-svelte-h":!0}),p(L)!=="svelte-46jexq"&&(L.textContent=Ce),xl=a(l),q=i(l,"P",{"data-svelte-h":!0}),p(q)!=="svelte-9a9g75"&&(q.textContent=Ee),zl=a(l),U(P.$$.fragment,l),Yl=a(l),D=i(l,"P",{"data-svelte-h":!0}),p(D)!=="svelte-1cigzjn"&&(D.innerHTML=Be),$l=a(l),U(O.$$.fragment,l),Ll=a(l),K=i(l,"P",{"data-svelte-h":!0}),p(K)!=="svelte-vght15"&&(K.textContent=Re),ql=a(l),ll=i(l,"P",{"data-svelte-h":!0}),p(ll)!=="svelte-1sz4goc"&&(ll.textContent=Ve),Pl=a(l),U(el.$$.fragment,l),Dl=a(l),tl=i(l,"P",{"data-svelte-h":!0}),p(tl)!=="svelte-a60mly"&&(tl.textContent=_e),Ol=a(l),sl=i(l,"P",{"data-svelte-h":!0}),p(sl)!=="svelte-z9yml0"&&(sl.innerHTML=Ae),Kl=a(l),U(nl.$$.fragment,l),le=a(l),al=i(l,"P",{"data-svelte-h":!0}),p(al)!=="svelte-h2brza"&&(al.innerHTML=Fe),ee=a(l),U(Ml.$$.fragment,l),te=a(l),il=i(l,"P",{"data-svelte-h":!0}),p(il)!=="svelte-fnz57k"&&(il.innerHTML=Ge),se=a(l),Ul=i(l,"P",{}),Qe(Ul).forEach(t),this.h()},h(){Xe(u,"name","hf:doc:metadata"),Xe(u,"content",ze)},m(l,e){ke(document.head,u),s(l,yl,e),s(l,ol,e),s(l,rl,e),y(d,l,e),s(l,Tl,e),s(l,j,e),s(l,ml,e),s(l,J,e),s(l,cl,e),y(w,l,e),s(l,ul,e),s(l,h,e),s(l,dl,e),s(l,b,e),s(l,jl,e),s(l,f,e),s(l,Jl,e),s(l,Z,e),s(l,wl,e),y(I,l,e),s(l,hl,e),s(l,C,e),s(l,bl,e),y(E,l,e),s(l,fl,e),s(l,B,e),s(l,Zl,e),y(R,l,e),s(l,Il,e),s(l,V,e),s(l,Cl,e),s(l,_,e),s(l,El,e),y(A,l,e),s(l,Bl,e),s(l,F,e),s(l,Rl,e),y(G,l,e),s(l,Vl,e),s(l,Q,e),s(l,_l,e),s(l,X,e),s(l,Al,e),s(l,g,e),s(l,Fl,e),s(l,N,e),s(l,Gl,e),s(l,W,e),s(l,Ql,e),s(l,S,e),s(l,Xl,e),y(v,l,e),s(l,gl,e),s(l,H,e),s(l,Nl,e),s(l,k,e),s(l,Wl,e),y(x,l,e),s(l,Sl,e),s(l,z,e),s(l,vl,e),y(Y,l,e),s(l,Hl,e),y($,l,e),s(l,kl,e),s(l,L,e),s(l,xl,e),s(l,q,e),s(l,zl,e),y(P,l,e),s(l,Yl,e),s(l,D,e),s(l,$l,e),y(O,l,e),s(l,Ll,e),s(l,K,e),s(l,ql,e),s(l,ll,e),s(l,Pl,e),y(el,l,e),s(l,Dl,e),s(l,tl,e),s(l,Ol,e),s(l,sl,e),s(l,Kl,e),y(nl,l,e),s(l,le,e),s(l,al,e),s(l,ee,e),y(Ml,l,e),s(l,te,e),s(l,il,e),s(l,se,e),s(l,Ul,e),n
e=!0},p:Ne,i(l){ne||(r(d.$$.fragment,l),r(w.$$.fragment,l),r(I.$$.fragment,l),r(E.$$.fragment,l),r(R.$$.fragment,l),r(A.$$.fragment,l),r(G.$$.fragment,l),r(v.$$.fragment,l),r(x.$$.fragment,l),r(Y.$$.fragment,l),r($.$$.fragment,l),r(P.$$.fragment,l),r(O.$$.fragment,l),r(el.$$.fragment,l),r(nl.$$.fragment,l),r(Ml.$$.fragment,l),ne=!0)},o(l){T(d.$$.fragment,l),T(w.$$.fragment,l),T(I.$$.fragment,l),T(E.$$.fragment,l),T(R.$$.fragment,l),T(A.$$.fragment,l),T(G.$$.fragment,l),T(v.$$.fragment,l),T(x.$$.fragment,l),T(Y.$$.fragment,l),T($.$$.fragment,l),T(P.$$.fragment,l),T(O.$$.fragment,l),T(el.$$.fragment,l),T(nl.$$.fragment,l),T(Ml.$$.fragment,l),ne=!1},d(l){l&&(t(yl),t(ol),t(rl),t(Tl),t(j),t(ml),t(J),t(cl),t(ul),t(h),t(dl),t(b),t(jl),t(f),t(Jl),t(Z),t(wl),t(hl),t(C),t(bl),t(fl),t(B),t(Zl),t(Il),t(V),t(Cl),t(_),t(El),t(Bl),t(F),t(Rl),t(Vl),t(Q),t(_l),t(X),t(Al),t(g),t(Fl),t(N),t(Gl),t(W),t(Ql),t(S),t(Xl),t(gl),t(H),t(Nl),t(k),t(Wl),t(Sl),t(z),t(vl),t(Hl),t(kl),t(L),t(xl),t(q),t(zl),t(Yl),t(D),t($l),t(Ll),t(K),t(ql),t(ll),t(Pl),t(Dl),t(tl),t(Ol),t(sl),t(Kl),t(le),t(al),t(ee),t(te),t(il),t(se),t(Ul)),t(u),m(d,l),m(w,l),m(I,l),m(E,l),m(R,l),m(A,l),m(G,l),m(v,l),m(x,l),m(Y,l),m($,l),m(P,l),m(O,l),m(el,l),m(nl,l),m(Ml,l)}}}const ze='{"title":"🚀 Fine-Tune Qwen3 on AWS Trainium","local":"-fine-tune-qwen3-on-aws-trainium","sections":[{"title":"1. 🛠️ Setup AWS Environment","local":"1--setup-aws-environment","sections":[],"depth":2},{"title":"2. 📊 Load and Prepare the Dataset","local":"2--load-and-prepare-the-dataset","sections":[],"depth":2},{"title":"3. 🎯 Fine-tune Qwen3 with NeuronSFTTrainer and PEFT","local":"3--fine-tune-qwen3-with-neuronsfttrainer-and-peft","sections":[],"depth":2},{"title":"4. 🔄 Consolidate and Test the Fine-Tuned Model","local":"4--consolidate-and-test-the-fine-tuned-model","sections":[],"depth":2},{"title":"5. 
🤗 Push to Hugging Face Hub","local":"5--push-to-hugging-face-hub","sections":[],"depth":2}],"depth":1}';function Ye(ae){return We(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class De extends Se{constructor(u){super(),ve(this,u,Ye,xe,ge,{})}}export{De as component}; | |
Xet Storage Details
- Size:
- 38.1 kB
- Xet hash:
- 70702169cb7ee01750a536ff29a44d23f847daa7270f7b4661f57562869b511d
Xet efficiently stores files by intelligently splitting them into unique chunks, accelerating uploads and downloads. More information is available in the Xet documentation.