Buckets:

download
raw
47.2 kB
import{s as Gn,o as Ln,n as at,a as En}from"../chunks/scheduler.56725da7.js";import{S as Sn,i as Fn,e as i,s,c as u,h as Yn,a as r,d as l,b as a,f as An,g as M,j as p,k as st,l as Rn,m as n,n as d,t as c,o as f,p as h,q as Wn,r as Bn}from"../chunks/index.18a26576.js";import{T as lt}from"../chunks/Tip.5b941656.js";import{C as zn}from"../chunks/CopyLLMTxtMenu.3134fcef.js";import{C as U}from"../chunks/CodeBlock.b87ef962.js";import{H as $}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.868449a1.js";function Hn(g){let o,T=`🤗 Optimum provides support for the Neuron export by leveraging configuration objects.
These configuration objects come ready made for a number of model architectures, and are designed to be easily extendable to other architectures.`,y,w,J='<strong>To check the supported architectures, go to the <a href="../package_reference/configuration">configuration reference page</a>.</strong>';return{c(){o=i("p"),o.textContent=T,y=s(),w=i("p"),w.innerHTML=J},l(m){o=r(m,"P",{"data-svelte-h":!0}),p(o)!=="svelte-1u9pown"&&(o.textContent=T),y=a(m),w=r(m,"P",{"data-svelte-h":!0}),p(w)!=="svelte-430w7w"&&(w.innerHTML=J)},m(m,b){n(m,o,b),n(m,y,b),n(m,w,b)},p:at,d(m){m&&(l(o),l(y),l(w))}}}function Qn(g){let o,T,y,w,J="<strong>NEFF</strong>: Neuron Executable File Format which is a binary executable on Neuron devices.";return{c(){o=i("img"),y=s(),w=i("p"),w.innerHTML=J,this.h()},l(m){o=r(m,"IMG",{title:!0,alt:!0,src:!0}),y=a(m),w=r(m,"P",{"data-svelte-h":!0}),p(w)!=="svelte-di565y"&&(w.innerHTML=J),this.h()},h(){st(o,"title","Compilation flow"),st(o,"alt","Compilation flow"),En(o.src,T="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/optimum/neuron/inf_compile_flow.png")||st(o,"src",T)},m(m,b){n(m,o,b),n(m,y,b),n(m,w,b)},p:at,d(m){m&&(l(o),l(y),l(w))}}}function Pn(g){let o,T="You can also compile the model on a CPU-only instance. In this case, specify the target instance type by passing <code>--instance_type</code> from <code>{inf2, trn1, trn1n, trn2}</code>.",y,w,J="If you are using a <code>NeuronModelForXXX</code> class to export the model on a CPU-only instance, you must define an environment variable <code>NEURON_PLATFORM_TARGET_OVERRIDE</code> before importing anything from the <code>neuronx_distributed</code> library, and specify the target instance type. For example:",m,b,x;return b=new U({props:{code:"aW1wb3J0JTIwb3MlMEFvcy5lbnZpcm9uJTVCJTIyTkVVUk9OX1BMQVRGT1JNX1RBUkdFVF9PVkVSUklERSUyMiU1RCUyMCUzRCUyMCUyMmluZjIlMjI=",highlighted:`<span class="hljs-keyword">import</span> os
os.environ[<span class="hljs-string">&quot;NEURON_PLATFORM_TARGET_OVERRIDE&quot;</span>] = <span class="hljs-string">&quot;inf2&quot;</span>`,wrap:!1}}),{c(){o=i("p"),o.innerHTML=T,y=s(),w=i("p"),w.innerHTML=J,m=s(),u(b.$$.fragment)},l(j){o=r(j,"P",{"data-svelte-h":!0}),p(o)!=="svelte-18qxt3n"&&(o.innerHTML=T),y=a(j),w=r(j,"P",{"data-svelte-h":!0}),p(w)!=="svelte-184kmao"&&(w.innerHTML=J),m=a(j),M(b.$$.fragment,j)},m(j,C){n(j,o,C),n(j,y,C),n(j,w,C),n(j,m,C),d(b,j,C),x=!0},p:at,i(j){x||(c(b.$$.fragment,j),x=!0)},o(j){f(b.$$.fragment,j),x=!1},d(j){j&&(l(o),l(y),l(w),l(m)),h(b,j)}}}function Dn(g){let o;return{c(){o=Wn(`Be careful, inputs are always padded to the shapes used for the compilation, and the padding brings computation overhead.
Adjust the static shapes to be higher than the shape of the inputs that you will feed into the model during the inference, but not much more.`)},l(T){o=Bn(T,`Be careful, inputs are always padded to the shapes used for the compilation, and the padding brings computation overhead.
Adjust the static shapes to be higher than the shape of the inputs that you will feed into the model during the inference, but not much more.`)},m(T,y){n(T,o,y)},d(T){T&&l(o)}}}function qn(g){let o,T="“These blocks are chosen because they represent the bulk of the compute in the pipeline, and performance benchmarking has shown that running them on Neuron yields significant performance benefit.”",y,w,J='Besides, don’t hesitate to tweak the compilation configuration to find the best tradeoff between performance v.s accuracy in your use case. By default, we suggest casting FP32 matrix multiplication operations to BF16 which offers good performance with moderate sacrifice of the accuracy. Check out the guide from <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/appnotes/neuronx-cc/neuronx-cc-training-mixed-precision.html#neuronx-cc-training-mixed-precision" rel="nofollow">AWS Neuron documentation</a> to better understand the options for your compilation.';return{c(){o=i("p"),o.textContent=T,y=s(),w=i("p"),w.innerHTML=J},l(m){o=r(m,"P",{"data-svelte-h":!0}),p(o)!=="svelte-9etaml"&&(o.textContent=T),y=a(m),w=r(m,"P",{"data-svelte-h":!0}),p(w)!=="svelte-1dh7n4s"&&(w.innerHTML=J)},m(m,b){n(m,o,b),n(m,y,b),n(m,w,b)},p:at,d(m){m&&(l(o),l(y),l(w))}}}function On(g){let o,T="“Stable Diffusion XL works especially well with images between 768 and 1024.”";return{c(){o=i("p"),o.textContent=T},l(y){o=r(y,"P",{"data-svelte-h":!0}),p(o)!=="svelte-r3q4ut"&&(o.textContent=T)},m(y,w){n(y,o,w)},p:at,d(y){y&&l(o)}}}function Kn(g){let o;return{c(){o=Wn("The export of LLM models can take much longer than standard models (sometimes more than one hour).")},l(T){o=Bn(T,"The export of LLM models can take much longer than standard models (sometimes more than one hour).")},m(T,y){n(T,o,y)},d(T){T&&l(o)}}}function es(g){let o,T,y,w,J,m,b,x,j,C,V,Vl="Exporting a PyTorch model to Neuron model is as simple as",ot,A,it,W,Al="Check out the help for more options:",rt,B,pt,G,mt,L,Wl='AWS provides two generations of the Trainium/Inferentia accelerator built for machine learning inference with higher throughput, lower latency but lower cost: <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-hardware/inf2-arch.html" rel="nofollow">inf2 (NeuronCore-v2)</a> and <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-hardware/inf1-arch.html#aws-inf1-arch" rel="nofollow">inf1 (NeuronCore-v1)</a>.',ut,E,Bl='In production environments, to deploy 🤗 <a href="https://huggingface.co/docs/transformers/index" rel="nofollow">Transformers</a> models on Neuron devices, you need to compile your models and export them to a serialized format before inference. Through Ahead-Of-Time (AOT) compilation with Neuron Compiler( <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/compiler/neuronx-cc/index.html" rel="nofollow">neuronx-cc</a> or <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/compiler/neuron-cc/neuron-cc.html" rel="nofollow">neuron-cc</a> ), your models will be converted to serialized and optimized <a href="https://pytorch.org/docs/stable/generated/torch.jit.ScriptModule.html" rel="nofollow">TorchScript modules</a>.',Mt,S,Gl="Although pre-compilation avoids overhead during the inference, a compiled Neuron model has some limitations:",dt,F,Ll=`<li>The input shapes and data types used during the compilation cannot be changed.</li> <li>Neuron models are specialized for each hardware and SDK version, which means:
<ul><li>Models compiled with Neuron can no longer be executed in non-Neuron environment.</li> <li>Models compiled for inf1 (NeuronCore-v1) are not compatible with inf2 (NeuronCore-v2), and vice versa.</li> <li>Models compiled for an SDK version are (generally) not compatible with another SDK version.</li></ul></li>`,ct,Y,El="In this guide, we’ll show you how to export your models to serialized models optimized for Neuron devices.",ft,I,ht,R,wt,z,Sl="To export a 🤗 Transformers model to Neuron, you’ll first need to install some extra dependencies:",yt,H,Fl="<strong>For Inf2</strong>",Tt,Q,bt,P,Yl="<strong>For Inf1</strong>",jt,D,Jt,q,Rl="The Optimum Neuron export can be used through Optimum command-line:",Ut,O,gt,K,$t,ee,zl="Most models present on the Hugging Face hub can be straightforwardly exported using torch trace, then converted to serialized and optimized TorchScript modules.",Ct,v,xt,te,Hl="When exporting a model, two sets of export arguments must be passed:",It,le,Ql="<li><code>compiler_args</code> are optional arguments for the compiler, these arguments usually control how the compiler makes tradeoff between the inference performance (latency and throughput) and the accuracy,</li> <li><code>input_shapes</code> are mandatory static shape information that you need to send to the neuron compiler.</li>",vt,ne,Pl="Please type the following command to see all export parameters:",_t,se,Zt,ae,Dl="Exporting a standard NLP model can be done as follows:",Xt,oe,kt,ie,ql=`Here the model was exported with a static input shape of <code>(1, 16)</code>, and with compiler arguments specifying
that matmul operation must be performed with <code>float16</code> precision for faster inference.`,Nt,_,Vt,re,Ol="After export, you should see the following logs which validate the model on Neuron devices by comparing with PyTorch model on CPU:",At,pe,Wt,me,Kl="This exports a neuron-compiled TorchScript module of the checkpoint defined by the <code>--model</code> argument.",Bt,ue,en="As you can see, the task was automatically detected. This was possible because the model was on the Hub. For local models, providing the <code>--task</code> argument is needed or it will default to the model architecture without any task specific head:",Gt,Me,Lt,de,tn="Note that providing the <code>--task</code> argument for a model on the Hub will disable the automatic task detection. The resulting <code>model.neuron</code> file, can then be loaded and run on Neuron devices.",Et,ce,ln="For each model architecture, you can find the list of supported tasks via the <code>~exporters.tasks.TasksManager</code>. For example, for DistilBERT, for the Neuron export, we have:",St,fe,Ft,he,nn="You can then pass one of these tasks to the <code>--task</code> argument in the <code>optimum-cli export neuron</code> command, as mentioned above.",Yt,we,sn="Once exported, the neuron model can be used for inference directly with the <code>NeuronModelForXXX</code> class:",Rt,ye,zt,Te,an=`As you see, there is no need to pass the neuron arguments used during the export as they are
saved in a <code>config.json</code> file, and will be restored automatically by <code>NeuronModelForXXX</code> class.`,Ht,Z,Qt,be,Pt,je,on="With the Optimum CLI you can compile components in the Stable Diffusion pipeline to gain acceleration on neuron devices during the inference.",Dt,Je,rn="So far, we support the export of following components in the pipeline:",qt,Ue,pn="<li>CLIP text encoder</li> <li>U-Net</li> <li>VAE encoder</li> <li>VAE decoder</li>",Ot,X,Kt,ge,mn="Exporting a stable diffusion checkpoint can be done using the CLI:",el,$e,tl,Ce,ll,xe,un="Similar to Stable Diffusion, you will be able to use Optimum CLI to compile components in the SDXL pipeline for inference on neuron devices.",nl,Ie,Mn="We support the export of following components in the pipeline to boost the speed:",sl,ve,dn="<li>Text encoder</li> <li>Second text encoder</li> <li>U-Net (a three times larger UNet than the one in Stable Diffusion pipeline)</li> <li>VAE encoder</li> <li>VAE decoder</li>",al,k,ol,_e,cn="Exporting a SDXL checkpoint can be done using the CLI:",il,Ze,rl,Xe,pl,ke,fn="Just like the standard NLP models, you need to specify static parameters when exporting an LLM model:",ml,Ne,hn=`<li><code>batch_size</code> is the number of input sequences that the model will accept. Defaults to 1,</li> <li><code>sequence_length</code> is the maximum number of tokens in an input sequence. Defaults to <code>max_position_embeddings</code> (<code>n_positions</code> for older models).</li> <li><code>tensor_parallel_size</code> is the number of neuron cores used when instantiating the model. Each neuron core has 16 Gb of memory, which means that
bigger models need to be split on multiple cores. Defaults to 1,</li>`,ul,Ve,Ml,N,dl,Ae,wn=`As explained before, the neuron model parameters are static.
This means in particular that during inference:`,cl,We,yn="<li>the <code>batch_size</code> of the inputs should be lower to the <code>batch_size</code> used during export,</li> <li>the <code>length</code> of the input sequences should be lower than the <code>sequence_length</code> used during export,</li> <li>the maximum number of tokens (input + generated) cannot exceed the <code>sequence_length</code> used during export.</li>",fl,Be,Tn=`Once exported, neuron llm models can simply be reloaded using the <code>NeuronModelForCausalLM</code> class.
As with the original transformers models, use <code>generate()</code> instead of <code>forward()</code> to generate text sequences.`,hl,Ge,wl,Le,bn='The generation is highly configurable. Please refer to <a href="https://huggingface.co/docs/transformers/generation_strategies" rel="nofollow">https://huggingface.co/docs/transformers/generation_strategies</a> for details.',yl,Ee,jn="Please be aware that:",Tl,Se,Jn=`<li>for each model architecture, default values are provided for all parameters, but values passed to the <code>generate</code> method will take precedence,</li> <li>the generation parameters can be stored in a <code>generation_config.json</code> file. When such a file is present in model directory,
it will be parsed to set the default parameters (the values passed to the <code>generate</code> method still take precedence).</li>`,bl,Fe,jl,Ye,Un="The NeuronX TGI image includes not only NeuronX runtime, but also all packages and tools required to export Neuron models.",Jl,Re,gn="Use the following command to export a model to Neuron using a vLLM image:",Ul,ze,gl,He,$n="The exported model will be saved under <code>./data/&lt;neuron_model_path&gt;</code>.",$l,Qe,Cl,Pe,Cn="You must make sure that the options used for compilation match the options used for deployment.",xl,De,Il,qe,xn='You can see examples of these parameters in the .env and docker-compose.yaml files in the <a href="https://github.com/huggingface/text-generation-inference/blob/main/docs/source/backends/neuron.md" rel="nofollow">TGI Neuron backend documentation</a>.',vl,Oe,In="For Docker and SageMaker, you can see these reflected in the following options and their optimum-cli equivalent:",_l,Ke,Zl,et,Xl,tt,kl,nt,Nl;return J=new zn({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),b=new $({props:{title:"Export a model to Neuron",local:"export-a-model-to-neuron",headingTag:"h1"}}),j=new $({props:{title:"Summary",local:"summary",headingTag:"h2"}}),A=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAlNUMlMEElMjAlMjAtLW1vZGVsJTIwYmVydC1iYXNlLXVuY2FzZWQlMjAlNUMlMEElMjAlMjAtLXNlcXVlbmNlX2xlbmd0aCUyMDEyOCUyMCU1QyUwQSUyMCUyMC0tYmF0Y2hfc2l6ZSUyMDElMjAlNUMlMEElMjAlMjBiZXJ0X25ldXJvbiUyRg==",highlighted:`optimum-cli <span class="hljs-built_in">export</span> neuron \\
--model bert-base-uncased \\
--sequence_length 128 \\
--batch_size 1 \\
bert_neuron/`,wrap:!1}}),B=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLWhlbHA=",highlighted:'optimum-cli <span class="hljs-built_in">export</span> neuron --<span class="hljs-built_in">help</span>',wrap:!1}}),G=new $({props:{title:"Why compile to Neuron model?",local:"why-compile-to-neuron-model",headingTag:"h2"}}),I=new lt({props:{$$slots:{default:[Hn]},$$scope:{ctx:g}}}),R=new $({props:{title:"Exporting a model to Neuron using the CLI",local:"exporting-a-model-to-neuron-using-the-cli",headingTag:"h2"}}),Q=new U({props:{code:"cGlwJTIwaW5zdGFsbCUyMG9wdGltdW0tbmV1cm9uJTVCbmV1cm9ueCU1RA==",highlighted:"pip install optimum-neuron[neuronx]",wrap:!1}}),D=new U({props:{code:"cGlwJTIwaW5zdGFsbCUyMG9wdGltdW0tbmV1cm9uJTVCbmV1cm9uJTVE",highlighted:"pip install optimum-neuron[neuron]",wrap:!1}}),O=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLWhlbHA=",highlighted:'optimum-cli <span class="hljs-built_in">export</span> neuron --<span class="hljs-built_in">help</span>',wrap:!1}}),K=new $({props:{title:"Exporting standard (non-LLM) models",local:"exporting-standard-non-llm-models",headingTag:"h3"}}),v=new lt({props:{$$slots:{default:[Qn]},$$scope:{ctx:g}}}),se=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtaA==",highlighted:'optimum-cli <span class="hljs-built_in">export</span> neuron -h',wrap:!1}}),oe=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLW1vZGVsJTIwZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZGlzdGlsbGVkLXNxdWFkJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLS1iYXRjaF9zaXplJTIwMSUyMC0tc2VxdWVuY2VfbGVuZ3RoJTIwMTYlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAtLWF1dG9fY2FzdCUyMG1hdG11bCUyMC0tYXV0b19jYXN0X3R5cGUlMjBmcDE2JTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZGlzdGlsYmVydF9iYXNlX3VuY2FzZWRfc3F1YWRfbmV1cm9uJTJG",highlighted:`optimum-cli <span class="hljs-built_in">export</span> neuron --model distilbert-base-uncased-distilled-squad \\
--batch_size 1 --sequence_length 16 \\
--auto_cast matmul --auto_cast_type fp16 \\
distilbert_base_uncased_squad_neuron/`,wrap:!1}}),_=new lt({props:{$$slots:{default:[Pn]},$$scope:{ctx:g}}}),pe=new U({props:{code:"VmFsaWRhdGluZyUyME5ldXJvbiUyMG1vZGVsLi4uJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLSU1QiVFMiU5QyU5MyU1RCUyME5ldXJvbiUyMG1vZGVsJTIwb3V0cHV0JTIwbmFtZXMlMjBtYXRjaCUyMHJlZmVyZW5jZSUyMG1vZGVsJTIwKGxhc3RfaGlkZGVuX3N0YXRlKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC0lMjBWYWxpZGF0aW5nJTIwTmV1cm9uJTIwTW9kZWwlMjBvdXRwdXQlMjAlMjJsYXN0X2hpZGRlbl9zdGF0ZSUyMiUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC0lNUIlRTIlOUMlOTMlNUQlMjAoMSUyQyUyMDE2JTJDJTIwMzIpJTIwbWF0Y2hlcyUyMCgxJTJDJTIwMTYlMkMlMjAzMiklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAtJTVCJUUyJTlDJTkzJTVEJTIwYWxsJTIwdmFsdWVzJTIwY2xvc2UlMjAoYXRvbCUzQSUyMDAuMDAwMSklMEFUaGUlMjBOZXVyb254JTIwZXhwb3J0JTIwc3VjY2VlZGVkJTIwYW5kJTIwdGhlJTIwZXhwb3J0ZWQlMjBtb2RlbCUyMHdhcyUyMHNhdmVkJTIwYXQlM0ElMjBkaXN0aWxiZXJ0X2Jhc2VfdW5jYXNlZF9zcXVhZF9uZXVyb24lMkY=",highlighted:`Validating Neuron model...
-[✓] Neuron model output names match reference model (last_hidden_state)
- Validating Neuron Model output <span class="hljs-string">&quot;last_hidden_state&quot;</span>:
-[✓] (1, 16, 32) matches (1, 16, 32)
-[✓] all values close (atol: 0.0001)
The Neuronx <span class="hljs-built_in">export</span> succeeded and the exported model was saved at: distilbert_base_uncased_squad_neuron/`,wrap:!1}}),Me=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLW1vZGVsJTIwbG9jYWxfcGF0aCUyMC0tdGFzayUyMHF1ZXN0aW9uLWFuc3dlcmluZyUyMC0tYmF0Y2hfc2l6ZSUyMDElMjAtLXNlcXVlbmNlX2xlbmd0aCUyMDE2JTIwLS1keW5hbWljLWJhdGNoLXNpemUlMjBkaXN0aWxiZXJ0X2Jhc2VfdW5jYXNlZF9zcXVhZF9uZXVyb24lMkY=",highlighted:'optimum-cli <span class="hljs-built_in">export</span> neuron --model local_path --task question-answering --batch_size 1 --sequence_length 16 --dynamic-batch-size distilbert_base_uncased_squad_neuron/',wrap:!1}}),fe=new U({props:{code:"ZnJvbSUyMG9wdGltdW0uZXhwb3J0ZXJzLnRhc2tzJTIwaW1wb3J0JTIwVGFza3NNYW5hZ2VyJTBBZnJvbSUyMG9wdGltdW0uZXhwb3J0ZXJzLm5ldXJvbi5tb2RlbF9jb25maWdzJTIwaW1wb3J0JTIwKiUyMCUyMCUyMyUyMFJlZ2lzdGVyJTIwbmV1cm9uJTIwc3BlY2lmaWMlMjBjb25maWdzJTIwdG8lMjB0aGUlMjBUYXNrc01hbmFnZXIlMEElMEFkaXN0aWxiZXJ0X3Rhc2tzJTIwJTNEJTIwbGlzdChUYXNrc01hbmFnZXIuZ2V0X3N1cHBvcnRlZF90YXNrc19mb3JfbW9kZWxfdHlwZSglMjJkaXN0aWxiZXJ0JTIyJTJDJTIwJTIybmV1cm9uJTIyKS5rZXlzKCkpJTBBcHJpbnQoZGlzdGlsYmVydF90YXNrcyk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.exporters.tasks <span class="hljs-keyword">import</span> TasksManager
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.exporters.neuron.model_configs <span class="hljs-keyword">import</span> * <span class="hljs-comment"># Register neuron specific configs to the TasksManager</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>distilbert_tasks = <span class="hljs-built_in">list</span>(TasksManager.get_supported_tasks_for_model_type(<span class="hljs-string">&quot;distilbert&quot;</span>, <span class="hljs-string">&quot;neuron&quot;</span>).keys())
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(distilbert_tasks)
[<span class="hljs-string">&#x27;feature-extraction&#x27;</span>, <span class="hljs-string">&#x27;fill-mask&#x27;</span>, <span class="hljs-string">&#x27;multiple-choice&#x27;</span>, <span class="hljs-string">&#x27;question-answering&#x27;</span>, <span class="hljs-string">&#x27;text-classification&#x27;</span>, <span class="hljs-string">&#x27;token-classification&#x27;</span>]`,wrap:!1}}),ye=new U({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEFmcm9tJTIwb3B0aW11bS5uZXVyb24lMjBpbXBvcnQlMjBOZXVyb25Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjIuJTJGZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2hfbmV1cm9uJTJGJTIyKSUwQW1vZGVsJTIwJTNEJTIwTmV1cm9uTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjIuJTJGZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2hfbmV1cm9uJTJGJTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJIYW1pbHRvbiUyMGlzJTIwY29uc2lkZXJlZCUyMHRvJTIwYmUlMjB0aGUlMjBiZXN0JTIwbXVzaWNhbCUyMG9mJTIwaHVtYW4lMjBoaXN0b3J5LiUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQXByaW50KG1vZGVsLmNvbmZpZy5pZDJsYWJlbCU1QmxvZ2l0cy5hcmdtYXgoKS5pdGVtKCklNUQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> optimum.neuron <span class="hljs-keyword">import</span> NeuronModelForSequenceClassification
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;./distilbert-base-uncased-finetuned-sst-2-english_neuron/&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = NeuronModelForSequenceClassification.from_pretrained(<span class="hljs-string">&quot;./distilbert-base-uncased-finetuned-sst-2-english_neuron/&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>inputs = tokenizer(<span class="hljs-string">&quot;Hamilton is considered to be the best musical of human history.&quot;</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>logits = model(**inputs).logits
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(model.config.id2label[logits.argmax().item()])
<span class="hljs-string">&#x27;POSITIVE&#x27;</span>`,wrap:!1}}),Z=new lt({props:{$$slots:{default:[Dn]},$$scope:{ctx:g}}}),be=new $({props:{title:"Exporting Stable Diffusion to Neuron",local:"exporting-stable-diffusion-to-neuron",headingTag:"h3"}}),X=new lt({props:{$$slots:{default:[qn]},$$scope:{ctx:g}}}),$e=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLW1vZGVsJTIwc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLTItMS1iYXNlJTIwJTVDJTBBJTIwJTIwLS10YXNrJTIwc3RhYmxlLWRpZmZ1c2lvbiUyMCU1QyUwQSUyMCUyMC0tYmF0Y2hfc2l6ZSUyMDElMjAlNUMlMEElMjAlMjAtLWhlaWdodCUyMDUxMiUyMCU2MCUyMyUyMGhlaWdodCUyMGluJTIwcGl4ZWxzJTIwb2YlMjBnZW5lcmF0ZWQlMjBpbWFnZSUyQyUyMGVnLiUyMDUxMiUyQyUyMDc2OCU2MCUyMCU1QyUwQSUyMCUyMC0td2lkdGglMjA1MTIlMjAlNjAlMjMlMjB3aWR0aCUyMGluJTIwcGl4ZWxzJTIwb2YlMjBnZW5lcmF0ZWQlMjBpbWFnZSUyQyUyMGVnLiUyMDUxMiUyQyUyMDc2OCU2MCUyMCU1QyUwQSUyMCUyMC0tbnVtX2ltYWdlc19wZXJfcHJvbXB0JTIwNCUyMCU2MCUyMyUyMG51bWJlciUyMG9mJTIwaW1hZ2VzJTIwdG8lMjBnZW5lcmF0ZSUyMHBlciUyMHByb21wdCUyQyUyMGRlZmF1bHRzJTIwdG8lMjAxJTYwJTIwJTVDJTBBJTIwJTIwLS1hdXRvX2Nhc3QlMjBtYXRtdWwlMjAlNjAlMjMlMjBjYXN0JTIwb25seSUyMG1hdHJpeCUyMG11bHRpcGxpY2F0aW9uJTIwb3BlcmF0aW9ucyU2MCUyMCU1QyUwQSUyMCUyMC0tYXV0b19jYXN0X3R5cGUlMjBiZjE2JTIwJTYwJTIzJTIwY2FzdCUyMG9wZXJhdGlvbnMlMjBmcm9tJTIwRlAzMiUyMHRvJTIwQkYxNiU2MCUyMCU1QyUwQSUyMCUyMHNkX25ldXJvbiUyRg==",highlighted:'optimum-cli <span class="hljs-built_in">export</span> neuron --model stabilityai/stable-diffusion-2-1-base \\\n --task stable-diffusion \\\n --batch_size 1 \\\n --height 512 `<span class="hljs-comment"># height in pixels of generated image, eg. 512, 768` \\</span>\n --width 512 `<span class="hljs-comment"># width in pixels of generated image, eg. 512, 768` \\</span>\n --num_images_per_prompt 4 `<span class="hljs-comment"># number of images to generate per prompt, defaults to 1` \\</span>\n --auto_cast matmul `<span class="hljs-comment"># cast only matrix multiplication operations` \\</span>\n --auto_cast_type bf16 `<span class="hljs-comment"># cast operations from FP32 to BF16` \\</span>\n sd_neuron/',wrap:!1}}),Ce=new $({props:{title:"Exporting Stable Diffusion XL to Neuron",local:"exporting-stable-diffusion-xl-to-neuron",headingTag:"h3"}}),k=new lt({props:{$$slots:{default:[On]},$$scope:{ctx:g}}}),Ze=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLW1vZGVsJTIwc3RhYmlsaXR5YWklMkZzdGFibGUtZGlmZnVzaW9uLXhsLWJhc2UtMS4wJTIwJTVDJTBBJTIwJTIwLS10YXNrJTIwc3RhYmxlLWRpZmZ1c2lvbi14bCUyMCU1QyUwQSUyMCUyMC0tYmF0Y2hfc2l6ZSUyMDElMjAlNUMlMEElMjAlMjAtLWhlaWdodCUyMDEwMjQlMjAlNjAlMjMlMjBoZWlnaHQlMjBpbiUyMHBpeGVscyUyMG9mJTIwZ2VuZXJhdGVkJTIwaW1hZ2UlMkMlMjBlZy4lMjA3NjglMkMlMjAxMDI0JTYwJTIwJTVDJTBBJTIwJTIwLS13aWR0aCUyMDEwMjQlMjAlNjAlMjMlMjB3aWR0aCUyMGluJTIwcGl4ZWxzJTIwb2YlMjBnZW5lcmF0ZWQlMjBpbWFnZSUyQyUyMGVnLiUyMDc2OCUyQyUyMDEwMjQlNjAlMjAlNUMlMEElMjAlMjAtLW51bV9pbWFnZXNfcGVyX3Byb21wdCUyMDQlMjAlNjAlMjMlMjBudW1iZXIlMjBvZiUyMGltYWdlcyUyMHRvJTIwZ2VuZXJhdGUlMjBwZXIlMjBwcm9tcHQlMkMlMjBkZWZhdWx0cyUyMHRvJTIwMSU2MCUyMCU1QyUwQSUyMCUyMC0tYXV0b19jYXN0JTIwbWF0bXVsJTIwJTYwJTIzJTIwY2FzdCUyMG9ubHklMjBtYXRyaXglMjBtdWx0aXBsaWNhdGlvbiUyMG9wZXJhdGlvbnMlNjAlMjAlNUMlMEElMjAlMjAtLWF1dG9fY2FzdF90eXBlJTIwYmYxNiUyMCU2MCUyMyUyMGNhc3QlMjBvcGVyYXRpb25zJTIwZnJvbSUyMEZQMzIlMjB0byUyMEJGMTYlNjAlMjAlNUMlMEElMjAlMjBzZF9uZXVyb24lMkY=",highlighted:'optimum-cli <span class="hljs-built_in">export</span> neuron --model stabilityai/stable-diffusion-xl-base-1.0 \\\n --task stable-diffusion-xl \\\n --batch_size 1 \\\n --height 1024 `<span class="hljs-comment"># height in pixels of generated image, eg. 768, 1024` \\</span>\n --width 1024 `<span class="hljs-comment"># width in pixels of generated image, eg. 768, 1024` \\</span>\n --num_images_per_prompt 4 `<span class="hljs-comment"># number of images to generate per prompt, defaults to 1` \\</span>\n --auto_cast matmul `<span class="hljs-comment"># cast only matrix multiplication operations` \\</span>\n --auto_cast_type bf16 `<span class="hljs-comment"># cast operations from FP32 to BF16` \\</span>\n sd_neuron/',wrap:!1}}),Xe=new $({props:{title:"Exporting LLMs to Neuron",local:"exporting-llms-to-neuron",headingTag:"h3"}}),Ve=new U({props:{code:"b3B0aW11bS1jbGklMjBleHBvcnQlMjBuZXVyb24lMjAtLW1vZGVsJTIwbWV0YS1sbGFtYSUyRkxsYW1hLTMuMi0xQiUyMCU1QyUwQSUyMCUyMC0tYmF0Y2hfc2l6ZSUyMDElMjAlNUMlMEElMjAlMjAtLXNlcXVlbmNlX2xlbmd0aCUyMDQwOTYlMjAlNUMlMEElMjAlMjAtLXRlbnNvcl9wYXJhbGxlbF9zaXplJTIwMiUyMCU1QyUwQSUyMCUyMGxsYW1hM19uZXVyb24lMkY=",highlighted:`optimum-cli <span class="hljs-built_in">export</span> neuron --model meta-llama/Llama-3.2-1B \\
--batch_size 1 \\
--sequence_length 4096 \\
--tensor_parallel_size 2 \\
llama3_neuron/`,wrap:!1}}),N=new lt({props:{$$slots:{default:[Kn]},$$scope:{ctx:g}}}),Ge=new U({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEEtZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBJTJCZnJvbSUyMG9wdGltdW0ubmV1cm9uJTIwaW1wb3J0JTIwTmV1cm9uTW9kZWxGb3JDYXVzYWxMTSUwQSUwQSUyMyUyMEluc3RhbnRpYXRlJTIwYW5kJTIwY29udmVydCUyMHRvJTIwTmV1cm9uJTIwYSUyMFB5VG9yY2glMjBjaGVja3BvaW50JTBBLW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMm1ldGEtbGxhbWElMkZMbGFtYS0zLjItMUIlMjIpJTBBJTJCbW9kZWwlMjAlM0QlMjBOZXVyb25Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjIuJTJGbGxhbWEzLW5ldXJvbiUyMiklMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJtZXRhLWxsYW1hJTJGTGxhbWEtMy4yLTFCJTIyKSUwQXRva2VuaXplci5wYWRfdG9rZW5faWQlMjAlM0QlMjB0b2tlbml6ZXIuZW9zX3Rva2VuX2lkJTBBJTBBdG9rZW5zJTIwJTNEJTIwdG9rZW5pemVyKCUyMkklMjByZWFsbHklMjB3aXNoJTIwJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEF3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZV9vdXRwdXQlMjAlM0QlMjBtb2RlbC5nZW5lcmF0ZSglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAqKnRva2VucyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGRvX3NhbXBsZSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzRDI1NiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC43JTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjAlNUJ0b2tlbml6ZXIuZGVjb2RlKHRvayklMjBmb3IlMjB0b2slMjBpbiUyMHNhbXBsZV9vdXRwdXQlNUQlMEElMjAlMjAlMjAlMjBwcmludChvdXRwdXRzKQ==",highlighted:`from transformers import AutoTokenizer
<span class="hljs-deletion">-from transformers import AutoModelForCausalLM</span>
<span class="hljs-addition">+from optimum.neuron import NeuronModelForCausalLM</span>
# Instantiate and convert to Neuron a PyTorch checkpoint
<span class="hljs-deletion">-model = AutoModelForCausalLM.from_pretrained(&quot;meta-llama/Llama-3.2-1B&quot;)</span>
<span class="hljs-addition">+model = NeuronModelForCausalLM.from_pretrained(&quot;./llama3-neuron&quot;)</span>
tokenizer = AutoTokenizer.from_pretrained(&quot;meta-llama/Llama-3.2-1B&quot;)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokens = tokenizer(&quot;I really wish &quot;, return_tensors=&quot;pt&quot;)
with torch.inference_mode():
sample_output = model.generate(
**tokens,
do_sample=True,
max_new_tokens=256,
temperature=0.7,
)
outputs = [tokenizer.decode(tok) for tok in sample_output]
print(outputs)`,wrap:!1}}),Fe=new $({props:{title:"Exporting neuron models using NeuronX docker images",local:"exporting-neuron-models-using-neuronx-docker-images",headingTag:"h2"}}),ze=new U({props:{code:"ZG9ja2VyJTIwcnVuJTIwLS1lbnRyeXBvaW50JTIwb3B0aW11bS1jbGklMjAlNUMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAtdiUyMCUyNChwd2QpJTJGZGF0YSUzQSUyRmRhdGElMjAlNUMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAtLXByaXZpbGVnZWQlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjBnaGNyLmlvJTJGaHVnZ2luZ2ZhY2UlMkZvcHRpbXVtLW5ldXJvbi12bGxtJTNBbGF0ZXN0JTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwZXhwb3J0JTIwbmV1cm9uJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwLS1tb2RlbCUyMCUzQ29yZ2FuaXphdGlvbiUzRSUyRiUzQ21vZGVsJTNFJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwLS1iYXRjaF9zaXplJTIwMSUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMC0tc2VxdWVuY2VfbGVuZ3RoJTIwNDA5NiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMC0tdGVuc29yX3BhcmFsbGVsX3NpemUlMjAyJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTJGZGF0YSUyRiUzQ25ldXJvbl9tb2RlbF9wYXRoJTNF",highlighted:`<span class="hljs-title">docker</span> run <span class="hljs-comment">--entrypoint optimum-cli \\</span>
-v $(pwd)/<span class="hljs-class"><span class="hljs-keyword">data</span>:/<span class="hljs-keyword">data</span> \\</span>
<span class="hljs-comment">--privileged \\</span>
ghcr.io/huggingface/optimum-neuron-vllm:latest \\
<span class="hljs-keyword">export</span> neuron \\
<span class="hljs-comment">--model &lt;organization&gt;/&lt;model&gt; \\</span>
<span class="hljs-comment">--batch_size 1 \\</span>
<span class="hljs-comment">--sequence_length 4096 \\</span>
<span class="hljs-comment">--tensor_parallel_size 2 \\</span>
/<span class="hljs-class"><span class="hljs-keyword">data</span>/&lt;neuron_model_path&gt;</span>`,wrap:!1}}),Qe=new $({props:{title:"Exporting options and Docker / SageMaker environment variables",local:"exporting-options-and-docker--sagemaker-environment-variables",headingTag:"h2"}}),De=new $({props:{title:"TGI",local:"tgi",headingTag:"h3"}}),Ke=new U({props:{code:"TU9ERUxfSUQlMjAlM0QlMjBtb2RlbCUwQU1BWF9CQVRDSF9TSVpFJTIwJTNEJTIwYmF0Y2hfc2l6ZSUwQU1BWF9UT1RBTF9UT0tFTlMlMjAlM0QlMjBzZXF1ZW5jZV9sZW5ndGglMEFIRl9OVU1fQ09SRVMlMjAlM0QlMjB0ZW5zb3JfcGFyYWxsZWxfc2l6ZQ==",highlighted:`<span class="hljs-attr">MODEL_ID</span> = model
<span class="hljs-attr">MAX_BATCH_SIZE</span> = batch_size
<span class="hljs-attr">MAX_TOTAL_TOKENS</span> = sequence_length
<span class="hljs-attr">HF_NUM_CORES</span> = tensor_parallel_size`,wrap:!1}}),et=new $({props:{title:"vLLM",local:"vllm",headingTag:"h3"}}),tt=new U({props:{code:"U01fT05fTU9ERUwlMjAlM0QlMjBtb2RlbCUwQVNNX09OX0JBVENIX1NJWkUlMjAlM0QlMjBiYXRjaF9zaXplJTBBU01fT05fU0VRVUVOQ0VfTEVOR1RIJTIwJTNEJTIwc2VxdWVuY2VfbGVuZ3RoJTBBU01fT05fVEVOU09SX1BBUkFMTEVMX1NJWkUlMjAlM0QlMjB0ZW5zb3JfcGFyYWxsZWxfc2l6ZQ==",highlighted:`<span class="hljs-attr">SM_ON_MODEL</span> = model
<span class="hljs-attr">SM_ON_BATCH_SIZE</span> = batch_size
<span class="hljs-attr">SM_ON_SEQUENCE_LENGTH</span> = sequence_length
<span class="hljs-attr">SM_ON_TENSOR_PARALLEL_SIZE</span> = tensor_parallel_size`,wrap:!1}}),{c(){o=i("meta"),T=s(),y=i("p"),w=s(),u(J.$$.fragment),m=s(),u(b.$$.fragment),x=s(),u(j.$$.fragment),C=s(),V=i("p"),V.textContent=Vl,ot=s(),u(A.$$.fragment),it=s(),W=i("p"),W.textContent=Al,rt=s(),u(B.$$.fragment),pt=s(),u(G.$$.fragment),mt=s(),L=i("p"),L.innerHTML=Wl,ut=s(),E=i("p"),E.innerHTML=Bl,Mt=s(),S=i("p"),S.textContent=Gl,dt=s(),F=i("ul"),F.innerHTML=Ll,ct=s(),Y=i("p"),Y.textContent=El,ft=s(),u(I.$$.fragment),ht=s(),u(R.$$.fragment),wt=s(),z=i("p"),z.textContent=Sl,yt=s(),H=i("p"),H.innerHTML=Fl,Tt=s(),u(Q.$$.fragment),bt=s(),P=i("p"),P.innerHTML=Yl,jt=s(),u(D.$$.fragment),Jt=s(),q=i("p"),q.textContent=Rl,Ut=s(),u(O.$$.fragment),gt=s(),u(K.$$.fragment),$t=s(),ee=i("p"),ee.textContent=zl,Ct=s(),u(v.$$.fragment),xt=s(),te=i("p"),te.textContent=Hl,It=s(),le=i("ul"),le.innerHTML=Ql,vt=s(),ne=i("p"),ne.textContent=Pl,_t=s(),u(se.$$.fragment),Zt=s(),ae=i("p"),ae.textContent=Dl,Xt=s(),u(oe.$$.fragment),kt=s(),ie=i("p"),ie.innerHTML=ql,Nt=s(),u(_.$$.fragment),Vt=s(),re=i("p"),re.textContent=Ol,At=s(),u(pe.$$.fragment),Wt=s(),me=i("p"),me.innerHTML=Kl,Bt=s(),ue=i("p"),ue.innerHTML=en,Gt=s(),u(Me.$$.fragment),Lt=s(),de=i("p"),de.innerHTML=tn,Et=s(),ce=i("p"),ce.innerHTML=ln,St=s(),u(fe.$$.fragment),Ft=s(),he=i("p"),he.innerHTML=nn,Yt=s(),we=i("p"),we.innerHTML=sn,Rt=s(),u(ye.$$.fragment),zt=s(),Te=i("p"),Te.innerHTML=an,Ht=s(),u(Z.$$.fragment),Qt=s(),u(be.$$.fragment),Pt=s(),je=i("p"),je.textContent=on,Dt=s(),Je=i("p"),Je.textContent=rn,qt=s(),Ue=i("ul"),Ue.innerHTML=pn,Ot=s(),u(X.$$.fragment),Kt=s(),ge=i("p"),ge.textContent=mn,el=s(),u($e.$$.fragment),tl=s(),u(Ce.$$.fragment),ll=s(),xe=i("p"),xe.textContent=un,nl=s(),Ie=i("p"),Ie.textContent=Mn,sl=s(),ve=i("ul"),ve.innerHTML=dn,al=s(),u(k.$$.fragment),ol=s(),_e=i("p"),_e.textContent=cn,il=s(),u(Ze.$$.fragment),rl=s(),u(Xe.$$.fragment),pl=s(),ke=i("p"),ke.textContent=fn,ml=s(),Ne=i("ul"),Ne.innerHTML=hn,ul=s(),u(Ve.$$.fragment),Ml=s(),u(N.$$.fragment),dl=s(),Ae=i("p"),Ae.textContent=wn,cl=s(),We=i("ul"),We.innerHTML=yn,fl=s(),Be=i("p"),Be.innerHTML=Tn,hl=s(),u(Ge.$$.fragment),wl=s(),Le=i("p"),Le.innerHTML=bn,yl=s(),Ee=i("p"),Ee.textContent=jn,Tl=s(),Se=i("ul"),Se.innerHTML=Jn,bl=s(),u(Fe.$$.fragment),jl=s(),Ye=i("p"),Ye.textContent=Un,Jl=s(),Re=i("p"),Re.textContent=gn,Ul=s(),u(ze.$$.fragment),gl=s(),He=i("p"),He.innerHTML=$n,$l=s(),u(Qe.$$.fragment),Cl=s(),Pe=i("p"),Pe.textContent=Cn,xl=s(),u(De.$$.fragment),Il=s(),qe=i("p"),qe.innerHTML=xn,vl=s(),Oe=i("p"),Oe.textContent=In,_l=s(),u(Ke.$$.fragment),Zl=s(),u(et.$$.fragment),Xl=s(),u(tt.$$.fragment),kl=s(),nt=i("p"),this.h()},l(e){const t=Yn("svelte-u9bgzb",document.head);o=r(t,"META",{name:!0,content:!0}),t.forEach(l),T=a(e),y=r(e,"P",{}),An(y).forEach(l),w=a(e),M(J.$$.fragment,e),m=a(e),M(b.$$.fragment,e),x=a(e),M(j.$$.fragment,e),C=a(e),V=r(e,"P",{"data-svelte-h":!0}),p(V)!=="svelte-166ewzk"&&(V.textContent=Vl),ot=a(e),M(A.$$.fragment,e),it=a(e),W=r(e,"P",{"data-svelte-h":!0}),p(W)!=="svelte-b35baa"&&(W.textContent=Al),rt=a(e),M(B.$$.fragment,e),pt=a(e),M(G.$$.fragment,e),mt=a(e),L=r(e,"P",{"data-svelte-h":!0}),p(L)!=="svelte-y3rr52"&&(L.innerHTML=Wl),ut=a(e),E=r(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-1und41a"&&(E.innerHTML=Bl),Mt=a(e),S=r(e,"P",{"data-svelte-h":!0}),p(S)!=="svelte-2ri5fz"&&(S.textContent=Gl),dt=a(e),F=r(e,"UL",{"data-svelte-h":!0}),p(F)!=="svelte-x4afxf"&&(F.innerHTML=Ll),ct=a(e),Y=r(e,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-189k6c5"&&(Y.textContent=El),ft=a(e),M(I.$$.fragment,e),ht=a(e),M(R.$$.fragment,e),wt=a(e),z=r(e,"P",{"data-svelte-h":!0}),p(z)!=="svelte-jz73et"&&(z.textContent=Sl),yt=a(e),H=r(e,"P",{"data-svelte-h":!0}),p(H)!=="svelte-1qm5x97"&&(H.innerHTML=Fl),Tt=a(e),M(Q.$$.fragment,e),bt=a(e),P=r(e,"P",{"data-svelte-h":!0}),p(P)!=="svelte-257sak"&&(P.innerHTML=Yl),jt=a(e),M(D.$$.fragment,e),Jt=a(e),q=r(e,"P",{"data-svelte-h":!0}),p(q)!=="svelte-cb2pjp"&&(q.textContent=Rl),Ut=a(e),M(O.$$.fragment,e),gt=a(e),M(K.$$.fragment,e),$t=a(e),ee=r(e,"P",{"data-svelte-h":!0}),p(ee)!=="svelte-p91rke"&&(ee.textContent=zl),Ct=a(e),M(v.$$.fragment,e),xt=a(e),te=r(e,"P",{"data-svelte-h":!0}),p(te)!=="svelte-9ahyv2"&&(te.textContent=Hl),It=a(e),le=r(e,"UL",{"data-svelte-h":!0}),p(le)!=="svelte-uw9cro"&&(le.innerHTML=Ql),vt=a(e),ne=r(e,"P",{"data-svelte-h":!0}),p(ne)!=="svelte-1ldr9ls"&&(ne.textContent=Pl),_t=a(e),M(se.$$.fragment,e),Zt=a(e),ae=r(e,"P",{"data-svelte-h":!0}),p(ae)!=="svelte-eag7fc"&&(ae.textContent=Dl),Xt=a(e),M(oe.$$.fragment,e),kt=a(e),ie=r(e,"P",{"data-svelte-h":!0}),p(ie)!=="svelte-1k4xv5r"&&(ie.innerHTML=ql),Nt=a(e),M(_.$$.fragment,e),Vt=a(e),re=r(e,"P",{"data-svelte-h":!0}),p(re)!=="svelte-9jbb9y"&&(re.textContent=Ol),At=a(e),M(pe.$$.fragment,e),Wt=a(e),me=r(e,"P",{"data-svelte-h":!0}),p(me)!=="svelte-v1u856"&&(me.innerHTML=Kl),Bt=a(e),ue=r(e,"P",{"data-svelte-h":!0}),p(ue)!=="svelte-vql48n"&&(ue.innerHTML=en),Gt=a(e),M(Me.$$.fragment,e),Lt=a(e),de=r(e,"P",{"data-svelte-h":!0}),p(de)!=="svelte-1hgclei"&&(de.innerHTML=tn),Et=a(e),ce=r(e,"P",{"data-svelte-h":!0}),p(ce)!=="svelte-yhg472"&&(ce.innerHTML=ln),St=a(e),M(fe.$$.fragment,e),Ft=a(e),he=r(e,"P",{"data-svelte-h":!0}),p(he)!=="svelte-1ab1ldn"&&(he.innerHTML=nn),Yt=a(e),we=r(e,"P",{"data-svelte-h":!0}),p(we)!=="svelte-3rh7j4"&&(we.innerHTML=sn),Rt=a(e),M(ye.$$.fragment,e),zt=a(e),Te=r(e,"P",{"data-svelte-h":!0}),p(Te)!=="svelte-1ugid4e"&&(Te.innerHTML=an),Ht=a(e),M(Z.$$.fragment,e),Qt=a(e),M(be.$$.fragment,e),Pt=a(e),je=r(e,"P",{"data-svelte-h":!0}),p(je)!=="svelte-vfmt4g"&&(je.textContent=on),Dt=a(e),Je=r(e,"P",{"data-svelte-h":!0}),p(Je)!=="svelte-1lozjup"&&(Je.textContent=rn),qt=a(e),Ue=r(e,"UL",{"data-svelte-h":!0}),p(Ue)!=="svelte-1bxicg6"&&(Ue.innerHTML=pn),Ot=a(e),M(X.$$.fragment,e),Kt=a(e),ge=r(e,"P",{"data-svelte-h":!0}),p(ge)!=="svelte-1s0jvqd"&&(ge.textContent=mn),el=a(e),M($e.$$.fragment,e),tl=a(e),M(Ce.$$.fragment,e),ll=a(e),xe=r(e,"P",{"data-svelte-h":!0}),p(xe)!=="svelte-12l5vu1"&&(xe.textContent=un),nl=a(e),Ie=r(e,"P",{"data-svelte-h":!0}),p(Ie)!=="svelte-1cwhii6"&&(Ie.textContent=Mn),sl=a(e),ve=r(e,"UL",{"data-svelte-h":!0}),p(ve)!=="svelte-17e9k9s"&&(ve.innerHTML=dn),al=a(e),M(k.$$.fragment,e),ol=a(e),_e=r(e,"P",{"data-svelte-h":!0}),p(_e)!=="svelte-112a1oc"&&(_e.textContent=cn),il=a(e),M(Ze.$$.fragment,e),rl=a(e),M(Xe.$$.fragment,e),pl=a(e),ke=r(e,"P",{"data-svelte-h":!0}),p(ke)!=="svelte-1fq0lez"&&(ke.textContent=fn),ml=a(e),Ne=r(e,"UL",{"data-svelte-h":!0}),p(Ne)!=="svelte-1tmw6po"&&(Ne.innerHTML=hn),ul=a(e),M(Ve.$$.fragment,e),Ml=a(e),M(N.$$.fragment,e),dl=a(e),Ae=r(e,"P",{"data-svelte-h":!0}),p(Ae)!=="svelte-1jrgmyt"&&(Ae.textContent=wn),cl=a(e),We=r(e,"UL",{"data-svelte-h":!0}),p(We)!=="svelte-1y4z1sb"&&(We.innerHTML=yn),fl=a(e),Be=r(e,"P",{"data-svelte-h":!0}),p(Be)!=="svelte-154my7d"&&(Be.innerHTML=Tn),hl=a(e),M(Ge.$$.fragment,e),wl=a(e),Le=r(e,"P",{"data-svelte-h":!0}),p(Le)!=="svelte-1266wn0"&&(Le.innerHTML=bn),yl=a(e),Ee=r(e,"P",{"data-svelte-h":!0}),p(Ee)!=="svelte-1g2jw6w"&&(Ee.textContent=jn),Tl=a(e),Se=r(e,"UL",{"data-svelte-h":!0}),p(Se)!=="svelte-14ajc9a"&&(Se.innerHTML=Jn),bl=a(e),M(Fe.$$.fragment,e),jl=a(e),Ye=r(e,"P",{"data-svelte-h":!0}),p(Ye)!=="svelte-1pkpov5"&&(Ye.textContent=Un),Jl=a(e),Re=r(e,"P",{"data-svelte-h":!0}),p(Re)!=="svelte-imsem4"&&(Re.textContent=gn),Ul=a(e),M(ze.$$.fragment,e),gl=a(e),He=r(e,"P",{"data-svelte-h":!0}),p(He)!=="svelte-ezhare"&&(He.innerHTML=$n),$l=a(e),M(Qe.$$.fragment,e),Cl=a(e),Pe=r(e,"P",{"data-svelte-h":!0}),p(Pe)!=="svelte-11sp8vf"&&(Pe.textContent=Cn),xl=a(e),M(De.$$.fragment,e),Il=a(e),qe=r(e,"P",{"data-svelte-h":!0}),p(qe)!=="svelte-10bd5lk"&&(qe.innerHTML=xn),vl=a(e),Oe=r(e,"P",{"data-svelte-h":!0}),p(Oe)!=="svelte-xq57dd"&&(Oe.textContent=In),_l=a(e),M(Ke.$$.fragment,e),Zl=a(e),M(et.$$.fragment,e),Xl=a(e),M(tt.$$.fragment,e),kl=a(e),nt=r(e,"P",{}),An(nt).forEach(l),this.h()},h(){st(o,"name","hf:doc:metadata"),st(o,"content",ts)},m(e,t){Rn(document.head,o),n(e,T,t),n(e,y,t),n(e,w,t),d(J,e,t),n(e,m,t),d(b,e,t),n(e,x,t),d(j,e,t),n(e,C,t),n(e,V,t),n(e,ot,t),d(A,e,t),n(e,it,t),n(e,W,t),n(e,rt,t),d(B,e,t),n(e,pt,t),d(G,e,t),n(e,mt,t),n(e,L,t),n(e,ut,t),n(e,E,t),n(e,Mt,t),n(e,S,t),n(e,dt,t),n(e,F,t),n(e,ct,t),n(e,Y,t),n(e,ft,t),d(I,e,t),n(e,ht,t),d(R,e,t),n(e,wt,t),n(e,z,t),n(e,yt,t),n(e,H,t),n(e,Tt,t),d(Q,e,t),n(e,bt,t),n(e,P,t),n(e,jt,t),d(D,e,t),n(e,Jt,t),n(e,q,t),n(e,Ut,t),d(O,e,t),n(e,gt,t),d(K,e,t),n(e,$t,t),n(e,ee,t),n(e,Ct,t),d(v,e,t),n(e,xt,t),n(e,te,t),n(e,It,t),n(e,le,t),n(e,vt,t),n(e,ne,t),n(e,_t,t),d(se,e,t),n(e,Zt,t),n(e,ae,t),n(e,Xt,t),d(oe,e,t),n(e,kt,t),n(e,ie,t),n(e,Nt,t),d(_,e,t),n(e,Vt,t),n(e,re,t),n(e,At,t),d(pe,e,t),n(e,Wt,t),n(e,me,t),n(e,Bt,t),n(e,ue,t),n(e,Gt,t),d(Me,e,t),n(e,Lt,t),n(e,de,t),n(e,Et,t),n(e,ce,t),n(e,St,t),d(fe,e,t),n(e,Ft,t),n(e,he,t),n(e,Yt,t),n(e,we,t),n(e,Rt,t),d(ye,e,t),n(e,zt,t),n(e,Te,t),n(e,Ht,t),d(Z,e,t),n(e,Qt,t),d(be,e,t),n(e,Pt,t),n(e,je,t),n(e,Dt,t),n(e,Je,t),n(e,qt,t),n(e,Ue,t),n(e,Ot,t),d(X,e,t),n(e,Kt,t),n(e,ge,t),n(e,el,t),d($e,e,t),n(e,tl,t),d(Ce,e,t),n(e,ll,t),n(e,xe,t),n(e,nl,t),n(e,Ie,t),n(e,sl,t),n(e,ve,t),n(e,al,t),d(k,e,t),n(e,ol,t),n(e,_e,t),n(e,il,t),d(Ze,e,t),n(e,rl,t),d(Xe,e,t),n(e,pl,t),n(e,ke,t),n(e,ml,t),n(e,Ne,t),n(e,ul,t),d(Ve,e,t),n(e,Ml,t),d(N,e,t),n(e,dl,t),n(e,Ae,t),n(e,cl,t),n(e,We,t),n(e,fl,t),n(e,Be,t),n(e,hl,t),d(Ge,e,t),n(e,wl,t),n(e,Le,t),n(e,yl,t),n(e,Ee,t),n(e,Tl,t),n(e,Se,t),n(e,bl,t),d(Fe,e,t),n(e,jl,t),n(e,Ye,t),n(e,Jl,t),n(e,Re,t),n(e,Ul,t),d(ze,e,t),n(e,gl,t),n(e,He,t),n(e,$l,t),d(Qe,e,t),n(e,Cl,t),n(e,Pe,t),n(e,xl,t),d(De,e,t),n(e,Il,t),n(e,qe,t),n(e,vl,t),n(e,Oe,t),n(e,_l,t),d(Ke,e,t),n(e,Zl,t),d(et,e,t),n(e,Xl,t),d(tt,e,t),n(e,kl,t),n(e,nt,t),Nl=!0},p(e,[t]){const vn={};t&2&&(vn.$$scope={dirty:t,ctx:e}),I.$set(vn);const _n={};t&2&&(_n.$$scope={dirty:t,ctx:e}),v.$set(_n);const Zn={};t&2&&(Zn.$$scope={dirty:t,ctx:e}),_.$set(Zn);const Xn={};t&2&&(Xn.$$scope={dirty:t,ctx:e}),Z.$set(Xn);const kn={};t&2&&(kn.$$scope={dirty:t,ctx:e}),X.$set(kn);const Nn={};t&2&&(Nn.$$scope={dirty:t,ctx:e}),k.$set(Nn);const Vn={};t&2&&(Vn.$$scope={dirty:t,ctx:e}),N.$set(Vn)},i(e){Nl||(c(J.$$.fragment,e),c(b.$$.fragment,e),c(j.$$.fragment,e),c(A.$$.fragment,e),c(B.$$.fragment,e),c(G.$$.fragment,e),c(I.$$.fragment,e),c(R.$$.fragment,e),c(Q.$$.fragment,e),c(D.$$.fragment,e),c(O.$$.fragment,e),c(K.$$.fragment,e),c(v.$$.fragment,e),c(se.$$.fragment,e),c(oe.$$.fragment,e),c(_.$$.fragment,e),c(pe.$$.fragment,e),c(Me.$$.fragment,e),c(fe.$$.fragment,e),c(ye.$$.fragment,e),c(Z.$$.fragment,e),c(be.$$.fragment,e),c(X.$$.fragment,e),c($e.$$.fragment,e),c(Ce.$$.fragment,e),c(k.$$.fragment,e),c(Ze.$$.fragment,e),c(Xe.$$.fragment,e),c(Ve.$$.fragment,e),c(N.$$.fragment,e),c(Ge.$$.fragment,e),c(Fe.$$.fragment,e),c(ze.$$.fragment,e),c(Qe.$$.fragment,e),c(De.$$.fragment,e),c(Ke.$$.fragment,e),c(et.$$.fragment,e),c(tt.$$.fragment,e),Nl=!0)},o(e){f(J.$$.fragment,e),f(b.$$.fragment,e),f(j.$$.fragment,e),f(A.$$.fragment,e),f(B.$$.fragment,e),f(G.$$.fragment,e),f(I.$$.fragment,e),f(R.$$.fragment,e),f(Q.$$.fragment,e),f(D.$$.fragment,e),f(O.$$.fragment,e),f(K.$$.fragment,e),f(v.$$.fragment,e),f(se.$$.fragment,e),f(oe.$$.fragment,e),f(_.$$.fragment,e),f(pe.$$.fragment,e),f(Me.$$.fragment,e),f(fe.$$.fragment,e),f(ye.$$.fragment,e),f(Z.$$.fragment,e),f(be.$$.fragment,e),f(X.$$.fragment,e),f($e.$$.fragment,e),f(Ce.$$.fragment,e),f(k.$$.fragment,e),f(Ze.$$.fragment,e),f(Xe.$$.fragment,e),f(Ve.$$.fragment,e),f(N.$$.fragment,e),f(Ge.$$.fragment,e),f(Fe.$$.fragment,e),f(ze.$$.fragment,e),f(Qe.$$.fragment,e),f(De.$$.fragment,e),f(Ke.$$.fragment,e),f(et.$$.fragment,e),f(tt.$$.fragment,e),Nl=!1},d(e){e&&(l(T),l(y),l(w),l(m),l(x),l(C),l(V),l(ot),l(it),l(W),l(rt),l(pt),l(mt),l(L),l(ut),l(E),l(Mt),l(S),l(dt),l(F),l(ct),l(Y),l(ft),l(ht),l(wt),l(z),l(yt),l(H),l(Tt),l(bt),l(P),l(jt),l(Jt),l(q),l(Ut),l(gt),l($t),l(ee),l(Ct),l(xt),l(te),l(It),l(le),l(vt),l(ne),l(_t),l(Zt),l(ae),l(Xt),l(kt),l(ie),l(Nt),l(Vt),l(re),l(At),l(Wt),l(me),l(Bt),l(ue),l(Gt),l(Lt),l(de),l(Et),l(ce),l(St),l(Ft),l(he),l(Yt),l(we),l(Rt),l(zt),l(Te),l(Ht),l(Qt),l(Pt),l(je),l(Dt),l(Je),l(qt),l(Ue),l(Ot),l(Kt),l(ge),l(el),l(tl),l(ll),l(xe),l(nl),l(Ie),l(sl),l(ve),l(al),l(ol),l(_e),l(il),l(rl),l(pl),l(ke),l(ml),l(Ne),l(ul),l(Ml),l(dl),l(Ae),l(cl),l(We),l(fl),l(Be),l(hl),l(wl),l(Le),l(yl),l(Ee),l(Tl),l(Se),l(bl),l(jl),l(Ye),l(Jl),l(Re),l(Ul),l(gl),l(He),l($l),l(Cl),l(Pe),l(xl),l(Il),l(qe),l(vl),l(Oe),l(_l),l(Zl),l(Xl),l(kl),l(nt)),l(o),h(J,e),h(b,e),h(j,e),h(A,e),h(B,e),h(G,e),h(I,e),h(R,e),h(Q,e),h(D,e),h(O,e),h(K,e),h(v,e),h(se,e),h(oe,e),h(_,e),h(pe,e),h(Me,e),h(fe,e),h(ye,e),h(Z,e),h(be,e),h(X,e),h($e,e),h(Ce,e),h(k,e),h(Ze,e),h(Xe,e),h(Ve,e),h(N,e),h(Ge,e),h(Fe,e),h(ze,e),h(Qe,e),h(De,e),h(Ke,e),h(et,e),h(tt,e)}}}const ts='{"title":"Export a model to Neuron","local":"export-a-model-to-neuron","sections":[{"title":"Summary","local":"summary","sections":[],"depth":2},{"title":"Why compile to Neuron model?","local":"why-compile-to-neuron-model","sections":[],"depth":2},{"title":"Exporting a model to Neuron using the CLI","local":"exporting-a-model-to-neuron-using-the-cli","sections":[{"title":"Exporting standard (non-LLM) models","local":"exporting-standard-non-llm-models","sections":[],"depth":3},{"title":"Exporting Stable Diffusion to Neuron","local":"exporting-stable-diffusion-to-neuron","sections":[],"depth":3},{"title":"Exporting Stable Diffusion XL to Neuron","local":"exporting-stable-diffusion-xl-to-neuron","sections":[],"depth":3},{"title":"Exporting LLMs to Neuron","local":"exporting-llms-to-neuron","sections":[],"depth":3}],"depth":2},{"title":"Exporting neuron models using NeuronX docker images","local":"exporting-neuron-models-using-neuronx-docker-images","sections":[],"depth":2},{"title":"Exporting options and Docker / SageMaker environment variables","local":"exporting-options-and-docker--sagemaker-environment-variables","sections":[{"title":"TGI","local":"tgi","sections":[],"depth":3},{"title":"vLLM","local":"vllm","sections":[],"depth":3}],"depth":2}],"depth":1}';function ls(g){return Ln(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ps extends Sn{constructor(o){super(),Fn(this,o,ls,es,Gn,{})}}export{ps as component};

Xet Storage Details

Size:
47.2 kB
·
Xet hash:
2e48cda3b51f9eec09ad22ae1087429919ac399b0d9917b558e520db76cc2a50

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.