Buckets:

download
raw
35.4 kB
import{s as Ve,n as Se,o as Ge}from"../chunks/scheduler.85c25b89.js";import{S as Qe,i as Fe,g as o,s as n,r as p,A as Xe,h as i,f as l,c as a,j as ke,u,x as M,k as _e,y as He,a as s,v as c,d as r,t as h,w as m}from"../chunks/index.c9bcf812.js";import{C as d}from"../chunks/CodeBlock.c004bd26.js";import{H as ut}from"../chunks/getInferenceSnippets.5ea0a804.js";function Le(ae){let T,ct,Mt,rt,y,ht,j,oe='In this tutorial you will learn how to deploy <a href="https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct" rel="nofollow">/meta-llama/Llama-3.3-70B-Instruct</a> model on AWS Inferentia2 with Hugging Face Optimum on Amazon SageMaker. We are going to use the Hugging Face TGI Neuron Container, a purpose-built Inference Container to easily deploy LLMs on AWS Inferentia2 powered by<a href="https://huggingface.co/docs/text-generation-inference/index" rel="nofollow">Text Generation Inference</a> and <a href="https://huggingface.co/docs/optimum-neuron/index" rel="nofollow">Optimum Neuron</a>.',mt,J,ie="We will cover how to:",Tt,I,Me='<li><a href="#1-setup-development-environment">Setup development environment</a></li> <li><a href="#2-retrieve-the-new-hugging-face-tgi-neuron-dlc">Retrieve the new Hugging Face TGI Neuron DLC</a></li> <li><a href="#3-deploy-llama-33-70b-to-inferentia2">Deploy Llama 3.3 70B to inferentia2</a></li> <li><a href="#4-clean-up">Clean up</a></li>',dt,U,pe="Lets get started! 🚀",yt,w,ue='<a href="https://aws.amazon.com/ec2/instance-types/inf2/" rel="nofollow">AWS inferentia (Inf2)</a> are purpose-built EC2 for deep learning (DL) inference workloads. Here are the different instances of the Inferentia2 family.',jt,g,ce="<thead><tr><th>instance size</th> <th>accelerators</th> <th>Neuron Cores</th> <th>accelerator memory</th> <th>vCPU</th> <th>CPU Memory</th> <th>on-demand price ($/h)</th></tr></thead> <tbody><tr><td>inf2.xlarge</td> <td>1</td> <td>2</td> <td>32</td> <td>4</td> <td>16</td> <td>0.76</td></tr> <tr><td>inf2.8xlarge</td> <td>1</td> <td>2</td> <td>32</td> <td>32</td> <td>128</td> <td>1.97</td></tr> <tr><td>inf2.24xlarge</td> <td>6</td> <td>12</td> <td>192</td> <td>96</td> <td>384</td> <td>6.49</td></tr> <tr><td>inf2.48xlarge</td> <td>12</td> <td>24</td> <td>384</td> <td>192</td> <td>768</td> <td>12.98</td></tr></tbody>",Jt,f,It,b,re="For this tutorial, we are going to use a Notebook Instance in Amazon SageMaker with the Python 3 (ipykernel) and the <code>sagemaker</code> python SDK to deploy Llama 3.3 70B to a SageMaker inference endpoint.",Ut,C,he="Make sur you have the latest version of the SageMaker SDK installed.",wt,N,gt,q,me="Then, instantiate the sagemaker role and session.",ft,B,bt,E,Ct,A,Te='The latest Hugging Face TGI Neuron DLCs can be used to run inference on AWS Inferentia2. You can use the <code>get_huggingface_llm_image_uri</code> method of the <code>sagemaker</code> SDK to retrieve the appropriate Hugging Face TGI Neuron DLC URI based on your desired <code>backend</code>, <code>session</code>, <code>region</code>, and <code>version</code>. You can find the latest version of the container <a href="https://huggingface.co/docs/optimum-neuron/containers" rel="nofollow">here</a>, if not yet added to the SageMaker SDK.',Nt,W,de="At the time of the tutorial, the latest version of the container is not yet added to the Sagemaker SDK so we will not use <code>get_huggingface_llm_image_uri</code>.",qt,Z,Bt,v,Et,x,ye=`At the time of writing, <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/v2.6.0/general/arch/neuron-features/dynamic-shapes.html#neuron-dynamic-shapes" rel="nofollow">AWS Inferentia2 does not support dynamic shapes for inference</a>, which means that we need to specify our sequence length and batch size ahead of time.
To make it easier for customers to utilize the full power of Inferentia2, we created a <a href="https://huggingface.co/docs/optimum-neuron/guides/cache_system" rel="nofollow">neuron model cache</a>, which contains pre-compiled configurations for the most popular LLMs, including Llama 3.3 70B.`,At,z,je='This means we don’t need to compile the model ourselves, but we can use the pre-compiled model from the cache. You can find compiled/cached configurations on the <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache/tree/main/inference-cache-config" rel="nofollow">Hugging Face Hub</a>. If your desired configuration is not yet cached, you can compile it yourself using the <a href="https://huggingface.co/docs/optimum-neuron/guides/export_model" rel="nofollow">Optimum CLI</a> or open a request at the <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache/discussions" rel="nofollow">Cache repository</a>.',Wt,k,Je="<strong>Deploying Llama 3.3 70B to a SageMaker Endpoint</strong>",Zt,_,Ie="Before deploying the model to Amazon SageMaker, we must define the TGI Neuron endpoint configuration. We need to make sure the following additional parameters are defined:",vt,V,Ue="<li><code>HF_NUM_CORES</code>: Number of Neuron Cores used for the compilation.</li> <li><code>HF_BATCH_SIZE</code>: The batch size that was used to compile the model.</li> <li><code>HF_SEQUENCE_LENGTH</code>: The sequence length that was used to compile the model.</li> <li><code>HF_AUTO_CAST_TYPE</code>: The auto cast type that was used to compile the model.</li>",xt,S,we="We still need to define traditional TGI parameters with:",zt,G,ge="<li><code>HF_MODEL_ID</code>: The Hugging Face model ID.</li> <li><code>HF_TOKEN</code>: The Hugging Face API token to access gated models.</li> <li><code>MAX_BATCH_SIZE</code>: The maximum batch size that the model can handle, equal to the batch size used for compilation.</li> <li><code>MAX_INPUT_TOKEN</code>: The maximum input length that the model can handle.</li> <li><code>MAX_TOTAL_TOKENS</code>: The maximum total tokens the model can generate, equal to the sequence length used for compilation.</li>",kt,Q,fe="Optionnaly, you can configure the endpoint to support chat templates:",_t,F,be="<li><code>MESSAGES_API_ENABLED</code>: Enable Messages API</li>",Vt,X,Ce="<strong>Select the right instance type</strong>",St,H,Ne='Llama 3.3 70B is a large model and requires a lot of memory. We are going to use the <code>inf2.48xlarge</code> instance type, which has 192 vCPUs and 384 GB of accelerator memory. The <code>inf2.48xlarge</code> instance comes with 12 Inferentia2 accelerators that include 24 Neuron Cores. If you want to find the cached configurations for Llama 3.3 70B, you can find them <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache/blob/main/inference-cache-config/llama3-70b.json#L16" rel="nofollow">here</a>. In our case we will use a batch size of 4 and a sequence length of 4096.',Gt,L,qe='Before we can deploy Llama 3.3 70B to Inferentia2, we need to make sure we have the necessary permissions to access the model. You can request access to the model <a href="https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct" rel="nofollow">here</a> and create a User access token following this <a href="https://huggingface.co/docs/hub/en/security-tokens" rel="nofollow">guide</a>.',Qt,$,Be="After that we can create our endpoint configuration and deploy the model to Amazon SageMaker. We will deploy the endpoint with the Messages API enabled, so that it is fully compatible with the OpenAI Chat Completion API.",Ft,R,Xt,Y,Ee="After we have created the <code>HuggingFaceModel</code> we can deploy it to Amazon SageMaker using the <code>deploy</code> method. We will deploy the model with the <code>ml.inf2.48xlarge</code> instance type. TGI will automatically distribute and shard the model across all Inferentia devices.",Ht,D,Lt,P,Ae="SageMaker will now create our endpoint and deploy the model to it. It takes around 30 minutes for deployment.",$t,O,We="After our endpoint is deployed we can run inference on it. We will use the <code>predict</code> method from the <code>predictor</code> to run inference on our endpoint.",Rt,K,Ze="The endpoint supports the Messages API, which is fully compatible with the OpenAI Chat Completion API. The Messages API allows us to interact with the model in a conversational way. We can define the role of the message and the content. The role can be either <code>system</code>,<code>assistant</code> or <code>user</code>. The <code>system</code> role is used to provide context to the model and the <code>user</code> role is used to ask questions or provide input to the model.",Yt,tt,ve='Parameters can be defined as in the <code>parameters</code> attribute of the payload. Check out the chat completion <a href="https://platform.openai.com/docs/api-reference/chat/create" rel="nofollow">documentation</a> to find supported parameters.',Dt,et,Pt,lt,Ot,st,xe="Okay lets test it.",Kt,nt,te,at,ee,ot,ze="To clean up, we can delete the model and endpoint.",le,it,se,pt,ne;return y=new ut({props:{title:"Deploy Llama 3.3 70B on AWS Inferentia2",local:"deploy-llama-33-70b-on-aws-inferentia2",headingTag:"h1"}}),f=new ut({props:{title:"1. Setup development environment",local:"1-setup-development-environment",headingTag:"h2"}}),N=new d({props:{code:"IXBpcCUyMGluc3RhbGwlMjBzYWdlbWFrZXIlMjAtLXVwZ3JhZGUlMjAtLXF1aWV0",highlighted:"!pip install sagemaker --upgrade --quiet",wrap:!1}}),B=new d({props:{code:"aW1wb3J0JTIwc2FnZW1ha2VyJTBBaW1wb3J0JTIwYm90bzMlMEElMEFzZXNzJTIwJTNEJTIwc2FnZW1ha2VyLlNlc3Npb24oKSUwQSUyMyUyMHNhZ2VtYWtlciUyMHNlc3Npb24lMjBidWNrZXQlMjAtJTNFJTIwdXNlZCUyMGZvciUyMHVwbG9hZGluZyUyMGRhdGElMkMlMjBtb2RlbHMlMjBhbmQlMjBsb2dzJTBBJTIzJTIwc2FnZW1ha2VyJTIwd2lsbCUyMGF1dG9tYXRpY2FsbHklMjBjcmVhdGUlMjB0aGlzJTIwYnVja2V0JTIwaWYlMjBpdCUyMG5vdCUyMGV4aXN0cyUwQXNhZ2VtYWtlcl9zZXNzaW9uX2J1Y2tldCUyMCUzRCUyME5vbmUlMEFpZiUyMHNhZ2VtYWtlcl9zZXNzaW9uX2J1Y2tldCUyMGlzJTIwTm9uZSUyMGFuZCUyMHNlc3MlMjBpcyUyMG5vdCUyME5vbmUlM0ElMEElMjAlMjAlMjAlMjAlMjMlMjBzZXQlMjB0byUyMGRlZmF1bHQlMjBidWNrZXQlMjBpZiUyMGElMjBidWNrZXQlMjBuYW1lJTIwaXMlMjBub3QlMjBnaXZlbiUwQSUyMCUyMCUyMCUyMHNhZ2VtYWtlcl9zZXNzaW9uX2J1Y2tldCUyMCUzRCUyMHNlc3MuZGVmYXVsdF9idWNrZXQoKSUwQSUwQXRyeSUzQSUwQSUyMCUyMCUyMCUyMHJvbGUlMjAlM0QlMjBzYWdlbWFrZXIuZ2V0X2V4ZWN1dGlvbl9yb2xlKCklMEFleGNlcHQlMjBWYWx1ZUVycm9yJTNBJTBBJTIwJTIwJTIwJTIwaWFtJTIwJTNEJTIwYm90bzMuY2xpZW50KCUyMmlhbSUyMiklMEElMjAlMjAlMjAlMjByb2xlJTIwJTNEJTIwaWFtLmdldF9yb2xlKFJvbGVOYW1lJTNEJTIyc2FnZW1ha2VyX2V4ZWN1dGlvbl9yb2xlJTIyKSU1QiUyMlJvbGUlMjIlNUQlNUIlMjJBcm4lMjIlNUQlMEElMEFzZXNzJTIwJTNEJTIwc2FnZW1ha2VyLlNlc3Npb24oZGVmYXVsdF9idWNrZXQlM0RzYWdlbWFrZXJfc2Vzc2lvbl9idWNrZXQpJTBBJTBBcHJpbnQoZiUyMnNhZ2VtYWtlciUyMHJvbGUlMjBhcm4lM0ElMjAlN0Jyb2xlJTdEJTIyKSUwQXByaW50KGYlMjJzYWdlbWFrZXIlMjBzZXNzaW9uJTIwcmVnaW9uJTNBJTIwJTdCc2Vzcy5ib3RvX3JlZ2lvbl9uYW1lJTdEJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> sagemaker
<span class="hljs-keyword">import</span> boto3
sess = sagemaker.Session()
<span class="hljs-comment"># sagemaker session bucket -&gt; used for uploading data, models and logs</span>
<span class="hljs-comment"># sagemaker will automatically create this bucket if it not exists</span>
sagemaker_session_bucket = <span class="hljs-literal">None</span>
<span class="hljs-keyword">if</span> sagemaker_session_bucket <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span> <span class="hljs-keyword">and</span> sess <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>:
<span class="hljs-comment"># set to default bucket if a bucket name is not given</span>
sagemaker_session_bucket = sess.default_bucket()
<span class="hljs-keyword">try</span>:
role = sagemaker.get_execution_role()
<span class="hljs-keyword">except</span> ValueError:
iam = boto3.client(<span class="hljs-string">&quot;iam&quot;</span>)
role = iam.get_role(RoleName=<span class="hljs-string">&quot;sagemaker_execution_role&quot;</span>)[<span class="hljs-string">&quot;Role&quot;</span>][<span class="hljs-string">&quot;Arn&quot;</span>]
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;sagemaker role arn: <span class="hljs-subst">{role}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;sagemaker session region: <span class="hljs-subst">{sess.boto_region_name}</span>&quot;</span>)`,wrap:!1}}),E=new ut({props:{title:"2. Retrieve the latest Hugging Face TGI Neuron DLC",local:"2-retrieve-the-latest-hugging-face-tgi-neuron-dlc",headingTag:"h2"}}),Z=new d({props:{code:"JTIzJTIwcHVsbGVkJTIwZnJvbSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmF3cyUyRnNhZ2VtYWtlci1weXRob24tc2RrJTJGYmxvYiUyRm1hc3RlciUyRnNyYyUyRnNhZ2VtYWtlciUyRmltYWdlX3VyaV9jb25maWclMkZodWdnaW5nZmFjZS1sbG0tbmV1cm9ueC5qc29uJTBBYWNjb3VudF9pZF9kaWN0JTIwJTNEJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIyYXAtbm9ydGhlYXN0LTElMjIlM0ElMjAlMjI3NjMxMDQzNTE4ODQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJhcC1zb3V0aC0xJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyYXAtc291dGgtMiUyMiUzQSUyMCUyMjc3MjE1MzE1ODQ1MiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMmFwLXNvdXRoZWFzdC0xJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyYXAtc291dGhlYXN0LTIlMjIlM0ElMjAlMjI3NjMxMDQzNTE4ODQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJhcC1zb3V0aGVhc3QtNCUyMiUzQSUyMCUyMjQ1NzQ0NzI3NDMyMiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMmFwLXNvdXRoZWFzdC01JTIyJTNBJTIwJTIyNTUwMjI1NDMzNDYyJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyYXAtc291dGhlYXN0LTclMjIlM0ElMjAlMjI1OTAxODM4MTM0MzclMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJjbi1ub3J0aC0xJTIyJTNBJTIwJTIyNzI3ODk3NDcxODA3JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyY24tbm9ydGh3ZXN0LTElMjIlM0ElMjAlMjI3Mjc4OTc0NzE4MDclMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJldS1jZW50cmFsLTElMjIlM0ElMjAlMjI3NjMxMDQzNTE4ODQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJldS1jZW50cmFsLTIlMjIlM0ElMjAlMjIzODA0MjA4MDk2ODglMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJldS1zb3V0aC0yJTIyJTNBJTIwJTIyNTAzMjI3Mzc2Nzg1JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyZXUtd2VzdC0xJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyZXUtd2VzdC0zJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyaWwtY2VudHJhbC0xJTIyJTNBJTIwJTIyNzgwNTQzMDIyMTI2JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIybXgtY2VudHJhbC0xJTIyJTNBJTIwJTIyNjM3NDIzMjM5OTQyJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyc2EtZWFzdC0xJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIydXMtZWFzdC0xJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIydXMtZWFzdC0yJTIyJTNBJTIwJTIyNzYzMTA0MzUxODg0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIydXMtZ292LWVhc3QtMSUyMiUzQSUyMCUyMjQ0NjA0NTA4NjQxMiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMnVzLWdvdi13ZXN0LTElMjIlM0ElMjAlMjI0NDIzODY3NDQzNTMlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJ1cy13ZXN0LTIlMjIlM0ElMjAlMjI3NjMxMDQzNTE4ODQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJjYS13ZXN0LTElMjIlM0ElMjAlMjIyMDQ1MzgxNDM1NzIlMjIlMkMlMEElN0QlMEElMEFyZWdpb24lMjAlM0QlMjBib3RvMy5TZXNzaW9uKCkucmVnaW9uX25hbWUlMEFsbG1faW1hZ2UlMjAlM0QlMjBmJTIyJTdCYWNjb3VudF9pZF9kaWN0JTVCcmVnaW9uJTVEJTdELmRrci5lY3IuJTdCcmVnaW9uJTdELmFtYXpvbmF3cy5jb20lMkZodWdnaW5nZmFjZS1weXRvcmNoLXRnaS1pbmZlcmVuY2UlM0EyLjEuMi1vcHRpbXVtMC4wLjI4LW5ldXJvbngtcHkzMTAtdWJ1bnR1MjIuMDQlMjI=",highlighted:`<span class="hljs-comment"># pulled from https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/huggingface-llm-neuronx.json</span>
account_id_dict = {
<span class="hljs-string">&quot;ap-northeast-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;ap-south-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;ap-south-2&quot;</span>: <span class="hljs-string">&quot;772153158452&quot;</span>,
<span class="hljs-string">&quot;ap-southeast-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;ap-southeast-2&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;ap-southeast-4&quot;</span>: <span class="hljs-string">&quot;457447274322&quot;</span>,
<span class="hljs-string">&quot;ap-southeast-5&quot;</span>: <span class="hljs-string">&quot;550225433462&quot;</span>,
<span class="hljs-string">&quot;ap-southeast-7&quot;</span>: <span class="hljs-string">&quot;590183813437&quot;</span>,
<span class="hljs-string">&quot;cn-north-1&quot;</span>: <span class="hljs-string">&quot;727897471807&quot;</span>,
<span class="hljs-string">&quot;cn-northwest-1&quot;</span>: <span class="hljs-string">&quot;727897471807&quot;</span>,
<span class="hljs-string">&quot;eu-central-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;eu-central-2&quot;</span>: <span class="hljs-string">&quot;380420809688&quot;</span>,
<span class="hljs-string">&quot;eu-south-2&quot;</span>: <span class="hljs-string">&quot;503227376785&quot;</span>,
<span class="hljs-string">&quot;eu-west-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;eu-west-3&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;il-central-1&quot;</span>: <span class="hljs-string">&quot;780543022126&quot;</span>,
<span class="hljs-string">&quot;mx-central-1&quot;</span>: <span class="hljs-string">&quot;637423239942&quot;</span>,
<span class="hljs-string">&quot;sa-east-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;us-east-1&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;us-east-2&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;us-gov-east-1&quot;</span>: <span class="hljs-string">&quot;446045086412&quot;</span>,
<span class="hljs-string">&quot;us-gov-west-1&quot;</span>: <span class="hljs-string">&quot;442386744353&quot;</span>,
<span class="hljs-string">&quot;us-west-2&quot;</span>: <span class="hljs-string">&quot;763104351884&quot;</span>,
<span class="hljs-string">&quot;ca-west-1&quot;</span>: <span class="hljs-string">&quot;204538143572&quot;</span>,
}
region = boto3.Session().region_name
llm_image = <span class="hljs-string">f&quot;<span class="hljs-subst">{account_id_dict[region]}</span>.dkr.ecr.<span class="hljs-subst">{region}</span>.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.2-optimum0.0.28-neuronx-py310-ubuntu22.04&quot;</span>`,wrap:!1}}),v=new ut({props:{title:"3. Deploy Llama 3.3 70B to Inferentia2",local:"3-deploy-llama-33-70b-to-inferentia2",headingTag:"h2"}}),R=new d({props:{code:"ZnJvbSUyMHNhZ2VtYWtlci5odWdnaW5nZmFjZSUyMGltcG9ydCUyMEh1Z2dpbmdGYWNlTW9kZWwlMEElMEElMjMlMjBzYWdlbWFrZXIlMjBjb25maWclMEFpbnN0YW5jZV90eXBlJTIwJTNEJTIwJTIybWwuaW5mMi40OHhsYXJnZSUyMiUwQWhlYWx0aF9jaGVja190aW1lb3V0JTIwJTNEJTIwMzYwMCUyMCUyMCUyMyUyMGFkZGl0aW9uYWwlMjB0aW1lJTIwdG8lMjBsb2FkJTIwdGhlJTIwbW9kZWwlMEF2b2x1bWVfc2l6ZSUyMCUzRCUyMDUxMiUyMCUyMCUyMyUyMHNpemUlMjBpbiUyMEdCJTIwb2YlMjB0aGUlMjBFQlMlMjB2b2x1bWUlMEElMEElMjMlMjBEZWZpbmUlMjBNb2RlbCUyMGFuZCUyMEVuZHBvaW50JTIwY29uZmlndXJhdGlvbiUyMHBhcmFtZXRlciUwQWNvbmZpZyUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMkhGX01PREVMX0lEJTIyJTNBJTIwJTIybWV0YS1sbGFtYSUyRk1ldGEtTGxhbWEtMy03MEItSW5zdHJ1Y3QlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJIRl9OVU1fQ09SRVMlMjIlM0ElMjAlMjIyNCUyMiUyQyUyMCUyMCUyMyUyMG51bWJlciUyMG9mJTIwbmV1cm9uJTIwY29yZXMlMEElMjAlMjAlMjAlMjAlMjJIRl9BVVRPX0NBU1RfVFlQRSUyMiUzQSUyMCUyMmJmMTYlMjIlMkMlMjAlMjAlMjMlMjBkdHlwZSUyMG9mJTIwdGhlJTIwbW9kZWwlMEElMjAlMjAlMjAlMjAlMjJNQVhfQkFUQ0hfU0laRSUyMiUzQSUyMCUyMjQlMjIlMkMlMjAlMjAlMjMlMjBtYXglMjBiYXRjaCUyMHNpemUlMjBmb3IlMjB0aGUlMjBtb2RlbCUwQSUyMCUyMCUyMCUyMCUyMk1BWF9JTlBVVF9UT0tFTlMlMjIlM0ElMjAlMjI0MDAwJTIyJTJDJTIwJTIwJTIzJTIwbWF4JTIwbGVuZ3RoJTIwb2YlMjBpbnB1dCUyMHRleHQlMEElMjAlMjAlMjAlMjAlMjJNQVhfVE9UQUxfVE9LRU5TJTIyJTNBJTIwJTIyNDA5NiUyMiUyQyUyMCUyMCUyMyUyMG1heCUyMGxlbmd0aCUyMG9mJTIwZ2VuZXJhdGVkJTIwdGV4dCUwQSUyMCUyMCUyMCUyMCUyMk1FU1NBR0VTX0FQSV9FTkFCTEVEJTIyJTNBJTIwJTIydHJ1ZSUyMiUyQyUyMCUyMCUyMyUyMEVuYWJsZSUyMHRoZSUyMG1lc3NhZ2VzJTIwQVBJJTBBJTIwJTIwJTIwJTIwJTIySEZfVE9LRU4lMjIlM0ElMjAlMjIlM0NSRVBMQUNFJTIwV0lUSCUyMFlPVVIlMjBUT0tFTiUzRSUyMiUyQyUwQSU3RCUwQSUwQWFzc2VydCUyMCglMEElMjAlMjAlMjAlMjBjb25maWclNUIlMjJIRl9UT0tFTiUyMiU1RCUyMCElM0QlMjAlMjIlM0NSRVBMQUNFJTIwV0lUSCUyMFlPVVIlMjBUT0tFTiUzRSUyMiUwQSklMkMlMjAlMjJQbGVhc2UlMjByZXBsYWNlJTIwJyUzQ1JFUExBQ0UlMjBXSVRIJTIwWU9VUiUyMFRPS0VOJTNFJyUyMHdpdGglMjB5b3VyJTIwSHVnZ2luZyUyMEZhY2UlMjBIdWIlMjBBUEklMjB0b2tlbiUyMiUwQSUwQSUwQSUyMyUyMGNyZWF0ZSUyMEh1Z2dpbmdGYWNlTW9kZWwlMjB3aXRoJTIwdGhlJTIwaW1hZ2UlMjB1cmklMEFsbG1fbW9kZWwlMjAlM0QlMjBIdWdnaW5nRmFjZU1vZGVsKHJvbGUlM0Ryb2xlJTJDJTIwaW1hZ2VfdXJpJTNEbGxtX2ltYWdlJTJDJTIwZW52JTNEY29uZmlnKQ==",highlighted:`<span class="hljs-keyword">from</span> sagemaker.huggingface <span class="hljs-keyword">import</span> HuggingFaceModel
<span class="hljs-comment"># sagemaker config</span>
instance_type = <span class="hljs-string">&quot;ml.inf2.48xlarge&quot;</span>
health_check_timeout = <span class="hljs-number">3600</span> <span class="hljs-comment"># additional time to load the model</span>
volume_size = <span class="hljs-number">512</span> <span class="hljs-comment"># size in GB of the EBS volume</span>
<span class="hljs-comment"># Define Model and Endpoint configuration parameter</span>
config = {
<span class="hljs-string">&quot;HF_MODEL_ID&quot;</span>: <span class="hljs-string">&quot;meta-llama/Meta-Llama-3-70B-Instruct&quot;</span>,
<span class="hljs-string">&quot;HF_NUM_CORES&quot;</span>: <span class="hljs-string">&quot;24&quot;</span>, <span class="hljs-comment"># number of neuron cores</span>
<span class="hljs-string">&quot;HF_AUTO_CAST_TYPE&quot;</span>: <span class="hljs-string">&quot;bf16&quot;</span>, <span class="hljs-comment"># dtype of the model</span>
<span class="hljs-string">&quot;MAX_BATCH_SIZE&quot;</span>: <span class="hljs-string">&quot;4&quot;</span>, <span class="hljs-comment"># max batch size for the model</span>
<span class="hljs-string">&quot;MAX_INPUT_TOKENS&quot;</span>: <span class="hljs-string">&quot;4000&quot;</span>, <span class="hljs-comment"># max length of input text</span>
<span class="hljs-string">&quot;MAX_TOTAL_TOKENS&quot;</span>: <span class="hljs-string">&quot;4096&quot;</span>, <span class="hljs-comment"># max length of generated text</span>
<span class="hljs-string">&quot;MESSAGES_API_ENABLED&quot;</span>: <span class="hljs-string">&quot;true&quot;</span>, <span class="hljs-comment"># Enable the messages API</span>
<span class="hljs-string">&quot;HF_TOKEN&quot;</span>: <span class="hljs-string">&quot;&lt;REPLACE WITH YOUR TOKEN&gt;&quot;</span>,
}
<span class="hljs-keyword">assert</span> (
config[<span class="hljs-string">&quot;HF_TOKEN&quot;</span>] != <span class="hljs-string">&quot;&lt;REPLACE WITH YOUR TOKEN&gt;&quot;</span>
), <span class="hljs-string">&quot;Please replace &#x27;&lt;REPLACE WITH YOUR TOKEN&gt;&#x27; with your Hugging Face Hub API token&quot;</span>
<span class="hljs-comment"># create HuggingFaceModel with the image uri</span>
llm_model = HuggingFaceModel(role=role, image_uri=llm_image, env=config)`,wrap:!1}}),D=new d({props:{code:"JTIzJTIwZGVhY3RpdmF0ZSUyMHdhcm5pbmclMjBzaW5jZSUyMG1vZGVsJTIwaXMlMjBjb21waWxlZCUwQWxsbV9tb2RlbC5faXNfY29tcGlsZWRfbW9kZWwlMjAlM0QlMjBUcnVlJTBBJTBBbGxtJTIwJTNEJTIwbGxtX21vZGVsLmRlcGxveSglMEElMjAlMjAlMjAlMjBpbml0aWFsX2luc3RhbmNlX2NvdW50JTNEMSUyQyUwQSUyMCUyMCUyMCUyMGluc3RhbmNlX3R5cGUlM0RpbnN0YW5jZV90eXBlJTJDJTBBJTIwJTIwJTIwJTIwY29udGFpbmVyX3N0YXJ0dXBfaGVhbHRoX2NoZWNrX3RpbWVvdXQlM0RoZWFsdGhfY2hlY2tfdGltZW91dCUyQyUwQSUyMCUyMCUyMCUyMHZvbHVtZV9zaXplJTNEdm9sdW1lX3NpemUlMkMlMEEp",highlighted:`<span class="hljs-comment"># deactivate warning since model is compiled</span>
llm_model._is_compiled_model = <span class="hljs-literal">True</span>
llm = llm_model.deploy(
initial_instance_count=<span class="hljs-number">1</span>,
instance_type=instance_type,
container_startup_health_check_timeout=health_check_timeout,
volume_size=volume_size,
)`,wrap:!1}}),et=new d({props:{code:"JTdCJTBBJTIwJTIwJTIybWVzc2FnZXMlMjIlM0ElMjAlNUIlMEElMjAlMjAlMjAlMjAlN0IlMjAlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMllvdSUyMGFyZSUyMGElMjBoZWxwZnVsJTIwYXNzaXN0YW50LiUyMiUyMCU3RCUyQyUwQSUyMCUyMCUyMCUyMCU3QiUyMCUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBpcyUyMGRlZXAlMjBsZWFybmluZyUzRiUyMiUyMCU3RCUwQSUyMCUyMCU1RCUwQSU3RA==",highlighted:`<span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;messages&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-punctuation">{</span> <span class="hljs-attr">&quot;role&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;system&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;content&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;You are a helpful assistant.&quot;</span> <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span>
<span class="hljs-punctuation">{</span> <span class="hljs-attr">&quot;role&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;user&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;content&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;What is deep learning?&quot;</span> <span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">]</span>
<span class="hljs-punctuation">}</span>`,wrap:!1}}),lt=new d({props:{code:"JTIzJTIwUHJvbXB0JTIwdG8lMjBnZW5lcmF0ZSUwQW1lc3NhZ2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJZb3UlMjBhcmUlMjBhJTIwaGVscGZ1bCUyMGFzc2lzdGFudC4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGF0JTIwaXMlMjBkZWVwJTIwbGVhcm5pbmclMjBpbiUyMG9uZSUyMHNlbnRlbmNlJTNGJTIyJTdEJTJDJTBBJTVEJTBBJTBBJTIzJTIwR2VuZXJhdGlvbiUyMGFyZ3VtZW50cyUyMGh0dHBzJTNBJTJGJTJGcGxhdGZvcm0ub3BlbmFpLmNvbSUyRmRvY3MlMkZhcGktcmVmZXJlbmNlJTJGY2hhdCUyRmNyZWF0ZSUwQXBhcmFtZXRlcnMlMjAlM0QlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjJtYXhfdG9rZW5zJTIyJTNBJTIwMTAwJTJDJTBBJTdE",highlighted:`<span class="hljs-comment"># Prompt to generate</span>
messages = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are a helpful assistant.&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What is deep learning in one sentence?&quot;</span>},
]
<span class="hljs-comment"># Generation arguments https://platform.openai.com/docs/api-reference/chat/create</span>
parameters = {
<span class="hljs-string">&quot;max_tokens&quot;</span>: <span class="hljs-number">100</span>,
}`,wrap:!1}}),nt=new d({props:{code:"Y2hhdCUyMCUzRCUyMGxsbS5wcmVkaWN0KCU3QiUyMm1lc3NhZ2VzJTIyJTNBJTIwbWVzc2FnZXMlMkMlMjAqKnBhcmFtZXRlcnMlMkMlMjAlMjJzdGVhbSUyMiUzQSUyMFRydWUlN0QpJTBBJTBBcHJpbnQoY2hhdCU1QiUyMmNob2ljZXMlMjIlNUQlNUIwJTVEJTVCJTIybWVzc2FnZSUyMiU1RCU1QiUyMmNvbnRlbnQlMjIlNUQuc3RyaXAoKSk=",highlighted:`chat = llm.predict({<span class="hljs-string">&quot;messages&quot;</span>: messages, **parameters, <span class="hljs-string">&quot;steam&quot;</span>: <span class="hljs-literal">True</span>})
<span class="hljs-built_in">print</span>(chat[<span class="hljs-string">&quot;choices&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;message&quot;</span>][<span class="hljs-string">&quot;content&quot;</span>].strip())`,wrap:!1}}),at=new ut({props:{title:"4. Clean up",local:"4-clean-up",headingTag:"h2"}}),it=new d({props:{code:"bGxtLmRlbGV0ZV9tb2RlbCgpJTBBbGxtLmRlbGV0ZV9lbmRwb2ludCgp",highlighted:`llm.delete_model()
llm.delete_endpoint()`,wrap:!1}}),{c(){T=o("meta"),ct=n(),Mt=o("p"),rt=n(),p(y.$$.fragment),ht=n(),j=o("p"),j.innerHTML=oe,mt=n(),J=o("p"),J.textContent=ie,Tt=n(),I=o("ol"),I.innerHTML=Me,dt=n(),U=o("p"),U.textContent=pe,yt=n(),w=o("p"),w.innerHTML=ue,jt=n(),g=o("table"),g.innerHTML=ce,Jt=n(),p(f.$$.fragment),It=n(),b=o("p"),b.innerHTML=re,Ut=n(),C=o("p"),C.textContent=he,wt=n(),p(N.$$.fragment),gt=n(),q=o("p"),q.textContent=me,ft=n(),p(B.$$.fragment),bt=n(),p(E.$$.fragment),Ct=n(),A=o("p"),A.innerHTML=Te,Nt=n(),W=o("p"),W.innerHTML=de,qt=n(),p(Z.$$.fragment),Bt=n(),p(v.$$.fragment),Et=n(),x=o("p"),x.innerHTML=ye,At=n(),z=o("p"),z.innerHTML=je,Wt=n(),k=o("p"),k.innerHTML=Je,Zt=n(),_=o("p"),_.textContent=Ie,vt=n(),V=o("ul"),V.innerHTML=Ue,xt=n(),S=o("p"),S.textContent=we,zt=n(),G=o("ul"),G.innerHTML=ge,kt=n(),Q=o("p"),Q.textContent=fe,_t=n(),F=o("ul"),F.innerHTML=be,Vt=n(),X=o("p"),X.innerHTML=Ce,St=n(),H=o("p"),H.innerHTML=Ne,Gt=n(),L=o("p"),L.innerHTML=qe,Qt=n(),$=o("p"),$.textContent=Be,Ft=n(),p(R.$$.fragment),Xt=n(),Y=o("p"),Y.innerHTML=Ee,Ht=n(),p(D.$$.fragment),Lt=n(),P=o("p"),P.textContent=Ae,$t=n(),O=o("p"),O.innerHTML=We,Rt=n(),K=o("p"),K.innerHTML=Ze,Yt=n(),tt=o("p"),tt.innerHTML=ve,Dt=n(),p(et.$$.fragment),Pt=n(),p(lt.$$.fragment),Ot=n(),st=o("p"),st.textContent=xe,Kt=n(),p(nt.$$.fragment),te=n(),p(at.$$.fragment),ee=n(),ot=o("p"),ot.textContent=ze,le=n(),p(it.$$.fragment),se=n(),pt=o("p"),this.h()},l(t){const e=Xe("svelte-u9bgzb",document.head);T=i(e,"META",{name:!0,content:!0}),e.forEach(l),ct=a(t),Mt=i(t,"P",{}),ke(Mt).forEach(l),rt=a(t),u(y.$$.fragment,t),ht=a(t),j=i(t,"P",{"data-svelte-h":!0}),M(j)!=="svelte-18ykppc"&&(j.innerHTML=oe),mt=a(t),J=i(t,"P",{"data-svelte-h":!0}),M(J)!=="svelte-df2280"&&(J.textContent=ie),Tt=a(t),I=i(t,"OL",{"data-svelte-h":!0}),M(I)!=="svelte-ouxi3x"&&(I.innerHTML=Me),dt=a(t),U=i(t,"P",{"data-svelte-h":!0}),M(U)!=="svelte-fedw35"&&(U.textContent=pe),yt=a(t),w=i(t,"P",{"data-svelte-h":!0}),M(w)!=="svelte-1q2zsrn"&&(w.innerHTML=ue),jt=a(t),g=i(t,"TABLE",{"data-svelte-h":!0}),M(g)!=="svelte-1tmwmqe"&&(g.innerHTML=ce),Jt=a(t),u(f.$$.fragment,t),It=a(t),b=i(t,"P",{"data-svelte-h":!0}),M(b)!=="svelte-1e6mm58"&&(b.innerHTML=re),Ut=a(t),C=i(t,"P",{"data-svelte-h":!0}),M(C)!=="svelte-gxxxnf"&&(C.textContent=he),wt=a(t),u(N.$$.fragment,t),gt=a(t),q=i(t,"P",{"data-svelte-h":!0}),M(q)!=="svelte-k2b9z7"&&(q.textContent=me),ft=a(t),u(B.$$.fragment,t),bt=a(t),u(E.$$.fragment,t),Ct=a(t),A=i(t,"P",{"data-svelte-h":!0}),M(A)!=="svelte-150z9km"&&(A.innerHTML=Te),Nt=a(t),W=i(t,"P",{"data-svelte-h":!0}),M(W)!=="svelte-183iqes"&&(W.innerHTML=de),qt=a(t),u(Z.$$.fragment,t),Bt=a(t),u(v.$$.fragment,t),Et=a(t),x=i(t,"P",{"data-svelte-h":!0}),M(x)!=="svelte-w7lp5p"&&(x.innerHTML=ye),At=a(t),z=i(t,"P",{"data-svelte-h":!0}),M(z)!=="svelte-1kijx5w"&&(z.innerHTML=je),Wt=a(t),k=i(t,"P",{"data-svelte-h":!0}),M(k)!=="svelte-prnhqf"&&(k.innerHTML=Je),Zt=a(t),_=i(t,"P",{"data-svelte-h":!0}),M(_)!=="svelte-1lo8mr8"&&(_.textContent=Ie),vt=a(t),V=i(t,"UL",{"data-svelte-h":!0}),M(V)!=="svelte-19xkqok"&&(V.innerHTML=Ue),xt=a(t),S=i(t,"P",{"data-svelte-h":!0}),M(S)!=="svelte-7y1w5l"&&(S.textContent=we),zt=a(t),G=i(t,"UL",{"data-svelte-h":!0}),M(G)!=="svelte-21hz0o"&&(G.innerHTML=ge),kt=a(t),Q=i(t,"P",{"data-svelte-h":!0}),M(Q)!=="svelte-ba46xn"&&(Q.textContent=fe),_t=a(t),F=i(t,"UL",{"data-svelte-h":!0}),M(F)!=="svelte-i1wt4y"&&(F.innerHTML=be),Vt=a(t),X=i(t,"P",{"data-svelte-h":!0}),M(X)!=="svelte-1qiwbk5"&&(X.innerHTML=Ce),St=a(t),H=i(t,"P",{"data-svelte-h":!0}),M(H)!=="svelte-1px60yi"&&(H.innerHTML=Ne),Gt=a(t),L=i(t,"P",{"data-svelte-h":!0}),M(L)!=="svelte-1mh0xpb"&&(L.innerHTML=qe),Qt=a(t),$=i(t,"P",{"data-svelte-h":!0}),M($)!=="svelte-dbarld"&&($.textContent=Be),Ft=a(t),u(R.$$.fragment,t),Xt=a(t),Y=i(t,"P",{"data-svelte-h":!0}),M(Y)!=="svelte-eb49q4"&&(Y.innerHTML=Ee),Ht=a(t),u(D.$$.fragment,t),Lt=a(t),P=i(t,"P",{"data-svelte-h":!0}),M(P)!=="svelte-hs5wbn"&&(P.textContent=Ae),$t=a(t),O=i(t,"P",{"data-svelte-h":!0}),M(O)!=="svelte-1popn9h"&&(O.innerHTML=We),Rt=a(t),K=i(t,"P",{"data-svelte-h":!0}),M(K)!=="svelte-7vzs06"&&(K.innerHTML=Ze),Yt=a(t),tt=i(t,"P",{"data-svelte-h":!0}),M(tt)!=="svelte-gr6587"&&(tt.innerHTML=ve),Dt=a(t),u(et.$$.fragment,t),Pt=a(t),u(lt.$$.fragment,t),Ot=a(t),st=i(t,"P",{"data-svelte-h":!0}),M(st)!=="svelte-1pq3qhh"&&(st.textContent=xe),Kt=a(t),u(nt.$$.fragment,t),te=a(t),u(at.$$.fragment,t),ee=a(t),ot=i(t,"P",{"data-svelte-h":!0}),M(ot)!=="svelte-100mxno"&&(ot.textContent=ze),le=a(t),u(it.$$.fragment,t),se=a(t),pt=i(t,"P",{}),ke(pt).forEach(l),this.h()},h(){_e(T,"name","hf:doc:metadata"),_e(T,"content",$e)},m(t,e){He(document.head,T),s(t,ct,e),s(t,Mt,e),s(t,rt,e),c(y,t,e),s(t,ht,e),s(t,j,e),s(t,mt,e),s(t,J,e),s(t,Tt,e),s(t,I,e),s(t,dt,e),s(t,U,e),s(t,yt,e),s(t,w,e),s(t,jt,e),s(t,g,e),s(t,Jt,e),c(f,t,e),s(t,It,e),s(t,b,e),s(t,Ut,e),s(t,C,e),s(t,wt,e),c(N,t,e),s(t,gt,e),s(t,q,e),s(t,ft,e),c(B,t,e),s(t,bt,e),c(E,t,e),s(t,Ct,e),s(t,A,e),s(t,Nt,e),s(t,W,e),s(t,qt,e),c(Z,t,e),s(t,Bt,e),c(v,t,e),s(t,Et,e),s(t,x,e),s(t,At,e),s(t,z,e),s(t,Wt,e),s(t,k,e),s(t,Zt,e),s(t,_,e),s(t,vt,e),s(t,V,e),s(t,xt,e),s(t,S,e),s(t,zt,e),s(t,G,e),s(t,kt,e),s(t,Q,e),s(t,_t,e),s(t,F,e),s(t,Vt,e),s(t,X,e),s(t,St,e),s(t,H,e),s(t,Gt,e),s(t,L,e),s(t,Qt,e),s(t,$,e),s(t,Ft,e),c(R,t,e),s(t,Xt,e),s(t,Y,e),s(t,Ht,e),c(D,t,e),s(t,Lt,e),s(t,P,e),s(t,$t,e),s(t,O,e),s(t,Rt,e),s(t,K,e),s(t,Yt,e),s(t,tt,e),s(t,Dt,e),c(et,t,e),s(t,Pt,e),c(lt,t,e),s(t,Ot,e),s(t,st,e),s(t,Kt,e),c(nt,t,e),s(t,te,e),c(at,t,e),s(t,ee,e),s(t,ot,e),s(t,le,e),c(it,t,e),s(t,se,e),s(t,pt,e),ne=!0},p:Se,i(t){ne||(r(y.$$.fragment,t),r(f.$$.fragment,t),r(N.$$.fragment,t),r(B.$$.fragment,t),r(E.$$.fragment,t),r(Z.$$.fragment,t),r(v.$$.fragment,t),r(R.$$.fragment,t),r(D.$$.fragment,t),r(et.$$.fragment,t),r(lt.$$.fragment,t),r(nt.$$.fragment,t),r(at.$$.fragment,t),r(it.$$.fragment,t),ne=!0)},o(t){h(y.$$.fragment,t),h(f.$$.fragment,t),h(N.$$.fragment,t),h(B.$$.fragment,t),h(E.$$.fragment,t),h(Z.$$.fragment,t),h(v.$$.fragment,t),h(R.$$.fragment,t),h(D.$$.fragment,t),h(et.$$.fragment,t),h(lt.$$.fragment,t),h(nt.$$.fragment,t),h(at.$$.fragment,t),h(it.$$.fragment,t),ne=!1},d(t){t&&(l(ct),l(Mt),l(rt),l(ht),l(j),l(mt),l(J),l(Tt),l(I),l(dt),l(U),l(yt),l(w),l(jt),l(g),l(Jt),l(It),l(b),l(Ut),l(C),l(wt),l(gt),l(q),l(ft),l(bt),l(Ct),l(A),l(Nt),l(W),l(qt),l(Bt),l(Et),l(x),l(At),l(z),l(Wt),l(k),l(Zt),l(_),l(vt),l(V),l(xt),l(S),l(zt),l(G),l(kt),l(Q),l(_t),l(F),l(Vt),l(X),l(St),l(H),l(Gt),l(L),l(Qt),l($),l(Ft),l(Xt),l(Y),l(Ht),l(Lt),l(P),l($t),l(O),l(Rt),l(K),l(Yt),l(tt),l(Dt),l(Pt),l(Ot),l(st),l(Kt),l(te),l(ee),l(ot),l(le),l(se),l(pt)),l(T),m(y,t),m(f,t),m(N,t),m(B,t),m(E,t),m(Z,t),m(v,t),m(R,t),m(D,t),m(et,t),m(lt,t),m(nt,t),m(at,t),m(it,t)}}}const $e='{"title":"Deploy Llama 3.3 70B on AWS Inferentia2","local":"deploy-llama-33-70b-on-aws-inferentia2","sections":[{"title":"1. Setup development environment","local":"1-setup-development-environment","sections":[],"depth":2},{"title":"2. Retrieve the latest Hugging Face TGI Neuron DLC","local":"2-retrieve-the-latest-hugging-face-tgi-neuron-dlc","sections":[],"depth":2},{"title":"3. Deploy Llama 3.3 70B to Inferentia2","local":"3-deploy-llama-33-70b-to-inferentia2","sections":[],"depth":2},{"title":"4. Clean up","local":"4-clean-up","sections":[],"depth":2}],"depth":1}';function Re(ae){return Ge(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ke extends Qe{constructor(T){super(),Fe(this,T,Re,Le,Ve,{})}}export{Ke as component};

Xet Storage Details

Size:
35.4 kB
·
Xet hash:
c92c2066c77b9f0586b589b217f6d87536639536fceeaa708056889bb542402d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.