Buckets:

hf-doc-build
/

doc

hf-doc-build/doc / optimum-neuron /v0.3.0 /en /_app /immutable /nodes /13.38918f9e.js

8.32 kB

import{s as ge,n as de,o as ce}from"../chunks/scheduler.85c25b89.js";import{S as we,i as be,g as l,s as a,r as z,A as $e,h as r,f as n,c as o,j as ue,u as C,x as s,k as he,y as ye,a as i,v as L,d as _,t as H,w as P}from"../chunks/index.c9bcf812.js";import{H as D}from"../chunks/getInferenceSnippets.5ea0a804.js";function ke(ee){let f,I,M,E,m,W,p,te='<a href="https://github.com/huggingface/optimum-neuron" rel="nofollow">Optimum Neuron</a> is integrated into Amazon SageMaker through the Hugging Face Deep Learning Containers for AWS Accelerators like Inferentia2 and Trainium1. This allows you to easily train and deploy 🤗 Transformers and Diffusers models on Amazon SageMaker leveraging AWS accelerators.',U,u,ne="The Hugging Face DLC images come with pre-installed Optimum Neuron and tools to compile models for efficient inference on Inferentia2 and Trainium1. This makes deploying large transformer models simple and optimized out of the box.",B,h,ie="Below is a list of available end-to-end tutorials on using Optimum Neuron via the Hugging Face DLC to train and deploy models on Amazon SageMaker. Follow the end-to-end examples to learn how Optimum Neuron integrates with SageMaker through the Hugging Face DLC images to unlock performance and cost benefits.",O,g,G,d,ae="Tutorial on how to deploy a text embedding model (BGE-Base) for efficient and fast embedding generation on AWS Inferentia2 using Amazon SageMaker; The post shows how Inferentia2 can be a great option for not only efficient and fast but also cost-effective inference of embeddings compared to GPUs or services like OpenAI and Amazon Bedrock.",N,c,oe='<li><a href="https://www.philschmid.de/inferentia2-embeddings" rel="nofollow">Tutorial</a></li> <li><a href="https://github.com/philschmid/huggingface-inferentia2-samples/blob/main/llama2-7b/sagemaker-notebook.ipynb" rel="nofollow">GitHub Repo</a></li>',R,w,F,b,le="Tutorial on how to deploy the conversational Llama 2 model on AWS Inferentia2 using Amazon SageMaker for low-latency inference; Shows how to leverage Inferentia2 and SageMaker to go from model training to production deployment with just a few lines of code.",j,$,re='<li><a href="https://www.philschmid.de/inferentia2-llama-7b" rel="nofollow">Tutorial</a></li> <li><a href="https://github.com/philschmid/huggingface-inferentia2-samples/blob/main/stable-diffusion-xl/sagemaker-notebook.ipynb" rel="nofollow">GitHub Repo</a></li>',X,y,q,k,se="Tutorial on how to deploy Stable Diffusion XL model on AWS Inferentia2 using Optimum Neuron and Amazon SageMaker for efficient 1024x1024 image generation achieving ~6 seconds per image; The post shows how a single <code>inf2.xlarge</code> instance costing $0.99/hour can achieve ~10 images per minute, making Inferentia2 a great option for not only efficient and fast but also cost-effective inference of images compared to GPUs.",J,x,fe='<li><a href="https://www.philschmid.de/inferentia2-stable-diffusion-xl" rel="nofollow">Tutorial</a></li> <li><a href="https://github.com/Placeholder/stable-diffusion-inferentia" rel="nofollow">GitHub Repo</a></li>',K,v,Q,S,me="Tutorial on how to optimize and deploy BERT model on AWS Inferentia2 using Optimum Neuron and Amazon SageMaker for efficient text classification achieving 4ms latency; The post shows how a single inf2.xlarge instance costing $0.99/hour can achieve 116 inferences/sec and 500 inferences/sec without network overhead, making Inferentia2 a great option for low-latency and cost-effective inference compared to GPUs.",V,T,pe='<li><a href="https://www.philschmid.de/optimize-deploy-bert-inf2" rel="nofollow">Tutorial</a></li> <li><a href="https://github.com/philschmid/huggingface-inferentia2-samples/blob/main/bert-transformers/sagemaker-notebook.ipynb" rel="nofollow">GitHub Repo</a></li>',Y,A,Z;return m=new D({props:{title:"Using Optimum Neuron on Amazon SageMaker",local:"using-optimum-neuron-on-amazon-sagemaker",headingTag:"h1"}}),g=new D({props:{title:"Deploy Embedding Models on Inferentia2 for Efficient Similarity Search",local:"deploy-embedding-models-on-inferentia2-for-efficient-similarity-search",headingTag:"h2"}}),w=new D({props:{title:"Deploy Llama 2 7B on AWS inferentia2 with Amazon SageMaker",local:"deploy-llama-2-7b-on-aws-inferentia2-with-amazon-sagemaker",headingTag:"h2"}}),y=new D({props:{title:"Deploy Stable Diffusion XL on AWS inferentia2 with Amazon SageMaker",local:"deploy-stable-diffusion-xl-on-aws-inferentia2-with-amazon-sagemaker",headingTag:"h2"}}),v=new D({props:{title:"Deploy BERT for Text Classification on AWS inferentia2 with Amazon SageMaker",local:"deploy-bert-for-text-classification-on-aws-inferentia2-with-amazon-sagemaker",headingTag:"h2"}}),{c(){f=l("meta"),I=a(),M=l("p"),E=a(),z(m.$$.fragment),W=a(),p=l("p"),p.innerHTML=te,U=a(),u=l("p"),u.textContent=ne,B=a(),h=l("p"),h.textContent=ie,O=a(),z(g.$$.fragment),G=a(),d=l("p"),d.textContent=ae,N=a(),c=l("ul"),c.innerHTML=oe,R=a(),z(w.$$.fragment),F=a(),b=l("p"),b.textContent=le,j=a(),$=l("ul"),$.innerHTML=re,X=a(),z(y.$$.fragment),q=a(),k=l("p"),k.innerHTML=se,J=a(),x=l("ul"),x.innerHTML=fe,K=a(),z(v.$$.fragment),Q=a(),S=l("p"),S.textContent=me,V=a(),T=l("ul"),T.innerHTML=pe,Y=a(),A=l("p"),this.h()},l(e){const t=$e("svelte-u9bgzb",document.head);f=r(t,"META",{name:!0,content:!0}),t.forEach(n),I=o(e),M=r(e,"P",{}),ue(M).forEach(n),E=o(e),C(m.$$.fragment,e),W=o(e),p=r(e,"P",{"data-svelte-h":!0}),s(p)!=="svelte-13cd18f"&&(p.innerHTML=te),U=o(e),u=r(e,"P",{"data-svelte-h":!0}),s(u)!=="svelte-1ngzo57"&&(u.textContent=ne),B=o(e),h=r(e,"P",{"data-svelte-h":!0}),s(h)!=="svelte-16q6ucs"&&(h.textContent=ie),O=o(e),C(g.$$.fragment,e),G=o(e),d=r(e,"P",{"data-svelte-h":!0}),s(d)!=="svelte-1utk3kr"&&(d.textContent=ae),N=o(e),c=r(e,"UL",{"data-svelte-h":!0}),s(c)!=="svelte-1n73dx9"&&(c.innerHTML=oe),R=o(e),C(w.$$.fragment,e),F=o(e),b=r(e,"P",{"data-svelte-h":!0}),s(b)!=="svelte-1dii1xw"&&(b.textContent=le),j=o(e),$=r(e,"UL",{"data-svelte-h":!0}),s($)!=="svelte-1lzr929"&&($.innerHTML=re),X=o(e),C(y.$$.fragment,e),q=o(e),k=r(e,"P",{"data-svelte-h":!0}),s(k)!=="svelte-18imwet"&&(k.innerHTML=se),J=o(e),x=r(e,"UL",{"data-svelte-h":!0}),s(x)!=="svelte-9jfujr"&&(x.innerHTML=fe),K=o(e),C(v.$$.fragment,e),Q=o(e),S=r(e,"P",{"data-svelte-h":!0}),s(S)!=="svelte-1a6nhs6"&&(S.textContent=me),V=o(e),T=r(e,"UL",{"data-svelte-h":!0}),s(T)!=="svelte-15612wl"&&(T.innerHTML=pe),Y=o(e),A=r(e,"P",{}),ue(A).forEach(n),this.h()},h(){he(f,"name","hf:doc:metadata"),he(f,"content",xe)},m(e,t){ye(document.head,f),i(e,I,t),i(e,M,t),i(e,E,t),L(m,e,t),i(e,W,t),i(e,p,t),i(e,U,t),i(e,u,t),i(e,B,t),i(e,h,t),i(e,O,t),L(g,e,t),i(e,G,t),i(e,d,t),i(e,N,t),i(e,c,t),i(e,R,t),L(w,e,t),i(e,F,t),i(e,b,t),i(e,j,t),i(e,$,t),i(e,X,t),L(y,e,t),i(e,q,t),i(e,k,t),i(e,J,t),i(e,x,t),i(e,K,t),L(v,e,t),i(e,Q,t),i(e,S,t),i(e,V,t),i(e,T,t),i(e,Y,t),i(e,A,t),Z=!0},p:de,i(e){Z||(_(m.$$.fragment,e),_(g.$$.fragment,e),_(w.$$.fragment,e),_(y.$$.fragment,e),_(v.$$.fragment,e),Z=!0)},o(e){H(m.$$.fragment,e),H(g.$$.fragment,e),H(w.$$.fragment,e),H(y.$$.fragment,e),H(v.$$.fragment,e),Z=!1},d(e){e&&(n(I),n(M),n(E),n(W),n(p),n(U),n(u),n(B),n(h),n(O),n(G),n(d),n(N),n(c),n(R),n(F),n(b),n(j),n($),n(X),n(q),n(k),n(J),n(x),n(K),n(Q),n(S),n(V),n(T),n(Y),n(A)),n(f),P(m,e),P(g,e),P(w,e),P(y,e),P(v,e)}}}const xe='{"title":"Using Optimum Neuron on Amazon SageMaker","local":"using-optimum-neuron-on-amazon-sagemaker","sections":[{"title":"Deploy Embedding Models on Inferentia2 for Efficient Similarity Search","local":"deploy-embedding-models-on-inferentia2-for-efficient-similarity-search","sections":[],"depth":2},{"title":"Deploy Llama 2 7B on AWS inferentia2 with Amazon SageMaker","local":"deploy-llama-2-7b-on-aws-inferentia2-with-amazon-sagemaker","sections":[],"depth":2},{"title":"Deploy Stable Diffusion XL on AWS inferentia2 with Amazon SageMaker","local":"deploy-stable-diffusion-xl-on-aws-inferentia2-with-amazon-sagemaker","sections":[],"depth":2},{"title":"Deploy BERT for Text Classification on AWS inferentia2 with Amazon SageMaker","local":"deploy-bert-for-text-classification-on-aws-inferentia2-with-amazon-sagemaker","sections":[],"depth":2}],"depth":1}';function ve(ee){return ce(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ae extends we{constructor(f){super(),be(this,f,ve,ke,ge,{})}}export{Ae as component};

Xet Storage Details

Size:: 8.32 kB
Xet hash:: 84775c3b984d2e8a794d3196784da30c49bbb0f125cad852cc944606271ea8c4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.