<!DOCTYPE html> <html lang="en"><head> <script src="accelerate_files/libs/clipboard/clipboard.min.js"></script> <script src="accelerate_files/libs/quarto-html/tabby.min.js"></script> <script src="accelerate_files/libs/quarto-html/popper.min.js"></script> <script src="accelerate_files/libs/quarto-html/tippy.umd.min.js"></script> <link href="accelerate_files/libs/quarto-html/tippy.css" rel="stylesheet"> <link href="accelerate_files/libs/quarto-html/light-border.css" rel="stylesheet"> <link href="accelerate_files/libs/quarto-html/quarto-html.min.css" rel="stylesheet" data-mode="light"> <link href="accelerate_files/libs/quarto-html/quarto-syntax-highlighting-dark.css" rel="stylesheet" id="quarto-text-highlighting-styles"> <script src="accelerate_files/libs/quarto-contrib/videojs/video.min.js"></script> <link href="accelerate_files/libs/quarto-contrib/videojs/video-js.css" rel="stylesheet"><meta charset="utf-8"> <meta name="generator" content="quarto-1.3.450"> <title>Hugging Face Accelerate: Making device-agnostic ML training and inference easy at scale</title> <meta name="apple-mobile-web-app-capable" content="yes"> <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent"> <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui"> <link rel="stylesheet" href="accelerate_files/libs/revealjs/dist/reset.css"> <link rel="stylesheet" href="accelerate_files/libs/revealjs/dist/reveal.css"> <style> code{white-space: pre-wrap;} span.smallcaps{font-variant: small-caps;} div.columns{display: flex; gap: min(4vw, 1.5em);} div.column{flex: auto; overflow-x: auto;} div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} ul.task-list{list-style: none;} ul.task-list li input[type="checkbox"] { width: 0.8em; margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ vertical-align: middle; } /* CSS for syntax highlighting */ pre > code.sourceCode { 
white-space: pre; position: relative; } pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } pre > code.sourceCode > span:empty { height: 1.2em; } .sourceCode { overflow: visible; } code.sourceCode > span { color: inherit; text-decoration: inherit; } div.sourceCode { margin: 1em 0; } pre.sourceCode { margin: 0; } @media screen { div.sourceCode { overflow: auto; } } @media print { pre > code.sourceCode { white-space: pre-wrap; } pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } } pre.numberSource code { counter-reset: source-line 0; } pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; } pre.numberSource code > span > a:first-child::before { content: counter(source-line); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; -khtml-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; padding: 0 4px; width: 4em; } pre.numberSource { margin-left: 3em; padding-left: 4px; } div.sourceCode { color: #f8f8f2; } @media screen { pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } } code span { color: #f8f8f2; } /* Normal */ code span.al { color: #f07178; background-color: #2a0f15; font-weight: bold; } /* Alert */ code span.an { color: #d4d0ab; } /* Annotation */ code span.at { color: #00e0e0; } /* Attribute */ code span.bn { color: #d4d0ab; } /* BaseN */ code span.bu { color: #abe338; } /* BuiltIn */ code span.cf { color: #ffa07a; font-weight: bold; } /* ControlFlow */ code span.ch { color: #abe338; } /* Char */ code span.cn { color: #ffd700; } /* Constant */ code span.co { color: #f8f8f2; font-style: italic; } /* Comment */ code span.cv { color: #ffd700; } /* CommentVar */ code span.do { color: #f8f8f2; } /* Documentation */ code span.dt { color: #ffa07a; } /* DataType */ code span.dv { color: #d4d0ab; } /* 
DecVal */ code span.er { color: #f07178; text-decoration: underline; } /* Error */ code span.ex { color: #00e0e0; font-weight: bold; } /* Extension */ code span.fl { color: #d4d0ab; } /* Float */ code span.fu { color: #ffa07a; } /* Function */ code span.im { color: #abe338; } /* Import */ code span.in { color: #d4d0ab; } /* Information */ code span.kw { color: #ffa07a; font-weight: bold; } /* Keyword */ code span.op { color: #ffa07a; } /* Operator */ code span.ot { color: #00e0e0; } /* Other */ code span.pp { color: #dcc6e0; } /* Preprocessor */ code span.re { color: #00e0e0; background-color: #f8f8f2; } /* RegionMarker */ code span.sc { color: #abe338; } /* SpecialChar */ code span.ss { color: #abe338; } /* SpecialString */ code span.st { color: #abe338; } /* String */ code span.va { color: #00e0e0; } /* Variable */ code span.vs { color: #abe338; } /* VerbatimString */ code span.wa { color: #dcc6e0; } /* Warning */ </style> <link rel="stylesheet" href="accelerate_files/libs/revealjs/dist/theme/quarto.css"> <link href="accelerate_files/libs/revealjs/plugin/quarto-line-highlight/line-highlight.css" rel="stylesheet"> <link href="accelerate_files/libs/revealjs/plugin/reveal-menu/menu.css" rel="stylesheet"> <link href="accelerate_files/libs/revealjs/plugin/reveal-menu/quarto-menu.css" rel="stylesheet"> <link href="accelerate_files/libs/revealjs/plugin/quarto-support/footer.css" rel="stylesheet"> <style type="text/css"> .callout { margin-top: 1em; margin-bottom: 1em; border-radius: .25rem; } .callout.callout-style-simple { padding: 0em 0.5em; border-left: solid #acacac .3rem; border-right: solid 1px silver; border-top: solid 1px silver; border-bottom: solid 1px silver; display: flex; } .callout.callout-style-default { border-left: solid #acacac .3rem; border-right: solid 1px silver; border-top: solid 1px silver; border-bottom: solid 1px silver; } .callout .callout-body-container { flex-grow: 1; } .callout.callout-style-simple .callout-body { font-size: 1rem; 
font-weight: 400; } .callout.callout-style-default .callout-body { font-size: 0.9rem; font-weight: 400; } .callout.callout-titled.callout-style-simple .callout-body { margin-top: 0.2em; } .callout:not(.callout-titled) .callout-body { display: flex; } .callout:not(.no-icon).callout-titled.callout-style-simple .callout-content { padding-left: 1.6em; } .callout.callout-titled .callout-header { padding-top: 0.2em; margin-bottom: -0.2em; } .callout.callout-titled .callout-title p { margin-top: 0.5em; margin-bottom: 0.5em; } .callout.callout-titled.callout-style-simple .callout-content p { margin-top: 0; } .callout.callout-titled.callout-style-default .callout-content p { margin-top: 0.7em; } .callout.callout-style-simple div.callout-title { border-bottom: none; font-size: .9rem; font-weight: 600; opacity: 75%; } .callout.callout-style-default div.callout-title { border-bottom: none; font-weight: 600; opacity: 85%; font-size: 0.9rem; padding-left: 0.5em; padding-right: 0.5em; } .callout.callout-style-default div.callout-content { padding-left: 0.5em; padding-right: 0.5em; } .callout.callout-style-simple .callout-icon::before { height: 1rem; width: 1rem; display: inline-block; content: ""; background-repeat: no-repeat; background-size: 1rem 1rem; } .callout.callout-style-default .callout-icon::before { height: 0.9rem; width: 0.9rem; display: inline-block; content: ""; background-repeat: no-repeat; background-size: 0.9rem 0.9rem; } .callout-title { display: flex } .callout-icon::before { margin-top: 1rem; padding-right: .5rem; } .callout.no-icon::before { display: none !important; } .callout.callout-titled .callout-body > .callout-content > :last-child { margin-bottom: 0.5rem; } .callout.callout-titled .callout-icon::before { margin-top: .5rem; padding-right: .5rem; } .callout:not(.callout-titled) .callout-icon::before { margin-top: 1rem; padding-right: .5rem; } /* Callout Types */ div.callout-note { border-left-color: #4582ec !important; } div.callout-note 
.callout-icon::before { background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAAEU0lEQVRYCcVXTWhcVRQ+586kSUMMxkyaElstCto2SIhitS5Ek8xUKV2poatCcVHtUlFQk8mbaaziwpWgglJwVaquitBOfhQXFlqlzSJpFSpIYyXNjBNiTCck7x2/8/LeNDOZxDuEkgOXe++553zfefee+/OYLOXFk3+1LLrRdiO81yNqZ6K9cG0P3MeFaMIQjXssE8Z1JzLO9ls20MBZX7oG8w9GxB0goaPrW5aNMp1yOZIa7Wv6o2ykpLtmAPs/vrG14Z+6d4jpbSKuhdcSyq9wGMPXjonwmESXrriLzFGOdDBLB8Y6MNYBu0dRokSygMA/mrun8MGFN3behm6VVAwg4WR3i6FvYK1T7MHo9BK7ydH+1uurECoouk5MPRyVSBrBHMYwVobG2aOXM07sWrn5qgB60rc6mcwIDJtQrnrEr44kmy+UO9r0u9O5/YbkS9juQckLed3DyW2XV/qWBBB3ptvI8EUY3I9p/67OW+g967TNr3Sotn3IuVlfMLVnsBwH4fsnebJvyGm5GeIUA3jljERmrv49SizPYuq+z7c2H/jlGC+Ghhupn/hcapqmcudB9jwJ/3jvnvu6vu5lVzF1fXyZuZZ7U8nRmVzytvT+H3kilYvH09mLWrQdwFSsFEsxFVs5fK7A0g8gMZjbif4ACpKbjv7gNGaD8bUrlk8x+KRflttr22JEMRUbTUwwDQScyzPgedQHZT0xnx7ujw2jfVfExwYHwOsDTjLdJ2ebmeQIlJ7neo41s/DrsL3kl+W2lWvAga0tR3zueGr6GL78M3ifH0rGXrBC2aAR8uYcIA5gwV8zIE8onoh8u0Fca/ciF7j1uOzEnqcIm59sEXoGc0+z6+H45V1CvAvHcD7THztu669cnp+L0okAeIc6zjbM/24LgGM1gZk7jnRu1aQWoU9sfUOuhrmtaPIO3YY1KLLWZaEO5TKUbMY5zx8W9UJ6elpLwKXbsaZ4EFl7B4bMtDv0iRipKoDQT2sNQI9b1utXFdYisi+wzZ/ri/1m7QfDgEuvgUUEIJPq3DhX/5DWNqIXDOweC2wvIR90Oq3lDpdMIgD2r0dXvGdsEW5H6x6HLRJYU7C69VefO1x8Gde1ZFSJLfWS1jbCnhtOPxmpfv2LXOA2Xk2tvnwKKPFuZ/oRmwBwqRQDcKNeVQkYcOjtWVBuM/JuYw5b6isojIkYxyYAFn5K7ZBF10fea52y8QltAg6jnMqNHFBmGkQ1j+U43HMi2xMar1Nv0zGsf1s8nUsmUtPOOrbFIR8bHFDMB5zL13Gmr/kGlCkUzedTzzmzsaJXhYawnA3UmARpiYj5ooJZiUoxFRtK3X6pgNPv+IZVPcnwbOl6f+aBaO1CNvPW9n9LmCp01nuSaTRF2YxHqZ8DYQT6WsXT+RD6eUztwYLZ8rM+rcPxamv1VQzFUkzFXvkiVrySGQgJNvXHJAxiU3/NwiC03rSf05VBaPtu/Z7/B8Yn/w7eguloAAAAAElFTkSuQmCC'); } div.callout-note.callout-style-default .callout-title { background-color: #dae6fb } div.callout-important { border-left-color: #d9534f !important; } div.callout-important .callout-icon::before { background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAAEKklEQVRYCcVXTWhcVRS+575MJym48A+hSRFr00ySRQhURRfd2HYjk2SSTokuBCkU2o0LoSKKraKIBTcuFCoidGFD08nkBzdREbpQ1EDNIv8qSGMFUboImMSZd4/f9zJv8ibJMC8xJQfO3HPPPef7zrvvvnvviIkpC9nsw0UttFunbUhpFzFtarSd6WJkStVMw5xyVqYTvkwfzuf/5FgtkVoB0729j1rjXwThS7Vio+Mo6DNnvLfahoZ+i/o32lULuJ3NNiz7q6+pyAUkJaFF6JwaM2lUJlV0MlnQn5aTRbEu0SEqHUa0A4AdiGuB1kFXRfVyg5d87+Dg4DL6m2TLAub60ilj7A1Ec4odSAc8X95sHh7+ZRPCFo6Fnp7HfU/fBng/hi10CjCnWnJjsxvDNxWw0NfV6Rv5GgP3I3jGWXumdTD/3cbEOP2ZbOZp69yniG3FQ9z1jD7bnBu9Fc2tKGC2q+uAJOQHBDRiZX1x36o7fWBs7J9ownbtO+n0/qWkvW7UPIfc37WgT6ZGR++EOJyeQDSb9UB+DZ1G6DdLDzyS+b/kBCYGsYgJbSQHuThGKRcw5xdeQf8YdNHsc6ePXrlSYMBuSIAFTGAtQo+VuALo4BX83N190NWZWbynBjhOHsmNfFWLeL6v+ynsA58zDvvAC8j5PkbOcXCMg2PZFk3q8MjI7WAG/Dp9AwP7jdGBOOQkAvlFUB+irtm16I1Zw9YBcpGTGXYmk3kQIC/Cds55l+iMI3jqhjAuaoe+am2Jw5GT3Nbz3CkE12NavmzN5+erJW7046n/CH1RO/RVa8lBLozXk9uqykkGAyRXLWlLv5jyp4RFsG5vGVzpDLnIjTWgnRy2Rr+tDKvRc7Y8AyZq10jj8DqXdnIRNtFZb+t/ZRtXcDiVnzpqx8mPcDWxgARUqx0W1QB9MeUZiNrV4qP+Ehc+BpNgATsTX8ozYKL2NtFYAHc84fG7ndxUPr+AR/iQSns7uSUufAymwDOb2+NjK27lEFocm/EE2WpyIy/Hi66MWuMKJn8RvxIcj87IM5Vh9663ziW36kR0HNenXuxmfaD8JC7tfKbrhFr7LiZCrMjrzTeGx+PmkosrkNzW94ObzwocJ7A1HokLolY+AvkTiD/q1H0cN48c5EL8Crkttsa/AXQVDmutfyku0E7jShx49XqV3MFK8IryDhYVbj7Sj2P2eBxwcXoe8T8idsKKPRcnZw1b+slFTubwUwhktrfnAt7J++jwQtLZcm3sr9LQrjRzz6cfMv9aLvgmnAGvpoaGLxM4mAEaLV7iAzQ3oU0IvD5x9ix3yF2RAAuYAOO2f7PEFWCXZ4C9Pb2UsgDeVnFSpbFK7/IWu7TPTvBqzbGdCHOJQSxiEjt6IyZmxQyEJHv6xyQsYk//moVFsN2zP6fRImjfq7/n/wFDguUQFNEwugAAAABJRU5ErkJggg=='); } div.callout-important.callout-style-default .callout-title { background-color: #f7dddc } div.callout-warning { border-left-color: #f0ad4e !important; } div.callout-warning .callout-icon::before { background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAAETklEQVRYCeVWW2gcVRg+58yaTUnizqbipZeX4uWhBEniBaoUX1Ioze52t7sRq6APio9V9MEaoWlVsFasRq0gltaAPuxms8lu0gcviE/FFOstVbSIxgcv6SU7EZqmdc7v9+9mJtNks51NTUH84ed889/PP+cmxP+d5FIbMJmNbpREu4WUkiTtCicKny0l1pIKmBzovF2S+hIJHX8iEu3hZJ5lNZGqyRrGSIQpq15AzF28jgpeY6yk6GVdrfFqdrD6Iw+QlB8g0YS2g7dyQmXM/IDhBhT0UCiRf59lfqmmDvzRt6kByV/m4JjtzuaujMUM2c5Z2d6JdKrRb3K2q6mA+oYVz8JnDdKPmmNthzkAk/lN63sYPgevrguc72aZX/L9C6x09GYyxBgCX4NlvyGUHOKELlm5rXeR1kchuChJt4SSwyddZRXgvwMGvYo4QSlk3/zkHD8UHxwVJA6zjZZqP8v8kK8OWLnIZtLyCAJagYC4rTGW/9Pqj92N/c+LUaAj27movwbi19tk/whRCIE7Q9vyI6yvRpftAKVTdUjOW40X3h5OXsKCdmFcx0xlLJoSuQngnrJe7Kcjm4OMq9FlC7CMmScQANuNvjfP3PjGXDBaUQmbp296S5L4DrpbrHN1T87ZVEZVCzg1FF0Ft+dKrlLukI+/c9ENo+TvlTDbYFvuKPtQ9+l052rXrgKoWkDAFnvh0wTOmYn8R5f4k/jN/fZiCM1tQx9jQQ4ANhqG4hiL0qIFTGViG9DKB7GYzgubnpofgYRwO+DFjh0Zin2m4b/97EDkXkc+f6xYAPX0KK2I/7fUQuwzuwo/L3AkcjugPNixC8cHf0FyPjWlItmLxWw4Ou9YsQCr5fijMGoD/zpdRy95HRysyXA74MWOnscpO4j2y3HAVisw85hX5+AFBRSHt4ShfLFkIMXTqyKFc46xdzQM6XbAi702a7sy04J0+feReMFKp5q9esYLCqAZYw/k14E/xcLLsFElaornTuJB0svMuJINy8xkIYuL+xPAlWRceH6+HX7THJ0djLUom46zREu7tTkxwmf/FdOZ/sh6Q8qvEAiHpm4PJ4a/doJe0gH1t+aHRgCzOvBvJedEK5OFE5jpm4AGP2a8Dxe3gGJ/pAutug9Gp6he92CsSsWBaEcxGx0FHytmIpuqGkOpldqNYQK8cSoXvd+xLxXADw0kf6UkJNFtdo5MOgaLjiQOQHcn+A6h5NuL2s0qsC2LOM75PcF3yr5STuBSAcGG+meA14K/CI21HcS4LBT6tv0QAh8Dr5l93AhZzG5ZJ4VxAqdZUEl9z7WJ4aN+svMvwHHL21UKTd1mqvChH7/Za5xzXBBKrUcB0TQ+Ulgkfbi/H/YT5EptrGzsEK7tR1B7ln9BBwckYfMiuSqklSznIuoIIOM42MQO+QnduCoFCI0bpkzjCjddHPN/F+2Yu+sd9bKNpVwHhbS3LluK/0zgfwD0xYI5dXuzlQAAAABJRU5ErkJggg=='); } div.callout-warning.callout-style-default .callout-title { background-color: #fcefdc } div.callout-tip { border-left-color: #02b875 !important; } div.callout-tip .callout-icon::before { background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAADr0lEQVRYCe1XTWgTQRj9ZjZV8a9SPIkKgj8I1bMHsUWrqYLVg4Ue6v9BwZOxSYsIerFao7UiUryIqJcqgtpimhbBXoSCVxUFe9CTiogUrUp2Pt+3aUI2u5vdNh4dmMzOzHvvezuz8xNFM0mjnbXaNu1MvFWRXkXEyE6aYOYJpdW4IXuA4r0fo8qqSMDBU0v1HJUgVieAXxzCsdE/YJTdFcVIZQNMyhruOMJKXYFoLfIfIvVIMWdsrd+Rpd86ZmyzzjJmLStqRn0v8lzkb4rVIXvnpScOJuAn2ACC65FkPzEdEy4TPWRLJ2h7z4cArXzzaOdKlbOvKKX25Wl00jSnrwVxAg3o4dRxhO13RBSdNvH0xSARv3adTXbBdTf64IWO2vH0LT+cv4GR1DJt+DUItaQogeBX/chhbTBxEiZ6gftlDNXTrvT7co4ub5A6gp9HIcHvzTa46OS5fBeP87Qm0fQkr4FsYgVQ7Qg+ZayaDg9jhg1GkWj8RG6lkeSacrrHgDaxdoBiZPg+NXV/KifMuB6//JmYH4CntVEHy/keA6x4h4CU5oFy8GzrBS18cLJMXcljAKB6INjWsRcuZBWVaS3GDrqB7rdapVIeA+isQ57Eev9eCqzqOa81CY05VLd6SamW2wA2H3SiTbnbSxmzfp7WtKZkqy4mdyAlGx7ennghYf8voqp9cLSgKdqNfa6RdRsAAkPwRuJZNbpByn+RrJi1RXTwdi8RQF6ymDwGMAtZ6TVE+4uoKh+MYkcLsT0Hk8eAienbiGdjJHZTpmNjlbFJNKDVAp2fJlYju6IreQxQ08UJDNYdoLSl6AadO+fFuCQqVMB1NJwPm69T04Wv5WhfcWyfXQB+wXRs1pt+nCknRa0LVzSA/2B+a9+zQJadb7IyyV24YAxKp2Jqs3emZTuNnKxsah+uabKbMk7CbTgJx/zIgQYErIeTKRQ9yD9wxVof5YolPHqaWo7TD6tJlh7jQnK5z2n3+fGdggIOx2kaa2YI9QWarc5Ce1ipNWMKeSG4DysFF52KBmTNMmn5HqCFkwy34rDg05gDwgH3bBi+sgFhN/e8QvRn8kbamCOhgrZ9GJhFDgfcMHzFb6BAtjKpFhzTjwv1KCVuxHvCbsSiEz4CANnj84cwHdFXAbAOJ4LTSAawGWFn5tDhLMYz6nWeU2wJfIhmIJBefcd/A5FWQWGgrWzyORZ3Q6HuV+Jf0Bj+BTX69fm1zWgK7By1YTXchFDORywnfQ7GpzOo6S+qECrsx2ifVQAAAABJRU5ErkJggg=='); } div.callout-tip.callout-style-default .callout-title { background-color: #ccf1e3 } div.callout-caution { border-left-color: #fd7e14 !important; } div.callout-caution .callout-icon::before { background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAIKADAAQAAAABAAAAIAAAAACshmLzAAACV0lEQVRYCdVWzWoUQRCuqp2ICBLJXgITZL1EfQDBW/bkzUMUD7klD+ATSHBEfAIfQO+iXsWDxJsHL96EHAwhgzlkg8nBg25XWb0zIb0zs9muYYWkoKeru+vn664fBqElyZNuyh167NXJ8Ut8McjbmEraKHkd7uAnAFku+VWdb3reSmRV8PKSLfZ0Gjn3a6Xlcq9YGb6tADjn+lUfTXtVmaZ1KwBIvFI11rRXlWlatwIAAv2asaa9mlB9wwygiDX26qaw1yYPzFXg2N1GgG0FMF8Oj+VIx7E/03lHx8UhvYyNZLN7BwSPgekXXLribw7w5/c8EF+DBK5idvDVYtEEwMeYefjjLAdEyQ3M9nfOkgnPTEkYU+sxMq0BxNR6jExrAI31H1rzvLEfRIdgcv1XEdj6QTQAS2wtstEALLG1yEZ3QhH6oDX7ExBSFEkFINXH98NTrme5IOaaA7kIfiu2L8A3qhH9zRbukdCqdsA98TdElyeMe5BI8Rs2xHRIsoTSSVFfCFCWGPn9XHb4cdobRIWABNf0add9jakDjQJpJ1bTXOJXnnRXHRf+dNL1ZV1MBRCXhMbaHqGI1JkKIL7+i8uffuP6wVQAzO7+qVEbF6NbS0LJureYcWXUUhH66nLR5rYmva+2tjRFtojkM2aD76HEGAD3tPtKM309FJg5j/K682ywcWJ3PASCcycH/22u+Bh7Aa0ehM2Fu4z0SAE81HF9RkB21c5bEn4Dzw+/qNOyXr3DCTQDMBOdhi4nAgiFDGCinIa2owCEChUwD8qzd03PG+qdW/4fDzjUMcE1ZpIAAAAASUVORK5CYII='); } div.callout-caution.callout-style-default .callout-title { background-color: #ffe5d0 } </style> <style type="text/css"> .reveal div.sourceCode { margin: 0; overflow: auto; } .reveal div.hanging-indent { margin-left: 1em; text-indent: -1em; } .reveal .slide:not(.center) { height: 100%; } .reveal .slide.scrollable { overflow-y: auto; } .reveal .footnotes { height: 100%; overflow-y: auto; } .reveal .slide .absolute { position: absolute; display: block; } .reveal .footnotes ol { counter-reset: ol; list-style-type: none; margin-left: 0; } .reveal .footnotes ol li:before { counter-increment: ol; content: counter(ol) ". 
"; } .reveal .footnotes ol li > p:first-child { display: inline-block; } .reveal .slide ul, .reveal .slide ol { margin-bottom: 0.5em; } .reveal .slide ul li, .reveal .slide ol li { margin-top: 0.4em; margin-bottom: 0.2em; } .reveal .slide ul[role="tablist"] li { margin-bottom: 0; } .reveal .slide ul li > *:first-child, .reveal .slide ol li > *:first-child { margin-block-start: 0; } .reveal .slide ul li > *:last-child, .reveal .slide ol li > *:last-child { margin-block-end: 0; } .reveal .slide .columns:nth-child(3) { margin-block-start: 0.8em; } .reveal blockquote { box-shadow: none; } .reveal .tippy-content>* { margin-top: 0.2em; margin-bottom: 0.7em; } .reveal .tippy-content>*:last-child { margin-bottom: 0.2em; } .reveal .slide > img.stretch.quarto-figure-center, .reveal .slide > img.r-stretch.quarto-figure-center { display: block; margin-left: auto; margin-right: auto; } .reveal .slide > img.stretch.quarto-figure-left, .reveal .slide > img.r-stretch.quarto-figure-left { display: block; margin-left: 0; margin-right: auto; } .reveal .slide > img.stretch.quarto-figure-right, .reveal .slide > img.r-stretch.quarto-figure-right { display: block; margin-left: auto; margin-right: 0; } </style> </head> <body class="quarto-dark"> <div class="reveal"> <div class="slides"> <section id="title-slide" class="quarto-title-block center"> <h1 class="title">Hugging Face Accelerate: Making device-agnostic ML training and inference easy at scale</h1> <div class="quarto-title-authors"> </div> </section> <section id="who-am-i" class="slide level2"> <h2>Who am I?</h2> <ul> <li>Zachary Mueller</li> <li>Technical Lead for the 🤗 Accelerate project</li> <li>Maintain the <code>transformers</code> Trainer</li> <li>API design geek</li> </ul> </section> <section id="what-is-accelerate" class="slide level2"> <h2>What is 🤗 Accelerate?</h2> <ul> <li>A training framework</li> <li>An inference framework</li> <li>A command-line interface</li> </ul> </section> <section id="a-training-framework" 
class="slide level2"> <h2>A Training Framework</h2> <ul> <li>Powered by PyTorch</li> <li>Change a few lines of code, gain device <em>and</em> hardware-agnostic capabilities</li> <li>Low-code, with minimal magic aimed at easy hackability and use without high-level abstractions</li> <li>We handle the intricacies so you don’t have to</li> </ul> </section> <section id="a-training-framework-1" class="slide level2"> <h2>A Training Framework</h2> <div style="font-size: 70%;"> <ul> <li>Support for any hardware-accelerator on the market: <ul> <li>CPU, GPU, TPU, XPU, NPU, MLU</li> </ul></li> <li>Automatic mixed-precision training <em>safely</em> in whatever fashion you may choose: <ul> <li>FP16, BF16, FP8 (through either <code>TransformerEngine</code> or <code>MS-AMP</code>)</li> </ul></li> <li>Automatic and efficient gradient accumulation</li> <li>Support for quantization through <code>bitsandbytes</code></li> <li>Support your favorite experiment trackers (<code>aim</code>, <code>clearml</code>, <code>comet_ml</code>, <code>dvclive</code>, <code>mlflow</code>, <code>tensorboard</code>, <code>wandb</code>)</li> <li>Easy to configure plugin or YAML-level API for setting up advanced frameworks like <code>FSDP</code>, <code>DeepSpeed</code>, and <code>Megatron-LM</code></li> </ul> </div> </section> <section id="low-code" class="slide level2"> <h2>Low-Code</h2> <div style="font-size: 70%;"> <ul> <li>Biggest friction with “wrapper” libraries is control of your code</li> <li>By being minimally intrusive, your code just “works” while still giving you complete control</li> </ul> </div> <div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> <div class="sourceCode" id="cb1"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb1-1"><a href="#cb1-1"></a> import torch</span> <span id="cb1-2"><a href="#cb1-2"></a> import torch.nn.functional as F</span> <span id="cb1-3"><a href="#cb1-3"></a> from datasets 
import load_dataset</span> <span id="cb1-4"><a href="#cb1-4"></a><span class="va">+ from accelerate import Accelerator</span></span> <span id="cb1-5"><a href="#cb1-5"></a></span> <span id="cb1-6"><a href="#cb1-6"></a><span class="va">+ accelerator = Accelerator()</span></span> <span id="cb1-7"><a href="#cb1-7"></a><span class="st">- device = 'cpu'</span></span> <span id="cb1-8"><a href="#cb1-8"></a><span class="va">+ device = accelerator.device</span></span> <span id="cb1-9"><a href="#cb1-9"></a></span> <span id="cb1-10"><a href="#cb1-10"></a> model = torch.nn.Transformer().to(device)</span> <span id="cb1-11"><a href="#cb1-11"></a> optimizer = torch.optim.Adam(model.parameters())</span> <span id="cb1-12"><a href="#cb1-12"></a> dataset = load_dataset('my_dataset')</span> <span id="cb1-13"><a href="#cb1-13"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> <span id="cb1-14"><a href="#cb1-14"></a></span> <span id="cb1-15"><a href="#cb1-15"></a><span class="va">+ model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)</span></span> <span id="cb1-16"><a href="#cb1-16"></a></span> <span id="cb1-17"><a href="#cb1-17"></a> model.train()</span> <span id="cb1-18"><a href="#cb1-18"></a> for epoch in range(10):</span> <span id="cb1-19"><a href="#cb1-19"></a> for source, targets in dataloader:</span> <span id="cb1-20"><a href="#cb1-20"></a> source, targets = source.to(device), targets.to(device)</span> <span id="cb1-21"><a href="#cb1-21"></a> optimizer.zero_grad()</span> <span id="cb1-22"><a href="#cb1-22"></a> output = model(source)</span> <span id="cb1-23"><a href="#cb1-23"></a> loss = F.cross_entropy(output, targets)</span> <span id="cb1-24"><a href="#cb1-24"></a><span class="st">- loss.backward()</span></span> <span id="cb1-25"><a href="#cb1-25"></a><span class="va">+ accelerator.backward(loss)</span></span> <span id="cb1-26"><a href="#cb1-26"></a> optimizer.step()</span></code><button title="Copy to Clipboard" 
class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section> <section id="easy-to-integrate" class="slide level2"> <h2>Easy to integrate</h2> <div style="font-size: 70%;"> <ul> <li>Due to the low-code nature, it’s trivial to integrate into existing PyTorch frameworks: <ol type="1"> <li>Create an <code>Accelerator</code></li> </ol></li> </ul> </div> <div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> <div class="sourceCode" id="cb2"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb2-1"><a href="#cb2-1"></a> import torch</span> <span id="cb2-2"><a href="#cb2-2"></a> import torch.nn.functional as F</span> <span id="cb2-3"><a href="#cb2-3"></a> from datasets import load_dataset</span> <span id="cb2-4"><a href="#cb2-4"></a><span class="va">+ from accelerate import Accelerator</span></span> <span id="cb2-5"><a href="#cb2-5"></a></span> <span id="cb2-6"><a href="#cb2-6"></a><span class="va">+ accelerator = Accelerator()</span></span> <span id="cb2-7"><a href="#cb2-7"></a> device = 'cpu'</span> <span id="cb2-8"><a href="#cb2-8"></a></span> <span id="cb2-9"><a href="#cb2-9"></a> model = torch.nn.Transformer().to(device)</span> <span id="cb2-10"><a href="#cb2-10"></a> optimizer = torch.optim.Adam(model.parameters())</span> <span id="cb2-11"><a href="#cb2-11"></a> dataset = load_dataset('my_dataset')</span> <span id="cb2-12"><a href="#cb2-12"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> <span id="cb2-13"><a href="#cb2-13"></a></span> <span id="cb2-14"><a href="#cb2-14"></a> model.train()</span> <span id="cb2-15"><a href="#cb2-15"></a> for epoch in range(10):</span> <span id="cb2-16"><a href="#cb2-16"></a> for source, targets in dataloader:</span> <span id="cb2-17"><a href="#cb2-17"></a> source, targets = source.to(device), targets.to(device)</span> <span id="cb2-18"><a href="#cb2-18"></a> optimizer.zero_grad()</span> <span id="cb2-19"><a 
href="#cb2-19"></a> output = model(source)</span> <span id="cb2-20"><a href="#cb2-20"></a> loss = F.cross_entropy(output, targets)</span> <span id="cb2-21"><a href="#cb2-21"></a> loss.backward()</span> <span id="cb2-22"><a href="#cb2-22"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section> <section id="easy-to-integrate-1" class="slide level2"> <h2>Easy to integrate</h2> <div style="font-size: 70%;"> <ul> <li>Due to the low-code nature, it’s trivial to integrate into existing PyTorch frameworks: <ol start="2" type="1"> <li>Wrap your PyTorch objects with <code>accelerator.prepare</code> and remove device-placements</li> </ol></li> </ul> </div> <div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> <div class="sourceCode" id="cb3"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb3-1"><a href="#cb3-1"></a> import torch</span> <span id="cb3-2"><a href="#cb3-2"></a> import torch.nn.functional as F</span> <span id="cb3-3"><a href="#cb3-3"></a> from datasets import load_dataset</span> <span id="cb3-4"><a href="#cb3-4"></a> from accelerate import Accelerator</span> <span id="cb3-5"><a href="#cb3-5"></a></span> <span id="cb3-6"><a href="#cb3-6"></a> accelerator = Accelerator()</span> <span id="cb3-7"><a href="#cb3-7"></a><span class="st">- device = 'cpu'</span></span> <span id="cb3-8"><a href="#cb3-8"></a></span> <span id="cb3-9"><a href="#cb3-9"></a> model = torch.nn.Transformer().to(device)</span> <span id="cb3-10"><a href="#cb3-10"></a> optimizer = torch.optim.Adam(model.parameters())</span> <span id="cb3-11"><a href="#cb3-11"></a> dataset = load_dataset('my_dataset')</span> <span id="cb3-12"><a href="#cb3-12"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> <span id="cb3-13"><a href="#cb3-13"></a></span> <span id="cb3-14"><a href="#cb3-14"></a><span class="va">+ 
model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)</span></span> <span id="cb3-15"><a href="#cb3-15"></a></span> <span id="cb3-16"><a href="#cb3-16"></a> model.train()</span> <span id="cb3-17"><a href="#cb3-17"></a> for epoch in range(10):</span> <span id="cb3-18"><a href="#cb3-18"></a> for source, targets in dataloader:</span> <span id="cb3-19"><a href="#cb3-19"></a> source, targets = source.to(device), targets.to(device)</span> <span id="cb3-20"><a href="#cb3-20"></a> optimizer.zero_grad()</span> <span id="cb3-21"><a href="#cb3-21"></a> output = model(source)</span> <span id="cb3-22"><a href="#cb3-22"></a> loss = F.cross_entropy(output, targets)</span> <span id="cb3-23"><a href="#cb3-23"></a> loss.backward()</span> <span id="cb3-24"><a href="#cb3-24"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section> <section id="easy-to-integrate-2" class="slide level2"> <h2>Easy to integrate</h2> <div style="font-size: 70%;"> <ul> <li>Due to the low-code nature, it’s trivial to integrate into existing PyTorch frameworks: <ol start="3" type="1"> <li>Use <code>accelerator.backward</code> for the backward pass</li> </ol></li> </ul> </div> <div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> <div class="sourceCode" id="cb4"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb4-1"><a href="#cb4-1"></a> import torch</span> <span id="cb4-2"><a href="#cb4-2"></a> import torch.nn.functional as F</span> <span id="cb4-3"><a href="#cb4-3"></a> from datasets import load_dataset</span> <span id="cb4-4"><a href="#cb4-4"></a> from accelerate import Accelerator</span> <span id="cb4-5"><a href="#cb4-5"></a></span> <span id="cb4-6"><a href="#cb4-6"></a> accelerator = Accelerator()</span> <span id="cb4-7"><a href="#cb4-7"></a></span> <span id="cb4-8"><a href="#cb4-8"></a> model = 
torch.nn.Transformer().to(device)</span> <span id="cb4-9"><a href="#cb4-9"></a> optimizer = torch.optim.Adam(model.parameters())</span> <span id="cb4-10"><a href="#cb4-10"></a> dataset = load_dataset('my_dataset')</span> <span id="cb4-11"><a href="#cb4-11"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> <span id="cb4-12"><a href="#cb4-12"></a></span> <span id="cb4-13"><a href="#cb4-13"></a> model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)</span> <span id="cb4-14"><a href="#cb4-14"></a></span> <span id="cb4-15"><a href="#cb4-15"></a> model.train()</span> <span id="cb4-16"><a href="#cb4-16"></a> for epoch in range(10):</span> <span id="cb4-17"><a href="#cb4-17"></a> for source, targets in dataloader:</span> <span id="cb4-18"><a href="#cb4-18"></a> source, targets = source.to(device), targets.to(device)</span> <span id="cb4-19"><a href="#cb4-19"></a> optimizer.zero_grad()</span> <span id="cb4-20"><a href="#cb4-20"></a> output = model(source)</span> <span id="cb4-21"><a href="#cb4-21"></a> loss = F.cross_entropy(output, targets)</span> <span id="cb4-22"><a href="#cb4-22"></a><span class="st">- loss.backward()</span></span> <span id="cb4-23"><a href="#cb4-23"></a><span class="va">+ accelerator.backward(loss)</span></span> <span id="cb4-24"><a href="#cb4-24"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section> <section id="but-what-about-inference" class="slide level2"> <h2>But what about inference?</h2> <ul> <li>🤗 Accelerate is not just for training, and has helped the GPU-Poor take control of the narrative</li> <li>Using tools like Big Model Inference, users with <em>tiny</em> compute can run large models locally</li> <li>Started with the boom of Stable Diffusion, and has now scaled to running huge LLMs locally on a single graphics card</li> </ul> </section> <section id="how-does-it-work" 
class="slide level2"> <h2>How does it work?</h2> <ul> <li>PyTorch introduced <code>device="meta"</code></li> <li>🤗 Accelerate introduced <code>device_map="auto"</code></li> </ul> <div style="padding-left:15%;padding-right:20%"> <video id="video_shortcode_videojs_video1" width="800" height="400" class="video-js vjs-default-skin " controls="" preload="auto" data-setup="{}" title=""><source src="big_model_visualization.mp4"></video> </div> </section> <section id="a-cli-interface" class="slide level2"> <h2>A CLI Interface</h2> <ul> <li><code>accelerate config</code> <ul> <li>Configure the environment</li> </ul></li> <li><code>accelerate launch</code> <ul> <li>How to run your script</li> </ul></li> </ul> </section> <section id="launching-distributed-training-is-hard" class="slide level2"> <h2>Launching distributed training is hard</h2> <div style="padding-top:0%;padding-left:10%;padding-right:15%;padding-bottom:0%"> <div class="sourceCode" id="cb5"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb5-1"><a href="#cb5-1"></a><span class="ex">python</span> script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> <div style="padding-left:50%;padding-bottom:0%;padding-top:0%;"> <p>vs.</p> </div> <p><br></p> <div style="padding-top:0%;padding-left:10%;padding-right:15%;padding-bottom:0%"> <div class="sourceCode" id="cb6"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb6-1"><a href="#cb6-1"></a><span class="ex">torchrun</span> <span class="at">--nnodes</span><span class="op">=</span>1 <span class="at">--nproc_per_node</span><span class="op">=</span>2 script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> <div style="padding-left:50%;padding-bottom:0%;padding-top:0%;"> <p>vs.</p> </div> <p><br></p> <div 
style="padding-top:0%;padding-left:10%;padding-right:15%;padding-bottom:0%"> <div class="sourceCode" id="cb7"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb7-1"><a href="#cb7-1"></a><span class="ex">deepspeed</span> <span class="at">--num_gpus</span><span class="op">=</span>2 script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> <p><br></p> </div> <p>How can we make this better?</p> </section> <section id="accelerate-launch" class="slide level2"> <h2><code>accelerate launch</code></h2> <div style="padding-top:0%;padding-left:5%;padding-right:10%;padding-bottom:0%"> <div class="sourceCode" id="cb8"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb8-1"><a href="#cb8-1"></a><span class="ex">accelerate</span> launch script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> <p><br></p> <div class="sourceCode" id="cb9"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb9-1"><a href="#cb9-1"></a><span class="ex">accelerate</span> launch <span class="at">--multi_gpu</span> <span class="at">--num_processes</span> 2 script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> <p><br></p> <div class="sourceCode" id="cb10"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb10-1"><a href="#cb10-1"></a><span class="ex">accelerate</span> launch <span class="dt">\</span></span> <span id="cb10-2"><a href="#cb10-2"></a> <span class="at">--multi_gpu</span> <span class="dt">\</span></span> <span id="cb10-3"><a href="#cb10-3"></a> <span class="at">--use_deepspeed</span> <span class="dt">\</span></span> <span id="cb10-4"><a href="#cb10-4"></a> <span
class="at">--num_processes</span> 2 <span class="dt">\</span></span> <span id="cb10-5"><a href="#cb10-5"></a> script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section> <section id="accelerate-config" class="slide level2"> <h2><code>accelerate config</code></h2> <ul> <li>Rely on <code>config.yaml</code> files</li> <li>Choose to either run <code>accelerate config</code> or write your own:</li> </ul> <div class="columns" style="font-size: 60%;padding-left:5%;padding-right:5%"> <div class="column" style="width:40%;"> <div class="code-with-filename"> <div class="code-with-filename-file"> <pre><strong>ddp_config.yaml</strong></pre> </div> <div class="sourceCode" id="cb11"><pre class="sourceCode numberSource yaml number-lines code-with-copy"><code class="sourceCode yaml"><span id="cb11-1"><a href="#cb11-1"></a><span class="fu">compute_environment</span><span class="kw">:</span><span class="at"> LOCAL_MACHINE</span></span> <span id="cb11-2"><a href="#cb11-2"></a><span class="fu">distributed_type</span><span class="kw">:</span><span class="at"> MULTI_GPU</span></span> <span id="cb11-3"><a href="#cb11-3"></a><span class="fu">main_training_function</span><span class="kw">:</span><span class="at"> main</span></span> <span id="cb11-4"><a href="#cb11-4"></a><span class="fu">mixed_precision</span><span class="kw">:</span><span class="at"> bf16</span></span> <span id="cb11-5"><a href="#cb11-5"></a><span class="fu">num_machines</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span> <span id="cb11-6"><a href="#cb11-6"></a><span class="fu">num_processes</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </div><div class="column" style="width:40%;"> <div class="code-with-filename"> <div class="code-with-filename-file">
<pre><strong>fsdp_config.yaml</strong></pre> </div> <div class="sourceCode" id="cb12"><pre class="sourceCode numberSource yaml number-lines code-with-copy"><code class="sourceCode yaml"><span id="cb12-1"><a href="#cb12-1"></a><span class="fu">compute_environment</span><span class="kw">:</span><span class="at"> LOCAL_MACHINE</span></span> <span id="cb12-2"><a href="#cb12-2"></a><span class="fu">distributed_type</span><span class="kw">:</span><span class="at"> FSDP</span></span> <span id="cb12-3"><a href="#cb12-3"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span> <span id="cb12-4"><a href="#cb12-4"></a><span class="at"> </span><span class="fu">fsdp_auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span> <span id="cb12-5"><a href="#cb12-5"></a><span class="at"> </span><span class="fu">fsdp_backward_prefetch</span><span class="kw">:</span><span class="at"> BACKWARD_PRE</span></span> <span id="cb12-6"><a href="#cb12-6"></a><span class="at"> </span><span class="fu">fsdp_cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span> <span id="cb12-7"><a href="#cb12-7"></a><span class="at"> </span><span class="fu">fsdp_forward_prefetch</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span> <span id="cb12-8"><a href="#cb12-8"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span> <span id="cb12-9"><a href="#cb12-9"></a><span class="at"> </span><span class="fu">fsdp_sharding_strategy</span><span class="kw">:</span><span class="at"> FULL_SHARD</span></span> <span id="cb12-10"><a href="#cb12-10"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> SHARDED_STATE_DICT</span></span> <span id="cb12-11"><a href="#cb12-11"></a><span class="at"> </span><span 
class="fu">fsdp_sync_module_states</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span> <span id="cb12-12"><a href="#cb12-12"></a><span class="at"> </span><span class="fu">fsdp_use_orig_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span> <span id="cb12-13"><a href="#cb12-13"></a><span class="fu">main_training_function</span><span class="kw">:</span><span class="at"> main</span></span> <span id="cb12-14"><a href="#cb12-14"></a><span class="fu">mixed_precision</span><span class="kw">:</span><span class="at"> bf16</span></span> <span id="cb12-15"><a href="#cb12-15"></a><span class="fu">num_machines</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span> <span id="cb12-16"><a href="#cb12-16"></a><span class="fu">num_processes</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </div> </div> </section> <section id="now-that-youre-up-to-speed-whats-new" class="title-slide slide level1 center"> <h1>Now that you’re up to speed, what’s new?</h1> </section> <section> <section id="weve-had-a-busy-last-year-and-so-has-the-ml-community" class="title-slide slide level1 center"> <h1>We’ve had a busy last year, and so has the ML Community!</h1> </section> <section id="new-training-techniques" class="slide level2"> <h2>New training techniques</h2> <ul> <li>Quantization has taken the field by storm</li> <li>New ideas such as FSDP + QLoRA to train huge models on tiny compute!</li> <li>New precision backends as we train natively on smaller precision</li> <li>Optimizing further how much we can push on a single machine through efficient RAM and timing techniques</li> </ul> </section> <section id="larger-compute-landscape" class="slide level2"> <h2>Larger compute landscape</h2> <ul> <li>As we search for alternatives to
NVIDIA, new compilers rise: <ul> <li>XPU (Intel)</li> <li>NPU (Intel)</li> <li>MLU (Cambricon)</li> </ul></li> </ul> <p>All of which are supported by 🤗 Accelerate</p> </section> <section id="lower-abstractions" class="slide level2"> <h2>Lower abstractions</h2> <ul> <li>While the <code>Accelerator</code> was great, needed better abstractions focused on controlling behaviors</li> <li>Introduced the <code>PartialState</code></li> </ul> <div style="padding-left:10%;padding-top:0%;padding-right:15%"> <div class="sourceCode" id="cb13"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1"></a><span class="im">from</span> accelerate <span class="im">import</span> PartialState</span> <span id="cb13-2"><a href="#cb13-2"></a></span> <span id="cb13-3"><a href="#cb13-3"></a><span class="cf">if</span> PartialState().is_main_process:</span> <span id="cb13-4"><a href="#cb13-4"></a> <span class="co"># Run on only 1 device</span></span> <span id="cb13-5"><a href="#cb13-5"></a></span> <span id="cb13-6"><a href="#cb13-6"></a><span class="cf">with</span> PartialState().main_process_first:</span> <span id="cb13-7"><a href="#cb13-7"></a> <span class="co"># Useful for dataset processing</span></span> <span id="cb13-8"><a href="#cb13-8"></a></span> <span id="cb13-9"><a href="#cb13-9"></a><span class="co"># Device-agnostic without the bulk of the `Accelerator`</span></span> <span id="cb13-10"><a href="#cb13-10"></a>device <span class="op">=</span> PartialState().device</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section> <section id="faster-and-better-inference-alternatives" class="slide level2"> <h2>Faster and better inference alternatives</h2> <div style="font-size:70%"> <ul> <li><code>PiPPy</code> gives us efficient pipeline-parallelism in distributed environments to increase throughput while keeping a simple torch-bound 
API</li> <li>Rather than having to wait for each GPU, every GPU can be busy in parallel</li> <li>Will be critical as larger LLMs take hold and more than one computer is needed</li> </ul> </div> <div style="font-size:60%;padding-left:19%;padding-top:0%;padding-right:24%;"> <div class="sourceCode" id="cb14"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1"></a><span class="im">import</span> torch</span> <span id="cb14-2"><a href="#cb14-2"></a><span class="im">from</span> transformers <span class="im">import</span> AutoModelForSequenceClassification</span> <span id="cb14-3"><a href="#cb14-3"></a></span> <span id="cb14-4"><a href="#cb14-4"></a><span class="im">from</span> accelerate <span class="im">import</span> PartialState, prepare_pippy</span> <span id="cb14-5"><a href="#cb14-5"></a></span> <span id="cb14-6"><a href="#cb14-6"></a>model <span class="op">=</span> AutoModelForSequenceClassification.from_pretrained(<span class="st">"gpt2"</span>)</span> <span id="cb14-7"><a href="#cb14-7"></a>model.<span class="bu">eval</span>()</span> <span id="cb14-8"><a href="#cb14-8"></a></span> <span id="cb14-9"><a href="#cb14-9"></a><span class="bu">input</span> <span class="op">=</span> torch.randint(</span> <span id="cb14-10"><a href="#cb14-10"></a> low<span class="op">=</span><span class="dv">0</span>,</span> <span id="cb14-11"><a href="#cb14-11"></a> high<span class="op">=</span>model.config.vocab_size,</span> <span id="cb14-12"><a href="#cb14-12"></a> size<span class="op">=</span>(<span class="dv">2</span>, <span class="dv">1024</span>), <span class="co"># bs x seq_len</span></span> <span id="cb14-13"><a href="#cb14-13"></a> device<span class="op">=</span><span class="st">"cpu"</span>,</span> <span id="cb14-14"><a href="#cb14-14"></a>)</span> <span id="cb14-15"><a href="#cb14-15"></a></span> <span id="cb14-16"><a href="#cb14-16"></a>model <span class="op">=</span> prepare_pippy(model, 
split_points<span class="op">=</span><span class="st">"auto"</span>, example_args<span class="op">=</span>(<span class="bu">input</span>,))</span> <span id="cb14-17"><a href="#cb14-17"></a></span> <span id="cb14-18"><a href="#cb14-18"></a><span class="cf">with</span> torch.no_grad():</span> <span id="cb14-19"><a href="#cb14-19"></a> output <span class="op">=</span> model(<span class="bu">input</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> </div> </section></section> <section> <section id="adoption-accelerate-in-the-ecosystem" class="title-slide slide level1 center"> <h1>Adoption: Accelerate in the ecosystem</h1> </section> <section id="accelerate-in-the-ecosystem" class="slide level2"> <h2>Accelerate in the Ecosystem</h2> <ul> <li>Many of the frameworks you use daily already rely on 🤗 Accelerate! <ul> <li>Nearly all of 🤗</li> <li><code>axolotl</code></li> <li><code>fastai</code></li> <li><code>FastChat</code></li> <li><code>lucidrains</code></li> <li><code>kornia</code></li> </ul></li> </ul> </section> <section id="accelerate-in-the-ecosystem-1" class="slide level2"> <h2>Accelerate in the Ecosystem</h2> <div style="font-size: 70%;"> <ul> <li>Started as a way to isolate out distributed code on TPU and <code>DistributedDataParallelism</code></li> </ul> </div> <div style="padding-left: 30%"> <p><img data-src="sylvain_tweet.JPG" style="width:70.0%"></p> </div> </section> <section id="accelerate-in-the-ecosystem-2" class="slide level2"> <h2>Accelerate in the Ecosystem</h2> <div style="font-size: 70%;"> <ul> <li>Now is the backbone of some of the largest PyTorch training frameworks in the ecosystem</li> </ul> </div> <div style="padding-left: 30%;"> <p><img data-src="hf_trainer.JPG" style="width:70.0%"></p> </div> </section></section> <section id="whats-next" class="title-slide slide level1 center"> <h1>What’s next?</h1> </section> <section id="elevating-the-community" class="title-slide slide 
level1 center"> <h1>Elevating the community</h1> <ul> <li>Now that more advanced training techniques are reachable (FSDP, DeepSpeed, etc), we need to focus on educating the community on how to use it best</li> <li>Goes beyond how to use the <code>Trainer</code> or <code>Accelerator</code>, but how to use <em>what</em> where</li> <li>Keep Accelerate as a tool for the community to utilize when new techniques come out and play with, to push new ideas to scale quickly</li> </ul> </section> <section id="soon" class="title-slide slide level1 center"> <h1>1.0.0: Soon!</h1> <ul> <li>Tried and battle-tested by over 7M users/month | 110M+ total downloads</li> <li>As we’ve been stable for over a year now, we’re near ready to release 1.0.0</li> </ul> </section> <section id="thanks-for-joining" class="title-slide slide level1 center"> <h1>Thanks for joining!</h1> <div style="font-size: 70%;"> <ul> <li><a href="https://hf.co/docs/accelerate">🤗 Accelerate documentation</a></li> <li><a href="https://huggingface.co/docs/accelerate/basic_tutorials/launch">Launching distributed code</a></li> <li><a href="https://huggingface.co/docs/accelerate/basic_tutorials/notebook">Distributed code and Jupyter Notebooks</a></li> <li><a href="https://huggingface.co/docs/accelerate/basic_tutorials/migration">Migrating to 🤗 Accelerate easily</a></li> <li><a href="https://huggingface.co/docs/accelerate/usage_guides/big_modeling">Big Model Inference tutorial</a></li> <li><a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed">DeepSpeed and 🤗 Accelerate</a></li> <li><a href="https://huggingface.co/docs/accelerate/usage_guides/fsdp">Fully Sharded Data Parallelism and 🤗 Accelerate</a></li> <li><a href="https://huggingface.co/docs/accelerate/concept_guides/fsdp_and_deepspeed">FSDP vs DeepSpeed In-Depth</a></li> </ul> </div> <div class="footer footer-default"> </div> </section> </div> </div> <script>window.backupDefine = window.define; window.define = undefined;</script> <script 
src="accelerate_files/libs/revealjs/dist/reveal.js"></script>
<!-- reveal.js plugins -->
<script src="accelerate_files/libs/revealjs/plugin/quarto-line-highlight/line-highlight.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/pdf-export/pdfexport.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/reveal-menu/menu.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/reveal-menu/quarto-menu.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/quarto-support/support.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/notes/notes.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/search/search.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/zoom/zoom.js"></script>
<script src="accelerate_files/libs/revealjs/plugin/math/math.js"></script>
<!-- Restore the global `define` that was stashed in window.backupDefine before reveal.js and its plugins were loaded. -->
<script>window.define = window.backupDefine; window.backupDefine = undefined;</script>
<!--
  Deck bootstrap: configures and starts reveal.js.
  Every `//` comment sits on its own line so it cannot comment out the code that follows it.
-->
<script>
  // Full list of configuration options available at:
  // https://revealjs.com/config/
  Reveal.initialize({
    'controlsAuto': true,
    'previewLinksAuto': false,
    // NOTE: this key used to be listed twice with the same value; one entry is enough.
    'smaller': false,
    'pdfSeparateFragments': false,
    'autoAnimateEasing': "ease",
    'autoAnimateDuration': 1,
    'autoAnimateUnmatched': true,
    // Slide menu configuration; the "custom" panel markup is passed through as-is.
    'menu': {"side":"left","useTextContentForMissingTitles":true,"markers":false,"loadIcons":false,"custom":[{"title":"Tools","icon":"<i class=\"fas fa-gear\"></i>","content":"<ul class=\"slide-menu-items\">\n<li class=\"slide-tool-item active\" data-item=\"0\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.fullscreen(event)\"><kbd>f</kbd> Fullscreen</a></li>\n<li class=\"slide-tool-item\" data-item=\"1\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.speakerMode(event)\"><kbd>s</kbd> Speaker View</a></li>\n<li class=\"slide-tool-item\" data-item=\"2\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.overview(event)\"><kbd>o</kbd> Slide Overview</a></li>\n<li class=\"slide-tool-item\" data-item=\"3\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.togglePdfExport(event)\"><kbd>e</kbd> PDF Export Mode</a></li>\n<li class=\"slide-tool-item\" data-item=\"4\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.keyboardHelp(event)\"><kbd>?</kbd> Keyboard Help</a></li>\n</ul>"}],"openButton":true},

    // Display controls in the bottom right corner
    controls: false,

    // Help the user learn the controls by providing hints, for example by
    // bouncing the down arrow when they first encounter a vertical slide
    controlsTutorial: false,

    // Determines where controls appear, "edges" or "bottom-right"
    controlsLayout: 'edges',

    // Visibility rule for backwards navigation arrows; "faded", "hidden"
    // or "visible"
    controlsBackArrows: 'faded',

    // Display a presentation progress bar
    progress: true,

    // Display the page number of the current slide
    slideNumber: false,

    // 'all', 'print', or 'speaker'
    showSlideNumber: 'all',

    // Add the current slide number to the URL hash so that reloading the
    // page/copying the URL will return you to the same slide
    hash: true,

    // Start with 1 for the hash rather than 0
    hashOneBasedIndex: false,

    // Flags if we should monitor the hash and change slides accordingly
    respondToHashChanges: true,

    // Push each slide change to the browser history
    history: true,

    // Enable keyboard shortcuts for navigation
    keyboard: true,

    // Enable the slide overview mode
    overview: true,

    // Disables the default reveal.js slide layout (scaling and centering)
    // so that you can use custom CSS layout
    disableLayout: false,

    // Vertical centering of slides
    center: false,

    // Enables touch navigation on devices with touch input
    touch: true,

    // Loop the presentation
    loop: false,

    // Change the presentation direction to be RTL
    rtl: false,

    // see https://revealjs.com/vertical-slides/#navigation-mode
    navigationMode: 'linear',

    // Randomizes the order of slides each time the presentation loads
    shuffle: false,

    // Turns fragments on and off globally
    fragments: true,

    // Flags whether to include the current fragment in the URL,
    // so that reloading brings you to the same fragment position
    fragmentInURL: false,

    // Flags if the presentation is running in an embedded mode,
    // i.e. contained within a limited portion of the screen
    embedded: false,

    // Flags if we should show a help overlay when the questionmark
    // key is pressed
    help: true,

    // Flags if it should be possible to pause the presentation (blackout)
    pause: true,

    // Flags if speaker notes should be visible to all viewers
    showNotes: false,

    // Global override for autoplaying embedded media (null/true/false)
    autoPlayMedia: true,

    // Global override for preloading lazy-loaded iframes (null/true/false)
    preloadIframes: null,

    // Number of milliseconds between automatically proceeding to the
    // next slide, disabled when set to 0, this value can be overwritten
    // by using a data-autoslide attribute on your slides
    autoSlide: 0,

    // Stop auto-sliding after user input
    autoSlideStoppable: true,

    // Use this method for navigation when auto-sliding
    autoSlideMethod: null,

    // Specify the average time in seconds that you think you will spend
    // presenting each slide. This is used to show a pacing timer in the
    // speaker view
    defaultTiming: null,

    // Enable slide navigation via mouse wheel
    mouseWheel: false,

    // The display mode that will be used to show slides
    display: 'block',

    // Hide cursor if inactive
    hideInactiveCursor: true,

    // Time before the cursor is hidden (in ms)
    hideCursorTime: 5000,

    // Opens links in an iframe preview overlay
    previewLinks: false,

    // Transition style (none/fade/slide/convex/concave/zoom)
    transition: 'none',

    // Transition speed (default/fast/slow)
    transitionSpeed: 'default',

    // Transition style for full page slide backgrounds
    // (none/fade/slide/convex/concave/zoom)
    backgroundTransition: 'none',

    // Number of slides away from the current that are visible
    viewDistance: 3,

    // Number of slides away from the current that are visible on mobile
    // devices. It is advisable to set this to a lower number than
    // viewDistance in order to save resources.
    mobileViewDistance: 2,

    // The "normal" size of the presentation, aspect ratio will be preserved
    // when the presentation is scaled to fit different resolutions. Can be
    // specified using percentage units.
    width: 1050,

    height: 700,

    // Factor of the display size that should remain empty around the content
    margin: 0.1,

    math: {
      mathjax: 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js',
      config: 'TeX-AMS_HTML-full',
      tex2jax: {
        inlineMath: [['\\(','\\)']],
        displayMath: [['\\[','\\]']],
        balanceBraces: true,
        processEscapes: false,
        processRefs: true,
        processEnvironments: true,
        preview: 'TeX',
        skipTags: ['script','noscript','style','textarea','pre','code'],
        ignoreClass: 'tex2jax_ignore',
        processClass: 'tex2jax_process'
      },
    },

    // reveal.js plugins
    plugins: [QuartoLineHighlight, PdfExport, RevealMenu, QuartoSupport, RevealMath, RevealNotes, RevealSearch, RevealZoom ]
  });
</script>
<!--
  Post-load wiring, run once on DOMContentLoaded:
  body colour-mode class, tabsets, copy-to-clipboard buttons, and
  hover tooltips for footnote and citation links.
-->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
  // Mirror the stylesheet's data-mode ("dark" or anything else) onto <body> as a class.
  const toggleBodyColorMode = (bsSheetEl) => {
    const mode = bsSheetEl.getAttribute("data-mode");
    const bodyEl = window.document.querySelector("body");
    if (mode === "dark") {
      bodyEl.classList.add("quarto-dark");
      bodyEl.classList.remove("quarto-light");
    } else {
      bodyEl.classList.add("quarto-light");
      bodyEl.classList.remove("quarto-dark");
    }
  }
  // Only applies when a <link id="quarto-bootstrap"> stylesheet is present.
  const toggleBodyColorPrimary = () => {
    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
    if (bsSheetEl) {
      toggleBodyColorMode(bsSheetEl);
    }
  }
  toggleBodyColorPrimary();

  // Instantiate Tabby for every tabset; the instance itself is not needed afterwards.
  const tabsets = window.document.querySelectorAll(".panel-tabset-tabby")
  tabsets.forEach(function(tabset) {
    new Tabby('#' + tabset.id);
  });

  // True when the element carries any class starting with "code-annotation-".
  const isCodeAnnotation = (el) => {
    for (const clz of el.classList) {
      if (clz.startsWith('code-annotation-')) {
        return true;
      }
    }
    return false;
  }

  // Copy buttons: copy the text of the element preceding the button,
  // with code-annotation children stripped from a detached clone.
  const clipboard = new window.ClipboardJS('.code-copy-button', {
    text: function(trigger) {
      const codeEl = trigger.previousElementSibling.cloneNode(true);
      // `children` is a live collection: iterate over a snapshot so that
      // removing one annotation cannot cause the next sibling to be skipped.
      for (const childEl of Array.from(codeEl.children)) {
        if (isCodeAnnotation(childEl)) {
          childEl.remove();
        }
      }
      return codeEl.innerText;
    }
  });
  clipboard.on('success', function(e) {
    // button target
    const button = e.trigger;
    // don't keep focus
    button.blur();
    // flash "checked"
    button.classList.add('code-copy-button-checked');
    var currentTitle = button.getAttribute("title");
    button.setAttribute("title", "Copied!");
    let tooltip;
    if (window.bootstrap) {
      button.setAttribute("data-bs-toggle", "tooltip");
      button.setAttribute("data-bs-placement", "left");
      button.setAttribute("data-bs-title", "Copied!");
      tooltip = new bootstrap.Tooltip(button, {
        trigger: "manual",
        customClass: "code-copy-button-tooltip",
        offset: [0, -8]
      });
      tooltip.show();
    }
    // After one second, undo the "Copied!" feedback and restore the original title.
    setTimeout(function() {
      if (tooltip) {
        tooltip.hide();
        button.removeAttribute("data-bs-title");
        button.removeAttribute("data-bs-toggle");
        button.removeAttribute("data-bs-placement");
      }
      button.setAttribute("title", currentTitle);
      button.classList.remove('code-copy-button-checked');
    }, 1000);
    // clear code selection
    e.clearSelection();
  });

  // Attach a tippy tooltip to `el`; `contentFn` supplies the HTML content.
  function tippyHover(el, contentFn) {
    const config = {
      allowHTML: true,
      content: contentFn,
      // Effective values: the original set maxWidth to 500 and then
      // immediately overwrote it with 700, and assigned offset afterwards.
      maxWidth: 700,
      offset: [0,0],
      delay: 100,
      arrow: false,
      appendTo: function(el) {
        return el.closest('section.slide') || el.parentElement;
      },
      interactive: true,
      interactiveBorder: 10,
      theme: 'light-border',
      placement: 'bottom-start'
    };
    window.tippy(el, config);
  }

  // Footnote references: show the referenced note's HTML on hover.
  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
  for (var i=0; i<noterefs.length; i++) {
    const ref = noterefs[i];
    tippyHover(ref, function() {
      // use id or data attribute instead here
      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
      if (!href) {
        // No target recorded on the anchor: nothing to show.
        return "";
      }
      // Absolute URLs are reduced to their hash; relative ones throw and are kept as-is.
      try { href = new URL(href).hash; } catch {}
      const id = href.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      // Guard against a dangling reference instead of throwing on null.
      return note ? note.innerHTML : "";
    });
  }

  // Walk up the ancestors of `el` until one has a data-cites attribute;
  // returns { el, cites: [...] } or undefined when no ancestor has one.
  const findCites = (el) => {
    const parentEl = el.parentElement;
    if (parentEl) {
      const cites = parentEl.dataset.cites;
      if (cites) {
        return { el, cites: cites.split(' ') };
      } else {
        return findCites(el.parentElement)
      }
    } else {
      return undefined;
    }
  };

  // Bibliography references: show the matching "ref-<cite>" entries on hover.
  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
  for (var i=0; i<bibliorefs.length; i++) {
    const ref = bibliorefs[i];
    const citeInfo = findCites(ref);
    if (citeInfo) {
      tippyHover(citeInfo.el, function() {
        var popup = window.document.createElement('div');
        citeInfo.cites.forEach(function(cite) {
          var citeDiv = window.document.createElement('div');
          citeDiv.classList.add('hanging-indent');
          citeDiv.classList.add('csl-entry');
          var biblioDiv = window.document.getElementById('ref-' + cite);
          if (biblioDiv) {
            citeDiv.innerHTML = biblioDiv.innerHTML;
          }
          popup.appendChild(citeDiv);
        });
        return popup.innerHTML;
      });
    }
  }
});
</script>
</body></html>