<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <script>
      // Parse an HTML source string into a detached Document (used for the
      // TechPowerUp GPU-search response, which arrives as an HTML page).
      function strToHtml(str) {
        let parser = new DOMParser();
        return parser.parseFromString(str, "text/html");
      }

      // Short, jQuery-independent function to read html tables and write them
      // into an Array of objects. Row 0 supplies the property names; every
      // following row becomes one object.
      // Kudos to RobG at StackOverflow
      function tableToObj(table) {
        var rows = table.rows;
        var propCells = rows[0].cells;
        var propNames = [];
        var results = [];
        var obj, cells;

        // Use the first row for the property names
        // Could use a header section but result is the same if
        // there is only one header row
        for (var i = 0, iLen = propCells.length; i < iLen; i++) {
          propNames.push(
            (propCells[i].textContent || propCells[i].innerText).trim()
          );
        }

        // Use the rows for data
        // Could use tbody rows here to exclude header & footer
        // but starting from 1 gives required result
        for (var j = 1, jLen = rows.length; j < jLen; j++) {
          cells = rows[j].cells;
          obj = {};
          for (var k = 0; k < iLen; k++) {
            obj[propNames[k]] = (
              cells[k].textContent || cells[k].innerText
            ).trim();
          }
          results.push(obj);
        }
        return results;
      }

      // Render TechPowerUp GPU rows as "<Product Name> - <memory>" strings for
      // the datalist; the Memory column looks like "24 GB, 384 bit", so the
      // part before the first comma is kept.
      function formatGpu(gpus) {
        return gpus.map(
          (g) => `${g["Product Name"]} - ${g["Memory"].split(",")[0]}`
        );
      }

      // Bits per weight of each llama.cpp GGUF quantisation type.
      const gguf_quants = {
        "IQ1_S": 1.56,
        "IQ2_XXS": 2.06,
        "IQ2_XS": 2.31,
        "IQ2_S": 2.5,
        "IQ2_M": 2.7,
        "IQ3_XXS": 3.06,
        "IQ3_XS": 3.3,
        "Q2_K": 3.35,
        "Q3_K_S": 3.5,
        "IQ3_S": 3.5,
        "IQ3_M": 3.7,
        "Q3_K_M": 3.91,
        "Q3_K_L": 4.27,
        "IQ4_XS": 4.25,
        "IQ4_NL": 4.5,
        "Q4_0": 4.55,
        "Q4_K_S": 4.58,
        "Q4_K_M": 4.85,
        "Q5_0": 5.54,
        "Q5_K_S": 5.54,
        "Q5_K_M": 5.69,
        "Q6_K": 6.59,
        "Q8_0": 8.5,
      }

      // Fetch `config.json` for a Hugging Face model and attach the model's
      // parameter count as `config.parameters`.
      // Parameter-count sources, in order of preference:
      //   1. model.safetensors.index.json  metadata.total_size / 2
      //   2. pytorch_model.bin.index.json  metadata.total_size / 2
      //      (total_size is bytes; / 2 assumes fp16, i.e. 2 bytes per weight)
      //   3. scraping the parameter total off the public model page via a CORS proxy
      async function modelConfig(hf_model, hf_token) {
        // BUG FIX: `auth` was assigned without a declaration (implicit global).
        const auth = hf_token == ""
          ? {}
          : { headers: { 'Authorization': `Bearer ${hf_token}` } }
        let config = await fetch(
          `https://huggingface.co/${hf_model}/raw/main/config.json`,
          auth
        ).then(r => r.json())
        let model_size = 0
        try {
          model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`, auth).then(r => r.json()))["metadata"]["total_size"] / 2
          if (isNaN(model_size)) {
            // BUG FIX: was `new Erorr(...)` — the typo itself threw a
            // ReferenceError; the intent (fall through to the next source) is
            // preserved with a real Error.
            throw new Error("no size in safetensors metadata")
          }
        } catch (e) {
          try {
            model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`, auth).then(r => r.json()))["metadata"]["total_size"] / 2
            if (isNaN(model_size)) {
              // BUG FIX: same `Erorr` typo as above.
              throw new Error("no size in pytorch metadata")
            }
          } catch {
            // Last resort: scrape the parameter count from the model page itself.
            let model_page = await fetch(
              "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
            ).then(r => r.text())
            let el = document.createElement( 'html' );
            el.innerHTML = model_page
            let params_el = el.querySelector('div[data-target="ModelSafetensorsParams"]')
            if (params_el !== null) {
              model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["safetensors"]["total"]
            } else {
              params_el = el.querySelector('div[data-target="ModelHeader"]')
              model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["model"]["safetensors"]["total"]
            }
          }
        }
        config.parameters = model_size
        return config
      }

      /* Size of llama.cpp's input buffers for a given context length and batch size.
         Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248
           ctx->inp_tokens  = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
           ctx->inp_embd    = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch);
           ctx->inp_pos     = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
           ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch);
           ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx);
           ctx->inp_sum     = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch);
         n_embd is hidden size (github:ggeranov/llama.cpp/convert.py:248)
         NOTE(review): this sums tensor *element* counts, and contextSize() adds
         the result to byte quantities — rough approximation; confirm intended units. */
      function inputBuffer(context=8192, model_config, bsz=512) {
        const inp_tokens = bsz
        const inp_embd = model_config["hidden_size"] * bsz
        const inp_pos = bsz
        const inp_KQ_mask = context * bsz
        const inp_K_shift = context
        const inp_sum = bsz
        return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum
      }

      // llama.cpp compute-graph scratch buffer size in bytes.
      // Empirical formula, only calibrated for batch size 512.
      function computeBuffer(context=8192, model_config, bsz=512) {
        if (bsz != 512) {
          // BUG FIX: message typos ("end result result", "overestimatition").
          alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result will be an overestimation")
        }
        return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
      }

      // KV-cache size in bytes: two tensors (K and V) of
      // n_embd_gqa * num_hidden_layers * context elements at cache_bit bits each.
      function kvCache(context=8192, model_config, cache_bit=16) {
        const n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
        const n_embd_gqa = model_config["hidden_size"] / n_gqa
        const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
        const size = 2 * n_elements
        return size * (cache_bit / 8)
      }

      // Total context-dependent memory: input buffers + KV cache + compute buffer.
      function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
        return Number.parseFloat((inputBuffer(context, model_config, bsz) + kvCache(context, model_config, cache_bit) + computeBuffer(context, model_config, bsz)).toFixed(2))
      }

      // Model weight size in bytes for a given bits-per-weight.
      function modelSize(model_config, bpw=4.5) {
        return Number.parseFloat((model_config["parameters"] * bpw / 8).toFixed(2))
      }

      // Read the form, compute model / context / total sizes, and write them
      // (in GiB) into the result fields. `format` is "gguf" or "exl2".
      // Any fetch/parse failure surfaces as an alert.
      async function calculateSizes(format) {
        try {
          const model_config = await modelConfig(document.getElementById("modelsearch").value, document.getElementById("hf_token").value)
          const context = parseInt(document.getElementById("contextsize").value)
          let bsz = 512
          let cache_bit = 16
          let bpw = 0
          if (format === "gguf") {
            bsz = parseInt(document.getElementById("batchsize").value)
            bpw = gguf_quants[document.getElementById("quantsize").innerText]
          } else if (format == "exl2") {
            cache_bit = Number.parseInt(document.getElementById("kvCache").value)
            bpw = Number.parseFloat(document.getElementById("bpw").value)
          }
          const model_size = modelSize(model_config, bpw)
          const context_size = contextSize(context, model_config, bsz, cache_bit)
          const total_size = ((model_size + context_size) / 2**30)
          document.getElementById("resultmodel").innerText = (model_size / 2**30).toFixed(2)
          document.getElementById("resultcontext").innerText = (context_size / 2**30).toFixed(2)
          const result_total_el = document.getElementById("resulttotal");
          result_total_el.innerText = total_size.toFixed(2)
          // Colour the total against the selected GPU's VRAM:
          // green = comfortable fit, yellow = tight, red = does not fit.
          const gpu = document.getElementById("gpusearch").value
          if (gpu !== "") {
            // NOTE(review): assumes the GPU string is "<name> - <NN> GB" with no
            // "-" inside the name — verify against formatGpu output.
            const vram = parseFloat(gpu.split("-")[1].replace("GB", "").trim())
            if (vram - total_size > 0.5) {
              result_total_el.style.backgroundColor = "#bef264"
            } else if (vram - total_size > 0) {
              result_total_el.style.backgroundColor = "#facc15"
            } else {
              result_total_el.style.backgroundColor = "#ef4444"
            }
          }
        } catch(e) {
          alert(e);
        }
      }
    </script>
    <link href="./styles.css" rel="stylesheet">
    <title>Can I run it? - LLM VRAM Calculator</title>
  </head>
  <body class="p-8">
    <div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
      <h1 class="text-xl font-semibold leading-6 text-gray-900">
        LLM Model, Can I run it?
      </h1>
      <p>
        To support gated or private repos, you need to
        <a href="https://huggingface.co/settings/tokens" style="color: #4444ff"><b>create an authentication token</b></a>,
        check the box
        <span style="color: #6e1818"><b>"Read access to contents of all public gated repos you can access"</b></span>
        and then enter the token in the field below.
</p> <div class="flex flex-col gap-10"> <div class="w-auto flex flex-col gap-4"> <!-- Hugging Face authentication token (optional; needed for gated/private repos) --> <div class="relative" x-data="{ results: null, query: null }" > <!-- NOTE(review): for="gpusearch" is wrong here — this input's id is "hf_token", so the label is not associated with it --> <label for="gpusearch" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" >Huggingface Token (optional)</label > <input class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" id="hf_token" /> </div> <!-- GPU Selector: free-text input backed by a datalist filled from the TechPowerUp search --> <div class="relative" x-data="{ results: null, query: null }" > <label for="gpusearch" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" >GPU (optional)</label > <input class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" placeholder="GeForce RTX 3090 - 24 GB" id="gpusearch" name="gpusearch" list="gpulist" x-model="query" @keypress.debounce.150ms="results = query === '' ? 
[] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))" /> <datalist id="gpulist"> <template x-for="item in results"> <option :value="item" x-text="item"></option> </template> </datalist> </div> <!-- Model Selector --> <div class="flex flex-row gap-4 relative"> <!-- NOTE(review): for="contextsize" points at the context-size input, not this model field (id="modelsearch") --> <label for="contextsize" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Model (unquantized) </label> <!-- NOTE(review): this div carries two class attributes; the second one (class="relative", after x-id) is ignored by the HTML parser --> <div class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" x-data="{ open: false, value: 'Nexusflow/Starling-LM-7B-beta', results: null, toggle() { if (this.open) { return this.close() } this.$refs.input.focus() this.open = true }, close(focusAfter) { if (! this.open) return this.open = false focusAfter && focusAfter.focus() } }" x-on:keydown.escape.prevent.stop="close($refs.input)" x-id="['model-typeahead']" class="relative" > <!-- Input --> <input id="modelsearch" x-ref="input" x-on:click="toggle()" @keypress.debounce.150ms="results = (await fetch('https://huggingface.co/api/quicksearch?type=model&q=' + encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));" :aria-expanded="open" :aria-controls="$id('model-typeahead')" x-model="value" class="flex justify-between items-center gap-2 w-full" /> <!-- Panel --> <div x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.input)" :id="$id('model-typeahead')" style="display: none" class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" > <template x-for="result in results"> <a @click="value = result.id; close($refs.input)" x-text="result.id" class="flex 
cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" ></a> </template> </div> </div> </div> <!-- Context Size Selector --> <div class="relative"> <label for="contextsize" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Context Size </label> <input value="8192" type="number" name="contextsize" id="contextsize" step="1024" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" /> </div> <!-- Quant Format Selector --> <div class="relative"> <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" >Quant Format</label > <!-- Radio group bound to the Alpine "format" state; drives the x-show of the EXL2 / GGUF option panels below --> <fieldset x-model="format" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" > <legend class="sr-only">Quant format</legend> <div class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0" > <div class="flex items-center"> <input id="gguf-format" name="quant-format" type="radio" value="gguf" checked class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" /> <label for="gguf-format" class="ml-3 block text-sm font-medium leading-6 text-gray-900" >GGUF</label > </div> <div class="flex items-center"> <input id="exl2-format" name="quant-format" type="radio" value="exl2" class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" /> <label for="exl2-format" class="ml-3 block text-sm font-medium leading-6 text-gray-900" >EXL2</label > </div> <div class="flex items-center"> <input id="gptq-format" name="quant-format" type="radio" disabled value="gptq" class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" /> <label 
for="gptq-format" class="ml-3 block text-sm font-medium leading-6 text-gray-900" >GPTQ (coming soon)</label > </div> </div> </fieldset> </div> <!-- EXL2 Options --> <div x-show="format === 'exl2'" class="flex flex-row gap-4"> <div class="relative flex-grow"> <label for="bpw" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > BPW </label> <input value="4.5" type="number" step="0.01" id="bpw" name="bpw" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" /> </div> <div class="flex-shrink relative rounded-md" > <div class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" > <label for="kvCache" class="inline-block bg-white text-xs font-medium text-gray-900" > KV Cache </label> <select id="kvCache" name="kvCache"> <option value="16">16 bit</option> <option value="8">8 bit</option> <option value="4">4 bit</option> </select> </div> </div> </div> <!-- GGUF Options --> <div x-show="format === 'gguf'" class="relative"> <div class="flex flex-row gap-4"> <!-- NOTE(review): for="contextsize" is wrong here too; this label heads the quant-size dropdown (button id="quantsize") --> <label for="contextsize" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Quantization Size </label> <!-- NOTE(review): duplicate class attribute on this div as well — the later class="relative" is ignored by the parser --> <div class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" x-data="{ open: false, value: '', toggle() { if (this.open) { return this.close() } this.$refs.button.focus() this.open = true }, close(focusAfter) { if (! 
this.open) return this.open = false focusAfter && focusAfter.focus() } }" x-on:keydown.escape.prevent.stop="close($refs.button)" x-id="['dropdown-button']" class="relative" > <!-- Button --> <button x-ref="button" x-on:click="toggle()" :aria-expanded="open" :aria-controls="$id('dropdown-button')" type="button" id="quantsize" x-text="value.length === 0 ? 'Q4_K_S' : value" class="flex justify-between items-center gap-2 w-full" > Q4_K_S <!-- Heroicon: chevron-down --> <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-gray-400" viewBox="0 0 20 20" fill="currentColor" > <path fill-rule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clip-rule="evenodd" /> </svg> </button> <!-- Panel: quant list must stay in sync with the gguf_quants keys in the head script --> <div x-data="{ quants: [ 'IQ1_S', 'IQ2_XXS', 'IQ2_XS', 'IQ2_S', 'IQ2_M', 'IQ3_XXS', 'IQ3_XS', 'Q2_K', 'Q3_K_S', 'IQ3_S', 'IQ3_M', 'Q3_K_M', 'Q3_K_L', 'IQ4_XS', 'IQ4_NL', 'Q4_0', 'Q4_K_S', 'Q4_K_M', 'Q5_0', 'Q5_K_S', 'Q5_K_M', 'Q6_K', 'Q8_0' ]}" x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.button)" :id="$id('dropdown-button')" style="display: none" class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" > <template x-for="quant in quants"> <a @click="value = quant; close($refs.button)" x-text="quant" class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" ></a> </template> </div> </div> <div class="relative"> <label for="batchsize" class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Batch Size </label> <input value="512" type="number" step="128" id="batchsize" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm 
sm:leading-6" /> </div> </div> </div> <button type="button" class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600" @click="calculateSizes(format)" > Submit </button> </div> <!-- Result read-outs (GiB), populated by calculateSizes(); the numbers below are placeholders shown before the first run --> <div class="w-auto flex flex-col gap-4"> <div class="relative"> <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Model Size (GB) </label> <div id="resultmodel" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" >4.20</div> </div> <div class="relative"> <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Context Size (GB) </label> <div id="resultcontext" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" >6.90</div> </div> <div class="relative"> <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" > Total Size (GB) </label> <div id="resulttotal" class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" >420.69</div> </div> </div> </div> </div> <script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" ></script> <!-- NOTE(review): "defer" has no effect on inline scripts — this runs immediately at parse time, which only works because calculateSizes is declared in the head above --> <script defer> calculateSizes("gguf") </script> </body> </html>