|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>Model Parameter Calculator</title> |
|
<style>
  /* Page layout: the body is a full-height flex row holding the input
     console and the results pane side by side. */
  body {
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 0;
    display: flex;
    flex-direction: row;
    height: 100vh;
  }

  /* Left pane with the hyperparameter inputs. */
  .console {
    width: 20%;
    padding: 20px;
    background-color: #f6b5b5;
    overflow-y: auto;
    border-right: 2px solid #ee4a4f;
  }

  /* Extra expert inputs start hidden; the script sets display to "block"
     when the "Is MOE ?" select is switched to "yes". */
  #additionalFieldsMOE {
    display: none;
  }

  /* Right pane that receives the rendered results. */
  .output {
    width: 80%;
    padding: 20px;
    overflow-y: auto;
  }

  /* Result tables. */
  table {
    width: 100%;
    border-collapse: collapse;
    margin-bottom: 20px;
    background-color: #fff7f9;
    border: 1px solid #f6b5b5;
  }

  th,
  td {
    border: 1px solid #f6b5b5;
    padding: 8px;
    text-align: center;
  }

  th {
    background-color: #f6d4d4;
  }

  /* Boxed summary lines (per-layer and whole-model totals). */
  .highlight {
    background-color: #ffe8e8;
    color: #ad0d0d;
    padding: 10px;
    margin: 20px 0;
    border: 1px solid #f6b5b5;
    border-radius: 4px;
    text-align: center;
  }

  h3 {
    color: #ad0d0d;
  }

  .section {
    margin-bottom: 20px;
  }

  /* Form controls. */
  label {
    font-weight: bold;
    font-size: 0.9rem;
  }

  input[type="number"],
  select {
    width: 100%;
    padding: 8px;
    border: 1px solid #ddd;
    border-radius: 5px;
    /* Keep padding and border inside the 100% width. */
    box-sizing: border-box;
    font-size: 0.9rem;
  }

  /* The default outline is replaced by a coloured border plus glow so the
     focused control stays visibly marked. */
  input[type="number"]:focus,
  select:focus {
    border-color: #ff6666;
    outline: none;
    box-shadow: 0px 0px 5px rgba(255, 102, 102, 0.5);
  }

  button {
    background-color: #ff6666;
    color: white;
    border: none;
    border-radius: 5px;
    padding: 5px;
    width: 100%;
    font-size: 1rem;
    cursor: pointer;
    transition: background-color 0.3s ease;
  }

  button:hover {
    background-color: #e60000;
  }
</style>
|
<script> |
|
/**
 * Shows or hides the additional expert inputs (#additionalFieldsMOE)
 * according to the current value of the #is_moe select.
 *
 * Wired to the select's `onchange` attribute; takes no arguments and
 * returns nothing.
 */
function handleSelectChange() {
  // Read the currently selected value.
  const moeSelected = document.getElementById('is_moe').value === 'yes';
  const moeFields = document.getElementById('additionalFieldsMOE');

  // Show the extra fields only when "yes" is selected.
  moeFields.style.display = moeSelected ? 'block' : 'none';
}
|
|
|
/**
 * Reads the hyperparameters from the input console, counts the parameters of
 * every attention, feed-forward and embedding matrix, and renders the
 * breakdown into the #output element.
 *
 * Inputs are read from the DOM by id: hidden_size, intermediate_size,
 * vocab_size, num_key_value_heads, num_heads, num_hidden_layers,
 * include_bias, is_moe and, only when is_moe is "yes", expert_number and
 * active_expert_number.
 *
 * When any numeric field is empty, not a whole number, or below its minimum,
 * an error message is rendered instead of a table full of NaN values.
 *
 * Takes no arguments and returns nothing; the only effect is replacing the
 * innerHTML of #output.
 */
function calculateParameters() {
  const outputDiv = document.getElementById('output');
  const includeBias = document.getElementById('include_bias').value === 'yes';
  const isMoe = document.getElementById('is_moe').value === 'yes';

  // Collect every invalid field so the user sees all problems at once.
  const problems = [];
  const readCount = (id, label, min = 0) => {
    const raw = document.getElementById(id).value.trim();
    // Number() rather than parseInt(): parseInt('1e3') silently yields 1 and
    // parseInt('12.7') yields 12. Number('') is 0, so empty is handled first.
    const count = raw === '' ? NaN : Number(raw);
    if (!Number.isInteger(count) || count < min) {
      problems.push(`${label}: expected a whole number of at least ${min}`);
    }
    return count;
  };

  const hiddenSize = readCount('hidden_size', 'Hidden size');
  const intermediateSize = readCount('intermediate_size', 'Intermediate size');
  const vocabSize = readCount('vocab_size', 'Vocab size');
  const numKvHeads = readCount('num_key_value_heads', 'Number of key-value heads');
  // num_heads is a divisor below, so zero is not acceptable.
  const numHeads = readCount('num_heads', 'Number of attention (query) heads', 1);
  const numHiddenLayers = readCount('num_hidden_layers', 'Number of hidden layers');
  // The expert fields are hidden and usually empty unless MoE is selected.
  const expertNumber = isMoe ? readCount('expert_number', 'Total expert number') : 0;
  const activeExpertNumber = isMoe
    ? readCount('active_expert_number', 'Total active experts')
    : 0;

  if (problems.length > 0) {
    outputDiv.innerHTML = `
      <h1>Model Parameter Calculator</h1>
      <div class="highlight">
        <strong>Invalid input</strong><br>
        ${problems.join('<br>')}
      </div>
    `;
    return;
  }

  // Shared helpers: a layer is { name, input, output }.
  const fmt = (count) => count.toLocaleString();
  const weightCount = ({ input, output }) => input * output;
  const biasCount = ({ output }) => (includeBias ? output : 0);
  const sumParams = (layers) =>
    layers.reduce((sum, layer) => sum + weightCount(layer) + biasCount(layer), 0);
  // One weight row per layer, followed by a bias row when biases are included.
  const renderRows = (layers) => layers.map((layer) => `
          <tr>
            <td>${layer.name} - W</td>
            <td>${fmt(layer.input)}</td>
            <td>${fmt(layer.output)}</td>
            <td>${fmt(weightCount(layer))}</td>
          </tr>${includeBias ? `
          <tr>
            <td>${layer.name} - b</td>
            <td>-</td>
            <td>${fmt(layer.output)}</td>
            <td>${fmt(biasCount(layer))}</td>
          </tr>` : ''}`).join('');

  // Attention: key and value project to hidden_size * kv_heads / heads.
  const kvDim = hiddenSize * numKvHeads / numHeads;
  const attention = [
    { name: 'Query', input: hiddenSize, output: hiddenSize },
    { name: 'Key', input: hiddenSize, output: kvDim },
    { name: 'Value', input: hiddenSize, output: kvDim },
    { name: 'Projection', input: hiddenSize, output: hiddenSize },
  ];
  const attentionTotal = sumParams(attention);

  // Feed forward: two hidden -> intermediate matrices and one projection back.
  const feedForward = [
    { name: 'Swish', input: hiddenSize, output: intermediateSize },
    { name: 'GLU', input: hiddenSize, output: intermediateSize },
    { name: 'Projection', input: intermediateSize, output: hiddenSize },
  ];
  const singleFeedForward = sumParams(feedForward);
  // With MoE the block is replicated per expert; "active" counts only the
  // experts given in active_expert_number.
  const feedForwardTotal = isMoe ? singleFeedForward * expertNumber : singleFeedForward;
  const feedForwardTotalActive = singleFeedForward * activeExpertNumber;

  // Embedding
  const embeddingTotal = vocabSize * hiddenSize;

  // Per-layer, all-layers and whole-model totals (the latter adds embedding).
  const oneLayerParams = attentionTotal + feedForwardTotal;
  const fullLayersParams = oneLayerParams * numHiddenLayers;
  const fullSize = fullLayersParams + embeddingTotal;

  // Active counterparts; rendered only when MoE is selected.
  const oneLayerParamsActive = attentionTotal + feedForwardTotalActive;
  const fullLayersParamsActive = oneLayerParamsActive * numHiddenLayers;
  const fullSizeActive = fullLayersParamsActive + embeddingTotal;

  // Display results
  outputDiv.innerHTML = `
    <h1>Model Parameter Calculator</h1>
    <div class="section">
      <h3>Attention</h3>
      <table>
        <tr>
          <th>Parameter</th>
          <th>Input Size</th>
          <th>Output Size</th>
          <th>Total parameters (input*output)</th>
        </tr>
        ${renderRows(attention)}
        <tr>
          <th colspan="3">Total attention parameters</th>
          <td>${fmt(attentionTotal)}</td>
        </tr>
      </table>
    </div>
    <div class="section">
      <h3>Feed Forward</h3>
      <table>
        <tr>
          <th>Parameter</th>
          <th>Input Size</th>
          <th>Output Size</th>
          <th>Total (input*output)</th>
        </tr>
        ${renderRows(feedForward)}
        <tr>
          <th colspan="3">Total Feed Forward parameters</th>
          <td>${fmt(feedForwardTotal)}</td>
        </tr>
        ${isMoe ? `
        <tr>
          <th colspan="3">Total active Feed Forward parameters</th>
          <td>${fmt(feedForwardTotalActive)}</td>
        </tr>` : ''}
      </table>
    </div>
    <div class="highlight">
      <strong>1 Layer Parameters (Attention + FFN):</strong> ${fmt(oneLayerParams)}<br>
      <strong>Full Layers Parameters (1 layer parameters * num layers):</strong> ${fmt(fullLayersParams)}<br><br>
      ${isMoe ? `
      <strong>1 Layer Parameters Active(Attention + FFN):</strong> ${fmt(oneLayerParamsActive)}<br>
      <strong>Full Layers Parameters Active(1 layer active parameters * num layers):</strong> ${fmt(fullLayersParamsActive)}<br>
      ` : ''}
    </div>
    <div class="section">
      <h3>Embedding</h3>
      <table>
        <tr>
          <th>Parameter</th>
          <th>Vocab Size</th>
          <th>Hidden Size</th>
          <th>Total (vocab * hidden)</th>
        </tr>
        <tr>
          <td>Embedding</td>
          <td>${fmt(vocabSize)}</td>
          <td>${fmt(hiddenSize)}</td>
          <td>${fmt(embeddingTotal)}</td>
        </tr>
      </table>
    </div>
    <div class="highlight">
      <strong>Complete Model Parameters (embedding size + full layers size):</strong> ${fmt(fullSize)}<br><br>
      ${isMoe ? `<strong>Complete Model Parameters Active:</strong> ${fmt(fullSizeActive)}` : ''}
    </div>
  `;
}
|
</script> |
|
</head> |
|
<body>

  <!-- Input console: the script reads these controls by id when
       "Calculate" is pressed. -->
  <section class="console">
    <h3>Input Parameters</h3>

    <label for="hidden_size">Hidden size:</label><br>
    <input type="number" id="hidden_size" value="896" min="0" step="1"><br><br>

    <label for="intermediate_size">Intermediate size:</label><br>
    <input type="number" id="intermediate_size" value="4864" min="0" step="1"><br><br>

    <label for="vocab_size">Vocab size:</label><br>
    <input type="number" id="vocab_size" value="151646" min="0" step="1"><br><br>

    <label for="num_key_value_heads">Number of key-value heads:</label><br>
    <input type="number" id="num_key_value_heads" value="2" min="0" step="1"><br><br>

    <!-- Used as a divisor by the script, hence min="1". -->
    <label for="num_heads">Number of attention (query) heads:</label><br>
    <input type="number" id="num_heads" value="14" min="1" step="1"><br><br>

    <label for="num_hidden_layers">Number of hidden layers:</label><br>
    <input type="number" id="num_hidden_layers" value="24" min="0" step="1"><br><br>

    <label for="include_bias">Include bias?</label><br>
    <select id="include_bias">
      <option value="no">No</option>
      <option value="yes">Yes</option>
    </select><br><br>

    <label for="is_moe">Is MOE ?</label><br>
    <select id="is_moe" onchange="handleSelectChange()">
      <option value="no">No</option>
      <option value="yes">Yes</option>
    </select><br><br>

    <!-- Hidden by the stylesheet until "Is MOE ?" is set to "yes". -->
    <div id="additionalFieldsMOE">
      <label for="expert_number">Total expert number :</label><br>
      <input type="number" id="expert_number" name="expert_number" min="0" step="1"><br><br>

      <label for="active_expert_number">Total active experts (shared + specifics):</label><br>
      <input type="number" id="active_expert_number" name="active_expert_number" min="0" step="1"><br><br>
    </div>

    <button type="button" onclick="calculateParameters()">Calculate</button>
  </section>

  <!-- Results pane: calculateParameters() replaces its content, so it is
       marked as a polite live region for assistive technology. -->
  <main class="output" id="output" aria-live="polite">
    <h1>Transformer total number of parameters Calculator</h1>
    <h3>Enter model hyperparameters in the console and press calculate (currently working for classic transformer/LLM architecture with GQA and GLU)</h3>
  </main>

</body>
|
</html> |
|
|