UlrickBL's picture
Update index.html
97c0098 verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Model Parameter Calculator</title>
<style>
/* ---------- Page layout: input console on the left, results on the right ---------- */
body {
  display: flex;
  flex-direction: row;
  height: 100vh;
  margin: 0;
  padding: 0;
  font-family: Arial, sans-serif;
}

/* Left-hand panel holding the hyperparameter inputs. */
.console {
  width: 20%;
  padding: 20px;
  overflow-y: auto;
  background-color: #f6b5b5;
  border-right: 2px solid #ee4a4f;
}

/* MoE-only inputs stay hidden until the "Is MOE ?" selector is set to yes. */
#additionalFieldsMOE {
  display: none;
}

/* Right-hand panel receiving the generated tables. */
.output {
  width: 80%;
  padding: 20px;
  overflow-y: auto;
}

/* ---------- Result tables ---------- */
table {
  width: 100%;
  margin-bottom: 20px;
  border: 1px solid #f6b5b5;
  border-collapse: collapse;
  background-color: #fff7f9;
}

th, td {
  padding: 8px;
  border: 1px solid #f6b5b5;
  text-align: center;
}

th {
  background-color: #f6d4d4;
}

/* Call-out box used for the per-layer and whole-model totals. */
.highlight {
  margin: 20px 0;
  padding: 10px;
  border: 1px solid #f6b5b5;
  border-radius: 4px;
  background-color: #ffe8e8;
  color: #ad0d0d;
  text-align: center;
}

h3 {
  color: #ad0d0d;
}

.section {
  margin-bottom: 20px;
}

/* ---------- Form controls ---------- */
label {
  font-size: 0.9rem;
  font-weight: bold;
}

input[type="number"], select {
  box-sizing: border-box;
  width: 100%;
  padding: 8px;
  border: 1px solid #ddd;
  border-radius: 5px;
  font-size: 0.9rem;
}

/* The default outline is replaced by a coloured border plus a soft glow. */
input[type="number"]:focus, select:focus {
  outline: none;
  border-color: #ff6666;
  box-shadow: 0 0 5px rgba(255, 102, 102, 0.5);
}

button {
  width: 100%;
  padding: 5px;
  border: none;
  border-radius: 5px;
  background-color: #ff6666;
  color: white;
  font-size: 1rem;
  cursor: pointer;
  transition: background-color 0.3s ease;
}

button:hover {
  background-color: #e60000;
}
</style>
<script>
/**
 * Change handler for the "Is MOE ?" selector: shows the extra MoE inputs
 * when "yes" is selected and hides them for any other value.
 */
function handleSelectChange() {
  // Read the selector first, then look up the container it controls.
  const moeSelected = document.getElementById('is_moe').value === 'yes';
  const moeFields = document.getElementById('additionalFieldsMOE');
  moeFields.style.display = moeSelected ? 'block' : 'none';
}
/**
 * Compute and display the parameter counts of the transformer described by
 * the values currently entered in the input console.
 *
 * Reads the numeric fields (hidden_size, intermediate_size, vocab_size,
 * num_key_value_heads, num_heads, num_hidden_layers and, for MoE models,
 * expert_number / active_expert_number) together with the include_bias and
 * is_moe selectors, then replaces the content of #output with a breakdown:
 * attention projections, the gated feed-forward block, per-layer and
 * all-layer totals, the embedding matrix and the complete model size.
 *
 * For MoE models the feed-forward block is counted once per expert, and an
 * "active" variant (counted once per active expert) is reported alongside.
 *
 * If a required field is empty, non-numeric or not strictly positive, a
 * message naming the offending fields is rendered instead of the tables, so
 * that "NaN" never reaches the page.
 */
function calculateParameters() {
  // Always parse as base 10; yields NaN for an empty or non-numeric field.
  const readInt = (id) => Number.parseInt(document.getElementById(id).value, 10);
  const fmt = (value) => value.toLocaleString();

  const hiddenSize = readInt('hidden_size');
  const num_kv_heads = readInt('num_key_value_heads');
  const num_heads = readInt('num_heads');
  const intermediateSize = readInt('intermediate_size');
  const vocabSize = readInt('vocab_size');
  const numHiddenLayers = readInt('num_hidden_layers');
  const active_expert_number = readInt('active_expert_number');
  const expert_number = readInt('expert_number');
  const includeBias = document.getElementById('include_bias').value === 'yes';
  const isMoe = document.getElementById('is_moe').value === 'yes';
  const outputDiv = document.getElementById('output');

  // Validate before computing. The expert fields only matter for MoE models,
  // so they are only checked when MoE is enabled.
  const requiredFields = [
    ['Hidden size', hiddenSize],
    ['Intermediate size', intermediateSize],
    ['Vocab size', vocabSize],
    ['Number of key-value heads', num_kv_heads],
    ['Number of attention (query) heads', num_heads],
    ['Number of hidden layers', numHiddenLayers]
  ];
  if (isMoe) {
    requiredFields.push(
      ['Total expert number', expert_number],
      ['Total active experts', active_expert_number]
    );
  }
  const invalidLabels = requiredFields
    .filter(([, value]) => !Number.isInteger(value) || value <= 0)
    .map(([label]) => label);
  if (invalidLabels.length > 0) {
    outputDiv.innerHTML = `
<h1>Model Parameter Calculator</h1>
<div class="highlight">
<strong>Cannot calculate:</strong> please enter a positive whole number for ${invalidLabels.join(', ')}.
</div>
`;
    return;
  }

  const calcTotal = (input, output) => input * output;
  // Key/value width under grouped-query attention:
  // (hidden size / query heads) * key-value heads.
  const kvDim = hiddenSize * num_kv_heads / num_heads;

  // Attention calculations
  const attention = [
    { name: 'Query', input: hiddenSize, output: hiddenSize },
    { name: 'Key', input: hiddenSize, output: kvDim },
    { name: 'Value', input: hiddenSize, output: kvDim },
    { name: 'Projection', input: hiddenSize, output: hiddenSize }
  ].map(entry => ({
    ...entry,
    wTotal: calcTotal(entry.input, entry.output),
    bTotal: includeBias ? entry.output : 0
  }));
  const attentionTotal = attention.reduce((sum, entry) => sum + entry.wTotal + entry.bTotal, 0);

  // Feed-forward calculations: two hidden -> intermediate matrices and one
  // intermediate -> hidden projection, each with an optional bias.
  const switchW = calcTotal(hiddenSize, intermediateSize);
  const switchB = includeBias ? intermediateSize : 0;
  const luW = calcTotal(hiddenSize, intermediateSize);
  const luB = includeBias ? intermediateSize : 0;
  const projW = calcTotal(intermediateSize, hiddenSize);
  const projB = includeBias ? hiddenSize : 0;
  const singleExpertTotal = switchW + switchB + luW + luB + projW + projB;
  const feedForwardTotal = isMoe ? singleExpertTotal * expert_number : singleExpertTotal;
  const feedForwardTotalActive = isMoe ? singleExpertTotal * active_expert_number : singleExpertTotal;

  // Embedding
  const embeddingTotal = calcTotal(vocabSize, hiddenSize);

  // Per-layer, all-layer and whole-model totals ("active" values are only
  // displayed for MoE models).
  const oneLayerParams = attentionTotal + feedForwardTotal;
  const oneLayerParamsActive = attentionTotal + feedForwardTotalActive;
  const fullLayersParams = oneLayerParams * numHiddenLayers;
  const fullLayersParamsMOEActive = oneLayerParamsActive * numHiddenLayers;
  const fullSize = fullLayersParams + embeddingTotal;
  const fullSizeActive = fullLayersParamsMOEActive + embeddingTotal;

  // Row builders shared by the attention and feed-forward tables.
  const weightRow = (name, input, output, total) => `
<tr>
<td>${name} - W</td>
<td>${fmt(input)}</td>
<td>${fmt(output)}</td>
<td>${fmt(total)}</td>
</tr>`;
  const biasRow = (name, size, total) => includeBias ? `
<tr>
<td>${name} - b</td>
<td>-</td>
<td>${fmt(size)}</td>
<td>${fmt(total)}</td>
</tr>` : '';

  // Display results
  outputDiv.innerHTML = `
<h1>Model Parameter Calculator</h1>
<div class="section">
<h3>Attention</h3>
<table>
<tr>
<th>Parameter</th>
<th>Input Size</th>
<th>Output Size</th>
<th>Total parameters (input*output)</th>
</tr>
${attention.map(entry =>
    weightRow(entry.name, entry.input, entry.output, entry.wTotal) +
    biasRow(entry.name, entry.output, entry.bTotal)
  ).join('')}
<tr>
<th colspan="3">Total attention parameters</th>
<td>${fmt(attentionTotal)}</td>
</tr>
</table>
</div>
<div class="section">
<h3>Feed Forward</h3>
<table>
<tr>
<th>Parameter</th>
<th>Input Size</th>
<th>Output Size</th>
<th>Total (input*output)</th>
</tr>
${weightRow('Swish', hiddenSize, intermediateSize, switchW)}
${biasRow('Swish', intermediateSize, switchB)}
${weightRow('GLU', hiddenSize, intermediateSize, luW)}
${biasRow('GLU', intermediateSize, luB)}
${weightRow('Projection', intermediateSize, hiddenSize, projW)}
${biasRow('Projection', hiddenSize, projB)}
<tr>
<th colspan="3">Total Feed Forward parameters</th>
<td>${fmt(feedForwardTotal)}</td>
</tr>
${isMoe ? `
<tr>
<th colspan="3">Total active Feed Forward parameters</th>
<td>${fmt(feedForwardTotalActive)}</td>
</tr>` : ''}
</table>
</div>
<div class="highlight">
<strong>1 Layer Parameters (Attention + FFN):</strong> ${fmt(oneLayerParams)}<br>
<strong>Full Layers Parameters (1 layer parameters * num layers):</strong> ${fmt(fullLayersParams)}<br><br>
${isMoe ? `
<strong>1 Layer Parameters Active(Attention + FFN):</strong> ${fmt(oneLayerParamsActive)}<br>
<strong>Full Layers Parameters Active(1 layer active parameters * num layers):</strong> ${fmt(fullLayersParamsMOEActive)}<br>
` : ''}
</div>
<div class="section">
<h3>Embedding</h3>
<table>
<tr>
<th>Parameter</th>
<th>Vocab Size</th>
<th>Hidden Size</th>
<th>Total (vocab * hidden)</th>
</tr>
<tr>
<td>Embedding</td>
<td>${fmt(vocabSize)}</td>
<td>${fmt(hiddenSize)}</td>
<td>${fmt(embeddingTotal)}</td>
</tr>
</table>
</div>
<div class="highlight">
<strong>Complete Model Parameters (embedding size + full layers size):</strong> ${fmt(fullSize)}<br><br>
${isMoe ? `<strong>Complete Model Parameters Active:</strong> ${fmt(fullSizeActive)}` : ''}
</div>
`;
}
</script>
</head>
<body>
<!-- Left panel: model hyperparameter inputs. Element ids are read by calculateParameters(). -->
<div class="console">
  <h3>Input Parameters</h3>
  <label for="hidden_size">Hidden size:</label><br>
  <input type="number" id="hidden_size" value="896" min="1"><br><br>
  <label for="intermediate_size">Intermediate size:</label><br>
  <input type="number" id="intermediate_size" value="4864" min="1"><br><br>
  <label for="vocab_size">Vocab size:</label><br>
  <input type="number" id="vocab_size" value="151646" min="1"><br><br>
  <label for="num_key_value_heads">Number of key-value heads:</label><br>
  <input type="number" id="num_key_value_heads" value="2" min="1"><br><br>
  <label for="num_heads">Number of attention (query) heads:</label><br>
  <input type="number" id="num_heads" value="14" min="1"><br><br>
  <label for="num_hidden_layers">Number of hidden layers:</label><br>
  <input type="number" id="num_hidden_layers" value="24" min="1"><br><br>
  <label for="include_bias">Include bias?</label><br>
  <select id="include_bias">
    <option value="no">No</option>
    <option value="yes">Yes</option>
  </select><br><br>
  <label for="is_moe">Is MOE ?</label><br>
  <select id="is_moe" onchange="handleSelectChange()">
    <option value="no">No</option>
    <option value="yes">Yes</option>
  </select><br><br>
  <!-- Hidden by the stylesheet; handleSelectChange() shows it when "Is MOE ?" is set to Yes. -->
  <div id="additionalFieldsMOE">
    <label for="expert_number">Total expert number :</label><br>
    <input type="number" id="expert_number" name="expert_number" min="1"><br><br>
    <label for="active_expert_number">Total active experts (shared + specifics):</label><br>
    <input type="number" id="active_expert_number" name="active_expert_number" min="1"><br><br>
  </div>
  <!-- Explicit type so the button can never act as a submit button if a form is added later. -->
  <button type="button" onclick="calculateParameters()">Calculate</button>
</div>
<!-- Right panel: its content is replaced by calculateParameters() with the result tables. -->
<div class="output" id="output">
  <h1>Transformer total number of parameters Calculator</h1>
  <h3>Enter model hyperparameters in the console and press calculate (currently working for classic transformer/LLM architecture with GQA and GLU)</h3>
</div>
</body>
</html>