<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Model Parameter Calculator</title>
<style>
    /* Page shell: a two-column flex row that fills the viewport height. */
    body {
        font-family: Arial, sans-serif;
        margin: 0;
        padding: 0;
        display: flex;
        flex-direction: row;
        height: 100vh;
        background: linear-gradient(to right, #ff9a9e, #fad0c4);
        color: #4b0082;
    }

    /* Left column holding the input controls; scrolls on its own. */
    .console {
        width: 30%;
        padding: 20px;
        background-color: #fdfdfd;
        border-right: 2px solid #ff9a9e;
        overflow-y: auto;
    }

    /* Right column holding the rendered results; scrolls on its own. */
    .output {
        width: 70%;
        padding: 20px;
        overflow-y: auto;
    }

    /* Result tables. overflow: hidden clips the cells to the rounded corners. */
    table {
        width: 100%;
        border-collapse: collapse;
        margin-bottom: 20px;
        background-color: #ffffff;
        border-radius: 8px;
        overflow: hidden;
        box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
    }

    th, td {
        border: 1px solid #ddd;
        padding: 8px;
        text-align: center;
    }

    th {
        background-color: #ff9a9e;
        color: #fff;
        text-transform: uppercase;
    }

    /* Call-out box used for the summary totals. */
    .highlight {
        background-color: #fdf2ff;
        color: #800080;
        padding: 10px;
        margin: 20px 0;
        border: 1px solid #ff9a9e;
        border-radius: 8px;
    }

    /* Centered sub-heading style. */
    .section-title {
        margin-top: 30px;
        font-size: 1.2em;
        font-weight: bold;
        color: #800080;
        text-align: center;
    }
</style>
<script>
/**
 * Reads the model hyper-parameters from the input controls, computes the
 * parameter counts for the attention, feed-forward and embedding components,
 * and renders a summary plus one breakdown table per component into the
 * element with id "output".
 *
 * Takes no arguments and returns nothing; its only effect is replacing the
 * innerHTML of #output. If any numeric field is empty, non-numeric or
 * negative, a validation message is rendered instead of the tables, so the
 * page never shows "NaN" totals.
 */
function calculateParameters() {
    const outputEl = document.getElementById('output');
    // Re-emitted on every render so the panel title survives the innerHTML swap.
    const heading = '<h3>Model Parameter Results</h3>';

    // [element id, human-readable label] for every numeric control.
    const numericFields = [
        ['hidden_size', 'Hidden size'],
        ['intermediate_size', 'Intermediate size'],
        ['vocab_size', 'Vocab size'],
        ['num_key_value_heads', 'Number of key-value heads'],
        ['num_hidden_layers', 'Number of hidden layers'],
    ];

    const values = {};
    const invalidLabels = [];
    for (const [id, label] of numericFields) {
        // Explicit radix 10; parseInt truncates any fractional part.
        const parsed = Number.parseInt(document.getElementById(id).value, 10);
        if (Number.isNaN(parsed) || parsed < 0) {
            invalidLabels.push(label);
        }
        values[id] = parsed;
    }

    if (invalidLabels.length > 0) {
        // The labels are the hard-coded strings above, so interpolating them is safe.
        outputEl.innerHTML = `${heading}
<div class="highlight">
    <strong>Invalid input:</strong> enter a non-negative whole number for ${invalidLabels.join(', ')}.
</div>`;
        return;
    }

    const hiddenSize = values.hidden_size;
    const intermediateSize = values.intermediate_size;
    const vocabSize = values.vocab_size;
    const numKeyValueHeads = values.num_key_value_heads;
    const numHiddenLayers = values.num_hidden_layers;
    const includeBias = document.getElementById('include_bias').value === 'yes';

    // Every entry below is a triple: [input size, output size, parameter count].
    // A bias contributes `size` parameters, or nothing when biases are disabled.
    const biasEntry = (size) => (includeBias ? [size, 1, size] : [0, 0, 0]);

    // Attention calculations
    const attentionQW = [hiddenSize, hiddenSize, hiddenSize * hiddenSize];
    const attentionV = [hiddenSize, numKeyValueHeads, hiddenSize * numKeyValueHeads];
    const attentionBq = biasEntry(hiddenSize);
    const attentionBv = biasEntry(numKeyValueHeads);
    const attentionTotal = attentionQW[2] + attentionV[2] + attentionBq[2] + attentionBv[2];

    // Feed Forward calculations
    const switchW = [hiddenSize, intermediateSize, hiddenSize * intermediateSize];
    const switchB = biasEntry(intermediateSize);
    const projW = [intermediateSize, hiddenSize, intermediateSize * hiddenSize];
    const projB = biasEntry(hiddenSize);
    const feedForwardTotal = switchW[2] + switchB[2] + projW[2] + projB[2];

    // Embedding
    const embedding = [vocabSize, hiddenSize, vocabSize * hiddenSize];

    // Totals: one layer, all layers, and all layers plus the embedding.
    const oneLayerParams = attentionTotal + feedForwardTotal;
    const fullLayersParams = oneLayerParams * numHiddenLayers;
    const fullSize = fullLayersParams + embedding[2];

    // Renders one body row; only the total is locale-formatted.
    const renderRow = (label, [inputSize, outputSize, total]) => `
        <tr>
            <td>${label}</td>
            <td>${inputSize}</td>
            <td>${outputSize}</td>
            <td>${total.toLocaleString()}</td>
        </tr>`;

    // Renders a table whose first header cell names the component.
    // `rows` is a list of [label, entry] pairs.
    const renderTable = (title, rows) => `
    <table>
        <tr>
            <th scope="col">${title}</th>
            <th scope="col">Input Size</th>
            <th scope="col">Output Size</th>
            <th scope="col">Total Size</th>
        </tr>${rows.map(([label, entry]) => renderRow(label, entry)).join('')}
    </table>`;

    // Bias rows appear only when biases are included.
    const attentionRows = [
        ['Q / Wq', attentionQW],
        ['V / Wv', attentionV],
        ...(includeBias ? [['Bq', attentionBq], ['Bv', attentionBv]] : []),
    ];
    const feedForwardRows = [
        ['Switch / W', switchW],
        ...(includeBias ? [['Switch / b', switchB]] : []),
        ['Proj / W', projW],
        ...(includeBias ? [['Proj / b', projB]] : []),
    ];
    const embeddingRows = [['Embedding', embedding]];

    // Every interpolated value is a parsed integer or a fixed label, so the
    // generated HTML cannot carry user-supplied markup.
    outputEl.innerHTML = `${heading}
    <div class="highlight">
        <strong>1 Layer Parameters:</strong> ${oneLayerParams.toLocaleString()}<br>
        <strong>Full Layers Parameters:</strong> ${fullLayersParams.toLocaleString()}<br>
    </div>
    ${renderTable('Attention', attentionRows)}
    ${renderTable('Feed Forward', feedForwardRows)}
    <div class="highlight">
        <strong>Complete Model Size:</strong> ${fullSize.toLocaleString()}
    </div>
    ${renderTable('Embedding', embeddingRows)}`;
}
</script>
</head>
<body>

    <!-- Left panel: model hyper-parameter inputs. The ids are read by calculateParameters(). -->
    <div class="console">
        <h3>Input Parameters</h3>

        <label for="hidden_size">Hidden size:</label><br>
        <input type="number" id="hidden_size" value="2048" min="0" step="1"><br><br>

        <label for="intermediate_size">Intermediate size:</label><br>
        <input type="number" id="intermediate_size" value="16384" min="0" step="1"><br><br>

        <label for="vocab_size">Vocab size:</label><br>
        <input type="number" id="vocab_size" value="64000" min="0" step="1"><br><br>

        <label for="num_key_value_heads">Number of key-value heads:</label><br>
        <input type="number" id="num_key_value_heads" value="80" min="0" step="1"><br><br>

        <label for="num_hidden_layers">Number of hidden layers:</label><br>
        <input type="number" id="num_hidden_layers" value="64" min="0" step="1"><br><br>

        <label for="include_bias">Include bias?</label><br>
        <select id="include_bias">
            <option value="yes">Yes</option>
            <option value="no">No</option>
        </select><br><br>

        <!-- type="button" makes explicit that this only triggers the calculation. -->
        <button type="button" onclick="calculateParameters()">Calculate</button>
    </div>

    <!-- Right panel: calculateParameters() replaces this element's contents.
         aria-live lets assistive technology announce the new results. -->
    <div class="output" id="output" aria-live="polite">
        <h3>Model Parameter Results</h3>
    </div>

</body>
</html>