Memory/token:{' '}
{(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
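{/* Memory per token comes from calculateMemoryPerInput, presumably in GB,
    so multiplying by 1_000_000 renders it in KB. The two numeric inputs
    below update the batchSize and seqLength state. */}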
<input
  type="number"
  onChange={(e) => setBatchSize(Number(e.target.value))}
/>
<input
  type="number"
  onChange={(e) => setSeqLength(Number(e.target.value))}
/>
{!batchSize && !seqLength ? (
  <p>
    Enter a batch size or a sequence length to see the maximum sequence
    length or batch size you can run on your device.
  </p>
) : null}
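{/* Only a batch size given: report the longest sequence length that still
    fits at each precision. calculateMaxInputSize presumably takes the
    device memory left after the model weights and divides it by the
    per-token memory times batchSize; a result of zero or less means the
    weights alone do not fit. */}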
{batchSize && !seqLength ? (
  <>
    <p>Max Sequence Lengths:</p>
    <p>
      FP32:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'fp32',
        batchSize,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'fp32',
            batchSize,
          )
        : 'Out of Memory'}
    </p>
    <p>
      FP16:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'fp16',
        batchSize,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'fp16',
            batchSize,
          )
        : 'Out of Memory'}
    </p>
    <p>
      INT8:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'int8',
        batchSize,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'int8',
            batchSize,
          )
        : 'Out of Memory'}
    </p>
    <p>
      INT4:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'int4',
        batchSize,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'int4',
            batchSize,
          )
        : 'Out of Memory'}
    </p>
  </>
) : null}
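{/* Only a sequence length given: the same helper is reused with seqLength
    as the fixed dimension, so its result reads as the largest batch size
    that fits at each precision. */}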
{!batchSize && seqLength ? (
  <>
    <p>Max Batch Sizes:</p>
    <p>
      FP32:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'fp32',
        seqLength,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'fp32',
            seqLength,
          )
        : 'Out of Memory'}
    </p>
    <p>
      FP16:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'fp16',
        seqLength,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'fp16',
            seqLength,
          )
        : 'Out of Memory'}
    </p>
    <p>
      INT8:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'int8',
        seqLength,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'int8',
            seqLength,
          )
        : 'Out of Memory'}
    </p>
    <p>
      INT4:{' '}
      {calculateMaxInputSize(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'int4',
        seqLength,
      ) > 0
        ? calculateMaxInputSize(
            deviceMemory,
            modelParams,
            hiddenSize,
            numLayers,
            'int4',
            seqLength,
          )
        : 'Out of Memory'}
    </p>
  </>
) : null}
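{/* Both inputs given: total footprint = model weights (calculateMemory) +
    per-token memory (calculateMemoryPerInput) * batchSize * seqLength, in
    GB. calculateMemoryValid presumably checks that this total fits within
    deviceMemory before the number is shown. */}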
{batchSize && seqLength ? (
  <>
    <p>Total Memory Usage:</p>
    <p>
      FP32:{' '}
      {calculateMemoryValid(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'fp32',
        batchSize,
        seqLength,
      )
        ? (
            calculateMemory(modelParams, 'fp32') +
            calculateMemoryPerInput(hiddenSize, numLayers) *
              batchSize *
              seqLength
          ).toFixed(2) + ' GB'
        : 'Out of Memory'}
    </p>
    <p>
      FP16:{' '}
      {calculateMemoryValid(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'fp16',
        batchSize,
        seqLength,
      )
        ? (
            calculateMemory(modelParams, 'fp16') +
            calculateMemoryPerInput(hiddenSize, numLayers) *
              batchSize *
              seqLength
          ).toFixed(2) + ' GB'
        : 'Out of Memory'}
    </p>
    <p>
      INT8:{' '}
      {calculateMemoryValid(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'int8',
        batchSize,
        seqLength,
      )
        ? (
            calculateMemory(modelParams, 'int8') +
            calculateMemoryPerInput(hiddenSize, numLayers) *
              batchSize *
              seqLength
          ).toFixed(2) + ' GB'
        : 'Out of Memory'}
    </p>
    <p>
      INT4:{' '}
      {calculateMemoryValid(
        deviceMemory,
        modelParams,
        hiddenSize,
        numLayers,
        'int4',
        batchSize,
        seqLength,
      )
        ? (
            calculateMemory(modelParams, 'int4') +
            calculateMemoryPerInput(hiddenSize, numLayers) *
              batchSize *
              seqLength
          ).toFixed(2) + ' GB'
        : 'Out of Memory'}
    </p>
  </>
) : null}
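{/* Assumed shapes of the helpers used above (defined elsewhere); units are
    inferred from the display code, which treats them as GB:
      calculateMemory(modelParams, dtype) -> weight memory in GB
      calculateMemoryPerInput(hiddenSize, numLayers) -> memory per token in GB
      calculateMaxInputSize(deviceMemory, modelParams, hiddenSize, numLayers,
        dtype, fixedDim) -> max sequence length (or batch size), <= 0 when
        the weights alone exceed deviceMemory
      calculateMemoryValid(deviceMemory, modelParams, hiddenSize, numLayers,
        dtype, batchSize, seqLength) -> boolean */}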