Gogryu committed on
Commit
8a7c56c
·
1 Parent(s): 44e1649

minor fixes

Browse files
Files changed (1) hide show
  1. src/pages/Calculator.tsx +412 -401
src/pages/Calculator.tsx CHANGED
@@ -614,90 +614,94 @@ const PrefillChunkingCalculator = ({
614
  )
615
 
616
  return (
617
-
618
- <div>
619
- <div className='chart mb-8'>
620
- <div className='flex flex-col items-center'>
621
- <div className='text-2xl'>Model Footprint with Prefill Chunking</div>
622
- </div>
623
- <div>
624
- <div className='chart-row'>
625
- <div className='chart-row-title'>FP32</div>
626
- <PrefillChunkingModelSizeBarChart
627
- modelSize={calculateMemory(modelParams, 'fp32')}
628
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
629
- modelPrecision='fp32'
630
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
631
- activationMemorySize={activationMemorySize}
632
- />
633
- <div className='chart-row-size ml-8'>
634
- {(calculateMemory(modelParams, 'fp32') + activationMemorySize).toFixed(2)}{' '}
635
- {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
636
- </div>
637
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
638
 
639
- <div className='chart-row my-8'>
640
- <div className='chart-row-title'>FP16</div>
641
- <PrefillChunkingModelSizeBarChart
642
- modelSize={calculateMemory(modelParams, 'fp16')}
643
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
644
- modelPrecision='fp16'
645
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
646
- activationMemorySize={activationMemorySize}
647
- />
648
- <div className='chart-row-size ml-8'>
649
- {(calculateMemory(modelParams, 'fp16') + activationMemorySize).toFixed(2)}{' '}
650
- {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
 
651
  </div>
652
- </div>
653
 
654
- <div className='chart-row my-8'>
655
- <div className='chart-row-title'>INT8</div>
656
- <PrefillChunkingModelSizeBarChart
657
- modelSize={calculateMemory(modelParams, 'int8')}
658
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
659
- modelPrecision='int8'
660
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
661
- activationMemorySize={activationMemorySize}
662
- />
663
- <div className='chart-row-size ml-8'>
664
- {(calculateMemory(modelParams, 'int8') + activationMemorySize).toFixed(2)}{' '}
665
- {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
 
666
  </div>
667
- </div>
668
 
669
- <div className='chart-row my-8'>
670
- <div className='chart-row-title'>INT4</div>
671
- <PrefillChunkingModelSizeBarChart
672
- modelSize={calculateMemory(modelParams, 'int4')}
673
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
674
- modelPrecision='int4'
675
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
676
- activationMemorySize={activationMemorySize}
677
- />
678
- <div className='chart-row-size ml-8'>
679
- {(calculateMemory(modelParams, 'int4') + activationMemorySize).toFixed(2)}{' '}
680
- {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
 
681
  </div>
682
  </div>
683
  </div>
684
- </div>
685
- <div className='chart'>
686
- <div className='flex flex-col items-center'>
687
- <div className='text-2xl'>Maximum Batch Size / Sequence Length with Prefill Chunking</div>
 
 
 
 
 
 
 
 
 
 
 
 
688
  </div>
689
- <PrefillChunkingInferenceRuntimeLineChart
690
- availableMemory={{
691
- int4: deviceMemory - calculateMemory(modelParams, 'int4'),
692
- int8: deviceMemory - calculateMemory(modelParams, 'int8'),
693
- fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
694
- fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
695
- }}
696
- memoryPerInput={memoryPerInput}
697
- activationMemorySize={activationMemorySize}
698
- />
699
  </div>
700
- </div>
701
  )
702
  }
703
 
@@ -770,343 +774,353 @@ const Calculator = () => {
770
  }
771
 
772
  return (
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773
 
774
- <div className="flex flex-col items-center justify-center min-h-screen px-4">
775
- {/* Toggle Button */}
776
- <div className="mb-4 flex space-x-4">
777
- <button
778
- className={`${
779
- !isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
780
- }`}
781
- onClick={() => setIsPrefillChunking(false)}
782
- >
783
- Standard Calculator
784
- </button>
785
- <button
786
- className={`${
787
- isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
788
- }`}
789
- onClick={() => setIsPrefillChunking(true)}
790
- >
791
- Calculator with Prefill Chunking
792
- </button>
793
- </div>
794
-
795
- {/* Model Memory Calculator */}
796
- <div className="w-full max-w-4xl">
797
- <div className="text-4xl mb-4 text-center">Model Memory Calculator</div>
798
- <div className="mb-6 text-center">
799
- Use our Model Memory Calculator to help you estimate the memory footprint of your model for different precisions and the maximum batch size / sequence length combination you can run on your device.
800
- </div>
801
 
802
- {/* Model and Device Selection */}
803
- <div className="grid grid-cols-1 sm:grid-cols-2 gap-4 mb-6">
804
- {/* Model Selection */}
805
- <div className="calculator-input-box">
806
- <div className="text-2xl calculator-input-title">Model</div>
807
- <div className="calculator-input-content">
808
- <div className="mb-2">
809
- <button
810
- className={`${
811
- modelSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
812
- }`}
813
- onClick={() => setModelSelectionTab(true)}
814
- >
815
- Model Selection
816
- </button>
817
- <button
818
- className={`${
819
- modelSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
820
- }`}
821
- onClick={() => setModelSelectionTab(false)}
822
- >
823
- Custom Model
824
- </button>
825
- </div>
826
- <div>
827
- {modelSelectionTab ? (
828
- <>
829
- <label htmlFor="model">Select a Model</label>
830
- <select
831
- id="model"
832
- className="calculator-select"
833
- onChange={(e) => {
834
- setModelParams(Number(e.target.value));
835
- setHiddenSize(
836
- Number(
837
- e.target.options[e.target.selectedIndex].getAttribute('data-hiddenSize')
838
- )
839
- );
840
- setNumLayers(
841
- Number(
842
- e.target.options[e.target.selectedIndex].getAttribute('data-numLayers')
843
- )
844
- );
845
- }}
846
  >
847
- <option value="">None selected</option>
848
- {MODELS.map((model) => (
849
- <option
850
- key={model.name}
851
- value={model.params}
852
- data-hiddenSize={model.hidden_size}
853
- data-numLayers={model.num_hidden_layers}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
854
  >
855
- {model.name}
856
- </option>
857
- ))}
858
- </select>
859
- </>
860
- ) : (
861
- <>
862
- <label htmlFor="modelParams">Model Parameters (in billions)</label>
863
- <input
864
- type="number"
865
- id="modelParams"
866
- className="calculator-input mb-2"
867
- placeholder="e.g. 7 (for LLaMA-7B)"
868
- value={modelParams || ''}
869
- min={0}
870
- onChange={(e) => setModelParams(Number(e.target.value))}
871
- />
872
- <label htmlFor="hiddenSize">Hidden Size</label>
873
- <input
874
- type="number"
875
- id="hiddenSize"
876
- className="calculator-input mb-2"
877
- placeholder="e.g. 4096 (for LLaMA-7B)"
878
- value={hiddenSize || ''}
879
- min={1}
880
- onChange={(e) => setHiddenSize(Number(e.target.value))}
881
- />
882
- <label htmlFor="numLayers">Number of Layers</label>
883
- <input
884
- type="number"
885
- id="numLayers"
886
- className="calculator-input"
887
- placeholder="e.g. 32 (for LLaMA-7B)"
888
- value={numLayers || ''}
889
- min={1}
890
- onChange={(e) => setNumLayers(Number(e.target.value))}
891
- />
892
- </>
893
- )}
 
 
 
 
 
 
 
 
 
 
894
  </div>
895
- </div>
896
- </div>
897
 
898
- {/* Device Selection */}
899
- <div className="calculator-input-box">
900
- <div className="text-2xl calculator-input-title">Device</div>
901
- <div className="calculator-input-content">
902
- <div className="mb-2">
903
- <button
904
- className={`${
905
- deviceSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
906
- }`}
907
- onClick={() => {
908
- setDeviceSelectionTab(true);
909
- setDeviceMemory(null);
910
- }}
911
- >
912
- Device Selection
913
- </button>
914
- <button
915
- className={`${
916
- deviceSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
917
- }`}
918
- onClick={() => {
919
- setDeviceSelectionTab(false);
920
- setDeviceMemory(null);
921
- }}
922
- >
923
- Custom Device
924
- </button>
925
- </div>
926
- <div>
927
- {deviceSelectionTab ? (
928
- <>
929
- <label htmlFor="device">Select a Device</label>
930
- <select
931
- id="device"
932
- className="calculator-select"
933
- onChange={(e) => setDeviceMemory(Number(e.target.value))}
934
  >
935
- <option value="">None selected</option>
936
- {DEVICES.map((device) => (
937
- <option key={device.name} value={device.size}>
938
- {device.name}
939
- </option>
940
- ))}
941
- </select>
942
- </>
943
- ) : (
944
- <>
945
- <label htmlFor="deviceMemory">Device RAM (in GB)</label>
946
- <input
947
- type="number"
948
- id="deviceMemory"
949
- className="calculator-input"
950
- placeholder="e.g. 24"
951
- value={deviceMemory || ''}
952
- min={0}
953
- onChange={(e) => setDeviceMemory(Number(e.target.value))}
954
- />
955
- </>
956
- )}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
  </div>
958
- </div>
959
- </div>
960
 
961
- <div className='calculator-box'>
962
- <div className='text-2xl ml-5 mb-4'>Backend Precision Table</div>
963
- <div className='ml-5 mb-4'>
964
- <BackendPrecisionTable />
965
- </div>
966
- <div className='ml-5'>
967
- This table shows the precision used by each Takeoff backend for CPUs and GPUs, as well
968
- as their accuracy preservation.
 
969
  </div>
970
- </div>
971
 
972
- <div className='calculator-box'>
973
- <div className='text-2xl ml-5 mb-4'>Input parameters</div>
974
- <div className='ml-5 mb-4'>
975
- <strong>Sequence Length</strong>: The combined length of input tokens and output tokens.
976
- To restrict the maximum sequence length for inference on Takeoff, use the API parameters{' '}
977
- <code>prompt_new_tokens</code> for input tokens and <code>max_new_tokens</code> for
978
- output tokens when making a request.
979
- </div>
980
- <div className='ml-5'>
981
- <strong>Batch Size</strong>: The number of sequences that can be processed in parallel.
982
- To set a maximum batch size for inference on Takeoff, set the environment variable{' '}
983
- <code>TAKEOFF_MAX_BATCH_SIZE</code> to your desired value.
 
984
  </div>
985
  </div>
986
 
987
- </div>
988
-
989
- {/* Prefill Chunking Settings */}
990
- {isPrefillChunking && (
991
- <div className="calculator-input-box mb-6">
992
- <div className="text-2xl calculator-input-title">Prefill Chunking Settings</div>
993
- <div className="calculator-input-content">
994
- <label htmlFor="maxChunkSize">Max Chunk Size</label>
995
- <input
996
- type="number"
997
- id="maxChunkSize"
998
- className="calculator-input mb-2"
999
- placeholder="e.g. 512"
1000
- value={maxChunkSize || ''}
1001
- min={1}
1002
- onChange={(e) => setMaxChunkSize(Number(e.target.value))}
1003
- />
1004
- <label htmlFor="intermediateSize">Intermediate Size</label>
1005
- <input
1006
- type="number"
1007
- id="intermediateSize"
1008
- className="calculator-input"
1009
- placeholder="e.g. 2048"
1010
- value={intermediateSize || ''}
1011
- min={1}
1012
- onChange={(e) => setIntermediateSize(Number(e.target.value))}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1013
  />
1014
- </div>
1015
- </div>
1016
- )}
1017
-
1018
- {/* Charts Section */}
1019
- {isPrefillChunking ? (
1020
- <PrefillChunkingCalculator
1021
- deviceMemory={deviceMemory!}
1022
- modelParams={modelParams!}
1023
- hiddenSize={hiddenSize!}
1024
- numLayers={numLayers!}
1025
- batchSize={batchSize}
1026
- seqLength={seqLength}
1027
- maxChunkSize={maxChunkSize}
1028
- intermediateSize={intermediateSize}
1029
- />
1030
- ) : (
1031
- hiddenSize && numLayers && deviceMemory && modelParams && (
1032
- <>
1033
- {/* Model Footprint Chart */}
1034
- <div className="chart mb-8">
1035
- <div className="text-2xl text-center mb-4">Model Footprint</div>
1036
- <div className="space-y-8">
1037
- <div className="chart-row">
1038
-
1039
- <div className="chart-row-title">FP32</div>
1040
- <ModelSizeBarChart
1041
- modelSize={calculateMemory(modelParams, 'fp32')}
1042
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
1043
- modelPrecision="fp32"
1044
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1045
- />
1046
- <div className="chart-row-size ml-8">
1047
- {calculateMemory(modelParams, 'fp32')} {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1048
- </div>
1049
- </div>
1050
-
1051
- {/* FP16 */}
1052
- <div className="chart-row">
1053
- <div className="chart-row-title">FP16</div>
1054
- <ModelSizeBarChart
1055
- modelSize={calculateMemory(modelParams, 'fp16')}
1056
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
1057
- modelPrecision="fp16"
1058
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1059
- />
1060
- <div className="chart-row-size ml-8">
1061
- {calculateMemory(modelParams, 'fp16')} {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1062
- </div>
1063
- </div>
1064
-
1065
- {/* INT8 */}
1066
- <div className="chart-row">
1067
- <div className="chart-row-title">INT8</div>
1068
- <ModelSizeBarChart
1069
- modelSize={calculateMemory(modelParams, 'int8')}
1070
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
1071
- modelPrecision="int8"
1072
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1073
- />
1074
- <div className="chart-row-size ml-8">
1075
- {calculateMemory(modelParams, 'int8')} {deviceMemory ? `/ ${deviceMemory} ` : null}GB
 
 
 
 
 
 
1076
  </div>
1077
  </div>
1078
 
1079
- {/* INT4 */}
1080
- <div className="chart-row">
1081
- <div className="chart-row-title">INT4</div>
1082
- <ModelSizeBarChart
1083
- modelSize={calculateMemory(modelParams, 'int4')}
1084
- largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
1085
- modelPrecision="int4"
1086
- deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1087
- />
1088
- <div className="chart-row-size ml-8">
1089
- {calculateMemory(modelParams, 'int4')} {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1090
  </div>
1091
- </div>
1092
- </div>
1093
- </div>
1094
-
1095
- {/* Maximum Batch Size / Sequence Length Chart */}
1096
- <div className="chart mb-8">
1097
- <div className="text-2xl text-center mb-4">Maximum Batch Size / Sequence Length</div>
1098
- <div className="flex flex-row items-left">
1099
- <InferenceRuntimeLineChart
1100
- availableMemory={{
1101
- int4: deviceMemory - calculateMemory(modelParams, 'int4'),
1102
- int8: deviceMemory - calculateMemory(modelParams, 'int8'),
1103
- fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
1104
- fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
1105
- }}
1106
- memoryPerInput={calculateMemoryPerInput(hiddenSize, numLayers)}
1107
- />
1108
- <div className="chart-side-panel ml-4 pt-4">
1109
- <div className='mb-2'>
1110
  Memory/token:{' '}
1111
  {(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
1112
  </div>
@@ -1411,17 +1425,14 @@ const Calculator = () => {
1411
  </>
1412
  ) : null}
1413
  </div>
 
 
1414
  </div>
1415
- </div>
1416
- </div>
1417
- </>
1418
- )
1419
- )}
1420
- </div>
1421
- </div>
1422
-
1423
-
1424
-
1425
  )
1426
  }
1427
 
 
614
  )
615
 
616
  return (
617
+ <>
618
+ {/* Model Footprint with Prefill Chunking */}
619
+ <div>
620
+ <div className='chart mb-8'>
621
+ <div className='flex flex-col items-center'>
622
+ <div className='text-2xl'>Model Footprint with Prefill Chunking</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  </div>
624
+ <div>
625
+ <div className='chart-row'>
626
+ <div className='chart-row-title'>FP32</div>
627
+ <PrefillChunkingModelSizeBarChart
628
+ modelSize={calculateMemory(modelParams, 'fp32')}
629
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
630
+ modelPrecision='fp32'
631
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
632
+ activationMemorySize={activationMemorySize}
633
+ />
634
+ <div className='chart-row-size ml-8'>
635
+ {(calculateMemory(modelParams, 'fp32') + activationMemorySize).toFixed(2)}{' '}
636
+ {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
637
+ </div>
638
+ </div>
639
 
640
+ <div className='chart-row my-8'>
641
+ <div className='chart-row-title'>FP16</div>
642
+ <PrefillChunkingModelSizeBarChart
643
+ modelSize={calculateMemory(modelParams, 'fp16')}
644
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
645
+ modelPrecision='fp16'
646
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
647
+ activationMemorySize={activationMemorySize}
648
+ />
649
+ <div className='chart-row-size ml-8'>
650
+ {(calculateMemory(modelParams, 'fp16') + activationMemorySize).toFixed(2)}{' '}
651
+ {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
652
+ </div>
653
  </div>
 
654
 
655
+ <div className='chart-row my-8'>
656
+ <div className='chart-row-title'>INT8</div>
657
+ <PrefillChunkingModelSizeBarChart
658
+ modelSize={calculateMemory(modelParams, 'int8')}
659
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
660
+ modelPrecision='int8'
661
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
662
+ activationMemorySize={activationMemorySize}
663
+ />
664
+ <div className='chart-row-size ml-8'>
665
+ {(calculateMemory(modelParams, 'int8') + activationMemorySize).toFixed(2)}{' '}
666
+ {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
667
+ </div>
668
  </div>
 
669
 
670
+ <div className='chart-row my-8'>
671
+ <div className='chart-row-title'>INT4</div>
672
+ <PrefillChunkingModelSizeBarChart
673
+ modelSize={calculateMemory(modelParams, 'int4')}
674
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
675
+ modelPrecision='int4'
676
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
677
+ activationMemorySize={activationMemorySize}
678
+ />
679
+ <div className='chart-row-size ml-8'>
680
+ {(calculateMemory(modelParams, 'int4') + activationMemorySize).toFixed(2)}{' '}
681
+ {deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
682
+ </div>
683
  </div>
684
  </div>
685
  </div>
686
+ <div className='chart'>
687
+ <div className='flex flex-col items-center'>
688
+ <div className='text-2xl'>
689
+ Maximum Batch Size / Sequence Length with Prefill Chunking
690
+ </div>
691
+ </div>
692
+ <PrefillChunkingInferenceRuntimeLineChart
693
+ availableMemory={{
694
+ int4: deviceMemory - calculateMemory(modelParams, 'int4'),
695
+ int8: deviceMemory - calculateMemory(modelParams, 'int8'),
696
+ fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
697
+ fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
698
+ }}
699
+ memoryPerInput={memoryPerInput}
700
+ activationMemorySize={activationMemorySize}
701
+ />
702
  </div>
 
 
 
 
 
 
 
 
 
 
703
  </div>
704
+ </>
705
  )
706
  }
707
 
 
774
  }
775
 
776
  return (
777
+ <div className='flex flex-col items-center justify-center min-h-screen px-4'>
778
+ {/* Toggle Button */}
779
+ <div className='mb-4 flex space-x-4'>
780
+ <button
781
+ className={`${
782
+ !isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
783
+ }`}
784
+ onClick={() => setIsPrefillChunking(false)}
785
+ >
786
+ Standard Calculator
787
+ </button>
788
+ <button
789
+ className={`${
790
+ isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
791
+ }`}
792
+ onClick={() => setIsPrefillChunking(true)}
793
+ >
794
+ Calculator with Prefill Chunking
795
+ </button>
796
+ </div>
797
 
798
+ {/* Model Memory Calculator */}
799
+ <div className='w-full max-w-4xl'>
800
+ <div className='text-4xl mb-4 text-center'>Model Memory Calculator</div>
801
+ <div className='mb-6 text-center'>
802
+ Use our Model Memory Calculator to help you estimate the memory footprint of your model
803
+ for different precisions and the maximum batch size / sequence length combination you can
804
+ run on your device.
805
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
806
 
807
+ {/* Model and Device Selection */}
808
+ <div className='grid grid-cols-1 sm:grid-cols-2 gap-4 mb-6'>
809
+ {/* Model Selection */}
810
+ <div className='calculator-input-box'>
811
+ <div className='text-2xl calculator-input-title'>Model</div>
812
+ <div className='calculator-input-content'>
813
+ <div className='mb-2'>
814
+ <button
815
+ className={`${
816
+ modelSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
817
+ }`}
818
+ onClick={() => setModelSelectionTab(true)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
819
  >
820
+ Model Selection
821
+ </button>
822
+ <button
823
+ className={`${
824
+ modelSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
825
+ }`}
826
+ onClick={() => setModelSelectionTab(false)}
827
+ >
828
+ Custom Model
829
+ </button>
830
+ </div>
831
+ <div>
832
+ {modelSelectionTab ? (
833
+ <>
834
+ <label htmlFor='model'>Select a Model</label>
835
+ <select
836
+ id='model'
837
+ className='calculator-select'
838
+ onChange={(e) => {
839
+ setModelParams(Number(e.target.value))
840
+ setHiddenSize(
841
+ Number(
842
+ e.target.options[e.target.selectedIndex].getAttribute(
843
+ 'data-hiddenSize',
844
+ ),
845
+ ),
846
+ )
847
+ setNumLayers(
848
+ Number(
849
+ e.target.options[e.target.selectedIndex].getAttribute('data-numLayers'),
850
+ ),
851
+ )
852
+ }}
853
  >
854
+ <option value=''>None selected</option>
855
+ {MODELS.map((model) => (
856
+ <option
857
+ key={model.name}
858
+ value={model.params}
859
+ data-hiddenSize={model.hidden_size}
860
+ data-numLayers={model.num_hidden_layers}
861
+ >
862
+ {model.name}
863
+ </option>
864
+ ))}
865
+ </select>
866
+ </>
867
+ ) : (
868
+ <>
869
+ <label htmlFor='modelParams'>Model Parameters (in billions)</label>
870
+ <input
871
+ type='number'
872
+ id='modelParams'
873
+ className='calculator-input mb-2'
874
+ placeholder='e.g. 7 (for LLaMA-7B)'
875
+ value={modelParams || ''}
876
+ min={0}
877
+ onChange={(e) => setModelParams(Number(e.target.value))}
878
+ />
879
+ <label htmlFor='hiddenSize'>Hidden Size</label>
880
+ <input
881
+ type='number'
882
+ id='hiddenSize'
883
+ className='calculator-input mb-2'
884
+ placeholder='e.g. 4096 (for LLaMA-7B)'
885
+ value={hiddenSize || ''}
886
+ min={1}
887
+ onChange={(e) => setHiddenSize(Number(e.target.value))}
888
+ />
889
+ <label htmlFor='numLayers'>Number of Layers</label>
890
+ <input
891
+ type='number'
892
+ id='numLayers'
893
+ className='calculator-input'
894
+ placeholder='e.g. 32 (for LLaMA-7B)'
895
+ value={numLayers || ''}
896
+ min={1}
897
+ onChange={(e) => setNumLayers(Number(e.target.value))}
898
+ />
899
+ </>
900
+ )}
901
+ </div>
902
+ </div>
903
  </div>
 
 
904
 
905
+ {/* Device Selection */}
906
+ <div className='calculator-input-box'>
907
+ <div className='text-2xl calculator-input-title'>Device</div>
908
+ <div className='calculator-input-content'>
909
+ <div className='mb-2'>
910
+ <button
911
+ className={`${
912
+ deviceSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
913
+ }`}
914
+ onClick={() => {
915
+ setDeviceSelectionTab(true)
916
+ setDeviceMemory(null)
917
+ }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
918
  >
919
+ Device Selection
920
+ </button>
921
+ <button
922
+ className={`${
923
+ deviceSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
924
+ }`}
925
+ onClick={() => {
926
+ setDeviceSelectionTab(false)
927
+ setDeviceMemory(null)
928
+ }}
929
+ >
930
+ Custom Device
931
+ </button>
932
+ </div>
933
+ <div>
934
+ {deviceSelectionTab ? (
935
+ <>
936
+ <label htmlFor='device'>Select a Device</label>
937
+ <select
938
+ id='device'
939
+ className='calculator-select'
940
+ onChange={(e) => setDeviceMemory(Number(e.target.value))}
941
+ >
942
+ <option value=''>None selected</option>
943
+ {DEVICES.map((device) => (
944
+ <option key={device.name} value={device.size}>
945
+ {device.name}
946
+ </option>
947
+ ))}
948
+ </select>
949
+ </>
950
+ ) : (
951
+ <>
952
+ <label htmlFor='deviceMemory'>Device RAM (in GB)</label>
953
+ <input
954
+ type='number'
955
+ id='deviceMemory'
956
+ className='calculator-input'
957
+ placeholder='e.g. 24'
958
+ value={deviceMemory || ''}
959
+ min={0}
960
+ onChange={(e) => setDeviceMemory(Number(e.target.value))}
961
+ />
962
+ </>
963
+ )}
964
+ </div>
965
+ </div>
966
  </div>
 
 
967
 
968
+ <div className='calculator-box'>
969
+ <div className='text-2xl ml-5 mb-4'>Backend Precision Table</div>
970
+ <div className='ml-5 mb-4'>
971
+ <BackendPrecisionTable />
972
+ </div>
973
+ <div className='ml-5'>
974
+ This table shows the precision used by each Takeoff backend for CPUs and GPUs, as well
975
+ as their accuracy preservation.
976
+ </div>
977
  </div>
 
978
 
979
+ <div className='calculator-box'>
980
+ <div className='text-2xl ml-5 mb-4'>Input parameters</div>
981
+ <div className='ml-5 mb-4'>
982
+ <strong>Sequence Length</strong>: The combined length of input tokens and output
983
+ tokens. To restrict the maximum sequence length for inference on Takeoff, use the API
984
+ parameters <code>prompt_new_tokens</code> for input tokens and{' '}
985
+ <code>max_new_tokens</code> for output tokens when making a request.
986
+ </div>
987
+ <div className='ml-5'>
988
+ <strong>Batch Size</strong>: The number of sequences that can be processed in
989
+ parallel. To set a maximum batch size for inference on Takeoff, set the environment
990
+ variable <code>TAKEOFF_MAX_BATCH_SIZE</code> to your desired value.
991
+ </div>
992
  </div>
993
  </div>
994
 
995
+ {/* Prefill Chunking Settings */}
996
+ {isPrefillChunking && (
997
+ <div className='calculator-input-box mb-6'>
998
+ <div className='text-2xl calculator-input-title'>Prefill Chunking Settings</div>
999
+ <div className='calculator-input-content'>
1000
+ <label htmlFor='maxChunkSize'>Max Chunk Size</label>
1001
+ <input
1002
+ type='number'
1003
+ id='maxChunkSize'
1004
+ className='calculator-input mb-2'
1005
+ placeholder='e.g. 512'
1006
+ value={maxChunkSize || ''}
1007
+ min={1}
1008
+ onChange={(e) => setMaxChunkSize(Number(e.target.value))}
1009
+ />
1010
+ <label htmlFor='intermediateSize'>Intermediate Size</label>
1011
+ <input
1012
+ type='number'
1013
+ id='intermediateSize'
1014
+ className='calculator-input'
1015
+ placeholder='e.g. 2048'
1016
+ value={intermediateSize || ''}
1017
+ min={1}
1018
+ onChange={(e) => setIntermediateSize(Number(e.target.value))}
1019
+ />
1020
+ </div>
1021
+ </div>
1022
+ )}
1023
+
1024
+ {/* Charts Section */}
1025
+ {isPrefillChunking ? (
1026
+ <PrefillChunkingCalculator
1027
+ deviceMemory={deviceMemory!}
1028
+ modelParams={modelParams!}
1029
+ hiddenSize={hiddenSize!}
1030
+ numLayers={numLayers!}
1031
+ batchSize={batchSize}
1032
+ seqLength={seqLength}
1033
+ maxChunkSize={maxChunkSize}
1034
+ intermediateSize={intermediateSize}
1035
  />
1036
+ ) : (
1037
+ hiddenSize &&
1038
+ numLayers &&
1039
+ deviceMemory &&
1040
+ modelParams && (
1041
+ <>
1042
+ {/* Model Footprint Chart */}
1043
+ <div className='chart mb-8'>
1044
+ <div className='text-2xl text-center mb-4'>Model Footprint</div>
1045
+ <div className='space-y-8'>
1046
+ <div className='chart-row'>
1047
+ <div className='chart-row-title'>FP32</div>
1048
+ <ModelSizeBarChart
1049
+ modelSize={calculateMemory(modelParams, 'fp32')}
1050
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
1051
+ modelPrecision='fp32'
1052
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1053
+ />
1054
+ <div className='chart-row-size ml-8'>
1055
+ {calculateMemory(modelParams, 'fp32')}{' '}
1056
+ {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1057
+ </div>
1058
+ </div>
1059
+
1060
+ {/* FP16 */}
1061
+ <div className='chart-row'>
1062
+ <div className='chart-row-title'>FP16</div>
1063
+ <ModelSizeBarChart
1064
+ modelSize={calculateMemory(modelParams, 'fp16')}
1065
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
1066
+ modelPrecision='fp16'
1067
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1068
+ />
1069
+ <div className='chart-row-size ml-8'>
1070
+ {calculateMemory(modelParams, 'fp16')}{' '}
1071
+ {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1072
+ </div>
1073
+ </div>
1074
+
1075
+ {/* INT8 */}
1076
+ <div className='chart-row'>
1077
+ <div className='chart-row-title'>INT8</div>
1078
+ <ModelSizeBarChart
1079
+ modelSize={calculateMemory(modelParams, 'int8')}
1080
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
1081
+ modelPrecision='int8'
1082
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1083
+ />
1084
+ <div className='chart-row-size ml-8'>
1085
+ {calculateMemory(modelParams, 'int8')}{' '}
1086
+ {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1087
+ </div>
1088
+ </div>
1089
+
1090
+ {/* INT4 */}
1091
+ <div className='chart-row'>
1092
+ <div className='chart-row-title'>INT4</div>
1093
+ <ModelSizeBarChart
1094
+ modelSize={calculateMemory(modelParams, 'int4')}
1095
+ largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
1096
+ modelPrecision='int4'
1097
+ deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
1098
+ />
1099
+ <div className='chart-row-size ml-8'>
1100
+ {calculateMemory(modelParams, 'int4')}{' '}
1101
+ {deviceMemory ? `/ ${deviceMemory} ` : null}GB
1102
+ </div>
1103
+ </div>
1104
  </div>
1105
  </div>
1106
 
1107
+ {/* Maximum Batch Size / Sequence Length Chart */}
1108
+ <div className='chart mb-8'>
1109
+ <div className='text-2xl text-center mb-4'>
1110
+ Maximum Batch Size / Sequence Length
 
 
 
 
 
 
 
1111
  </div>
1112
+ <div className='flex flex-row items-left'>
1113
+ <InferenceRuntimeLineChart
1114
+ availableMemory={{
1115
+ int4: deviceMemory - calculateMemory(modelParams, 'int4'),
1116
+ int8: deviceMemory - calculateMemory(modelParams, 'int8'),
1117
+ fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
1118
+ fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
1119
+ }}
1120
+ memoryPerInput={calculateMemoryPerInput(hiddenSize, numLayers)}
1121
+ />
1122
+ <div className='chart-side-panel ml-4 pt-4'>
1123
+ <div className='mb-2'>
 
 
 
 
 
 
 
1124
  Memory/token:{' '}
1125
  {(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
1126
  </div>
 
1425
  </>
1426
  ) : null}
1427
  </div>
1428
+ </div>
1429
+ </div>
1430
  </div>
1431
+ </>
1432
+ )
1433
+ )}
1434
+ </div>
1435
+ </div>
 
 
 
 
 
1436
  )
1437
  }
1438