Spaces:
Running
Running
minor fixes
Browse files- src/pages/Calculator.tsx +412 -401
src/pages/Calculator.tsx
CHANGED
@@ -614,90 +614,94 @@ const PrefillChunkingCalculator = ({
|
|
614 |
)
|
615 |
|
616 |
return (
|
617 |
-
|
618 |
-
|
619 |
-
<div
|
620 |
-
<div className='
|
621 |
-
<div className='
|
622 |
-
|
623 |
-
<div>
|
624 |
-
<div className='chart-row'>
|
625 |
-
<div className='chart-row-title'>FP32</div>
|
626 |
-
<PrefillChunkingModelSizeBarChart
|
627 |
-
modelSize={calculateMemory(modelParams, 'fp32')}
|
628 |
-
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
|
629 |
-
modelPrecision='fp32'
|
630 |
-
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
631 |
-
activationMemorySize={activationMemorySize}
|
632 |
-
/>
|
633 |
-
<div className='chart-row-size ml-8'>
|
634 |
-
{(calculateMemory(modelParams, 'fp32') + activationMemorySize).toFixed(2)}{' '}
|
635 |
-
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
636 |
-
</div>
|
637 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
638 |
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
|
|
651 |
</div>
|
652 |
-
</div>
|
653 |
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
|
|
666 |
</div>
|
667 |
-
</div>
|
668 |
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
|
|
681 |
</div>
|
682 |
</div>
|
683 |
</div>
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
688 |
</div>
|
689 |
-
<PrefillChunkingInferenceRuntimeLineChart
|
690 |
-
availableMemory={{
|
691 |
-
int4: deviceMemory - calculateMemory(modelParams, 'int4'),
|
692 |
-
int8: deviceMemory - calculateMemory(modelParams, 'int8'),
|
693 |
-
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
694 |
-
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
695 |
-
}}
|
696 |
-
memoryPerInput={memoryPerInput}
|
697 |
-
activationMemorySize={activationMemorySize}
|
698 |
-
/>
|
699 |
</div>
|
700 |
-
|
701 |
)
|
702 |
}
|
703 |
|
@@ -770,343 +774,353 @@ const Calculator = () => {
|
|
770 |
}
|
771 |
|
772 |
return (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
773 |
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
>
|
783 |
-
Standard Calculator
|
784 |
-
</button>
|
785 |
-
<button
|
786 |
-
className={`${
|
787 |
-
isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
788 |
-
}`}
|
789 |
-
onClick={() => setIsPrefillChunking(true)}
|
790 |
-
>
|
791 |
-
Calculator with Prefill Chunking
|
792 |
-
</button>
|
793 |
-
</div>
|
794 |
-
|
795 |
-
{/* Model Memory Calculator */}
|
796 |
-
<div className="w-full max-w-4xl">
|
797 |
-
<div className="text-4xl mb-4 text-center">Model Memory Calculator</div>
|
798 |
-
<div className="mb-6 text-center">
|
799 |
-
Use our Model Memory Calculator to help you estimate the memory footprint of your model for different precisions and the maximum batch size / sequence length combination you can run on your device.
|
800 |
-
</div>
|
801 |
|
802 |
-
|
803 |
-
|
804 |
-
|
805 |
-
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
814 |
-
>
|
815 |
-
Model Selection
|
816 |
-
</button>
|
817 |
-
<button
|
818 |
-
className={`${
|
819 |
-
modelSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
820 |
-
}`}
|
821 |
-
onClick={() => setModelSelectionTab(false)}
|
822 |
-
>
|
823 |
-
Custom Model
|
824 |
-
</button>
|
825 |
-
</div>
|
826 |
-
<div>
|
827 |
-
{modelSelectionTab ? (
|
828 |
-
<>
|
829 |
-
<label htmlFor="model">Select a Model</label>
|
830 |
-
<select
|
831 |
-
id="model"
|
832 |
-
className="calculator-select"
|
833 |
-
onChange={(e) => {
|
834 |
-
setModelParams(Number(e.target.value));
|
835 |
-
setHiddenSize(
|
836 |
-
Number(
|
837 |
-
e.target.options[e.target.selectedIndex].getAttribute('data-hiddenSize')
|
838 |
-
)
|
839 |
-
);
|
840 |
-
setNumLayers(
|
841 |
-
Number(
|
842 |
-
e.target.options[e.target.selectedIndex].getAttribute('data-numLayers')
|
843 |
-
)
|
844 |
-
);
|
845 |
-
}}
|
846 |
>
|
847 |
-
|
848 |
-
|
849 |
-
|
850 |
-
|
851 |
-
|
852 |
-
|
853 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
854 |
>
|
855 |
-
|
856 |
-
|
857 |
-
|
858 |
-
|
859 |
-
|
860 |
-
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
865 |
-
|
866 |
-
|
867 |
-
|
868 |
-
|
869 |
-
|
870 |
-
|
871 |
-
|
872 |
-
|
873 |
-
|
874 |
-
|
875 |
-
|
876 |
-
|
877 |
-
|
878 |
-
|
879 |
-
|
880 |
-
|
881 |
-
|
882 |
-
|
883 |
-
|
884 |
-
|
885 |
-
|
886 |
-
|
887 |
-
|
888 |
-
|
889 |
-
|
890 |
-
|
891 |
-
|
892 |
-
|
893 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
894 |
</div>
|
895 |
-
</div>
|
896 |
-
</div>
|
897 |
|
898 |
-
|
899 |
-
|
900 |
-
|
901 |
-
|
902 |
-
|
903 |
-
|
904 |
-
|
905 |
-
|
906 |
-
|
907 |
-
|
908 |
-
|
909 |
-
|
910 |
-
|
911 |
-
>
|
912 |
-
Device Selection
|
913 |
-
</button>
|
914 |
-
<button
|
915 |
-
className={`${
|
916 |
-
deviceSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
917 |
-
}`}
|
918 |
-
onClick={() => {
|
919 |
-
setDeviceSelectionTab(false);
|
920 |
-
setDeviceMemory(null);
|
921 |
-
}}
|
922 |
-
>
|
923 |
-
Custom Device
|
924 |
-
</button>
|
925 |
-
</div>
|
926 |
-
<div>
|
927 |
-
{deviceSelectionTab ? (
|
928 |
-
<>
|
929 |
-
<label htmlFor="device">Select a Device</label>
|
930 |
-
<select
|
931 |
-
id="device"
|
932 |
-
className="calculator-select"
|
933 |
-
onChange={(e) => setDeviceMemory(Number(e.target.value))}
|
934 |
>
|
935 |
-
|
936 |
-
|
937 |
-
|
938 |
-
|
939 |
-
|
940 |
-
|
941 |
-
|
942 |
-
|
943 |
-
|
944 |
-
|
945 |
-
|
946 |
-
|
947 |
-
|
948 |
-
|
949 |
-
|
950 |
-
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
|
955 |
-
|
956 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
957 |
</div>
|
958 |
-
</div>
|
959 |
-
</div>
|
960 |
|
961 |
-
|
962 |
-
|
963 |
-
|
964 |
-
|
965 |
-
|
966 |
-
|
967 |
-
|
968 |
-
|
|
|
969 |
</div>
|
970 |
-
</div>
|
971 |
|
972 |
-
|
973 |
-
|
974 |
-
|
975 |
-
|
976 |
-
|
977 |
-
|
978 |
-
|
979 |
-
|
980 |
-
|
981 |
-
|
982 |
-
|
983 |
-
|
|
|
984 |
</div>
|
985 |
</div>
|
986 |
|
987 |
-
|
988 |
-
|
989 |
-
|
990 |
-
|
991 |
-
|
992 |
-
|
993 |
-
|
994 |
-
|
995 |
-
|
996 |
-
|
997 |
-
|
998 |
-
|
999 |
-
|
1000 |
-
|
1001 |
-
|
1002 |
-
|
1003 |
-
|
1004 |
-
|
1005 |
-
|
1006 |
-
|
1007 |
-
|
1008 |
-
|
1009 |
-
|
1010 |
-
|
1011 |
-
|
1012 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1013 |
/>
|
1014 |
-
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
1019 |
-
|
1020 |
-
|
1021 |
-
|
1022 |
-
|
1023 |
-
|
1024 |
-
|
1025 |
-
|
1026 |
-
|
1027 |
-
|
1028 |
-
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
|
1033 |
-
|
1034 |
-
|
1035 |
-
|
1036 |
-
|
1037 |
-
|
1038 |
-
|
1039 |
-
|
1040 |
-
|
1041 |
-
|
1042 |
-
|
1043 |
-
|
1044 |
-
|
1045 |
-
|
1046 |
-
|
1047 |
-
|
1048 |
-
|
1049 |
-
|
1050 |
-
|
1051 |
-
|
1052 |
-
|
1053 |
-
|
1054 |
-
|
1055 |
-
|
1056 |
-
|
1057 |
-
|
1058 |
-
|
1059 |
-
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
-
|
1066 |
-
|
1067 |
-
|
1068 |
-
|
1069 |
-
|
1070 |
-
|
1071 |
-
|
1072 |
-
|
1073 |
-
|
1074 |
-
|
1075 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1076 |
</div>
|
1077 |
</div>
|
1078 |
|
1079 |
-
{/*
|
1080 |
-
<div className=
|
1081 |
-
<div className=
|
1082 |
-
|
1083 |
-
modelSize={calculateMemory(modelParams, 'int4')}
|
1084 |
-
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
|
1085 |
-
modelPrecision="int4"
|
1086 |
-
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
1087 |
-
/>
|
1088 |
-
<div className="chart-row-size ml-8">
|
1089 |
-
{calculateMemory(modelParams, 'int4')} {deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
1090 |
</div>
|
1091 |
-
|
1092 |
-
|
1093 |
-
|
1094 |
-
|
1095 |
-
|
1096 |
-
|
1097 |
-
|
1098 |
-
|
1099 |
-
|
1100 |
-
|
1101 |
-
|
1102 |
-
|
1103 |
-
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
1104 |
-
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
1105 |
-
}}
|
1106 |
-
memoryPerInput={calculateMemoryPerInput(hiddenSize, numLayers)}
|
1107 |
-
/>
|
1108 |
-
<div className="chart-side-panel ml-4 pt-4">
|
1109 |
-
<div className='mb-2'>
|
1110 |
Memory/token:{' '}
|
1111 |
{(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
|
1112 |
</div>
|
@@ -1411,17 +1425,14 @@ const Calculator = () => {
|
|
1411 |
</>
|
1412 |
) : null}
|
1413 |
</div>
|
|
|
|
|
1414 |
</div>
|
1415 |
-
|
1416 |
-
|
1417 |
-
|
1418 |
-
|
1419 |
-
|
1420 |
-
</div>
|
1421 |
-
</div>
|
1422 |
-
|
1423 |
-
|
1424 |
-
|
1425 |
)
|
1426 |
}
|
1427 |
|
|
|
614 |
)
|
615 |
|
616 |
return (
|
617 |
+
<>
|
618 |
+
{/* Model Footprint with Prefill Chunking */}
|
619 |
+
<div>
|
620 |
+
<div className='chart mb-8'>
|
621 |
+
<div className='flex flex-col items-center'>
|
622 |
+
<div className='text-2xl'>Model Footprint with Prefill Chunking</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
623 |
</div>
|
624 |
+
<div>
|
625 |
+
<div className='chart-row'>
|
626 |
+
<div className='chart-row-title'>FP32</div>
|
627 |
+
<PrefillChunkingModelSizeBarChart
|
628 |
+
modelSize={calculateMemory(modelParams, 'fp32')}
|
629 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
|
630 |
+
modelPrecision='fp32'
|
631 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
632 |
+
activationMemorySize={activationMemorySize}
|
633 |
+
/>
|
634 |
+
<div className='chart-row-size ml-8'>
|
635 |
+
{(calculateMemory(modelParams, 'fp32') + activationMemorySize).toFixed(2)}{' '}
|
636 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
637 |
+
</div>
|
638 |
+
</div>
|
639 |
|
640 |
+
<div className='chart-row my-8'>
|
641 |
+
<div className='chart-row-title'>FP16</div>
|
642 |
+
<PrefillChunkingModelSizeBarChart
|
643 |
+
modelSize={calculateMemory(modelParams, 'fp16')}
|
644 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
|
645 |
+
modelPrecision='fp16'
|
646 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
647 |
+
activationMemorySize={activationMemorySize}
|
648 |
+
/>
|
649 |
+
<div className='chart-row-size ml-8'>
|
650 |
+
{(calculateMemory(modelParams, 'fp16') + activationMemorySize).toFixed(2)}{' '}
|
651 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
652 |
+
</div>
|
653 |
</div>
|
|
|
654 |
|
655 |
+
<div className='chart-row my-8'>
|
656 |
+
<div className='chart-row-title'>INT8</div>
|
657 |
+
<PrefillChunkingModelSizeBarChart
|
658 |
+
modelSize={calculateMemory(modelParams, 'int8')}
|
659 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
|
660 |
+
modelPrecision='int8'
|
661 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
662 |
+
activationMemorySize={activationMemorySize}
|
663 |
+
/>
|
664 |
+
<div className='chart-row-size ml-8'>
|
665 |
+
{(calculateMemory(modelParams, 'int8') + activationMemorySize).toFixed(2)}{' '}
|
666 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
667 |
+
</div>
|
668 |
</div>
|
|
|
669 |
|
670 |
+
<div className='chart-row my-8'>
|
671 |
+
<div className='chart-row-title'>INT4</div>
|
672 |
+
<PrefillChunkingModelSizeBarChart
|
673 |
+
modelSize={calculateMemory(modelParams, 'int4')}
|
674 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
|
675 |
+
modelPrecision='int4'
|
676 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
677 |
+
activationMemorySize={activationMemorySize}
|
678 |
+
/>
|
679 |
+
<div className='chart-row-size ml-8'>
|
680 |
+
{(calculateMemory(modelParams, 'int4') + activationMemorySize).toFixed(2)}{' '}
|
681 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
682 |
+
</div>
|
683 |
</div>
|
684 |
</div>
|
685 |
</div>
|
686 |
+
<div className='chart'>
|
687 |
+
<div className='flex flex-col items-center'>
|
688 |
+
<div className='text-2xl'>
|
689 |
+
Maximum Batch Size / Sequence Length with Prefill Chunking
|
690 |
+
</div>
|
691 |
+
</div>
|
692 |
+
<PrefillChunkingInferenceRuntimeLineChart
|
693 |
+
availableMemory={{
|
694 |
+
int4: deviceMemory - calculateMemory(modelParams, 'int4'),
|
695 |
+
int8: deviceMemory - calculateMemory(modelParams, 'int8'),
|
696 |
+
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
697 |
+
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
698 |
+
}}
|
699 |
+
memoryPerInput={memoryPerInput}
|
700 |
+
activationMemorySize={activationMemorySize}
|
701 |
+
/>
|
702 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
703 |
</div>
|
704 |
+
</>
|
705 |
)
|
706 |
}
|
707 |
|
|
|
774 |
}
|
775 |
|
776 |
return (
|
777 |
+
<div className='flex flex-col items-center justify-center min-h-screen px-4'>
|
778 |
+
{/* Toggle Button */}
|
779 |
+
<div className='mb-4 flex space-x-4'>
|
780 |
+
<button
|
781 |
+
className={`${
|
782 |
+
!isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
783 |
+
}`}
|
784 |
+
onClick={() => setIsPrefillChunking(false)}
|
785 |
+
>
|
786 |
+
Standard Calculator
|
787 |
+
</button>
|
788 |
+
<button
|
789 |
+
className={`${
|
790 |
+
isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
791 |
+
}`}
|
792 |
+
onClick={() => setIsPrefillChunking(true)}
|
793 |
+
>
|
794 |
+
Calculator with Prefill Chunking
|
795 |
+
</button>
|
796 |
+
</div>
|
797 |
|
798 |
+
{/* Model Memory Calculator */}
|
799 |
+
<div className='w-full max-w-4xl'>
|
800 |
+
<div className='text-4xl mb-4 text-center'>Model Memory Calculator</div>
|
801 |
+
<div className='mb-6 text-center'>
|
802 |
+
Use our Model Memory Calculator to help you estimate the memory footprint of your model
|
803 |
+
for different precisions and the maximum batch size / sequence length combination you can
|
804 |
+
run on your device.
|
805 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
806 |
|
807 |
+
{/* Model and Device Selection */}
|
808 |
+
<div className='grid grid-cols-1 sm:grid-cols-2 gap-4 mb-6'>
|
809 |
+
{/* Model Selection */}
|
810 |
+
<div className='calculator-input-box'>
|
811 |
+
<div className='text-2xl calculator-input-title'>Model</div>
|
812 |
+
<div className='calculator-input-content'>
|
813 |
+
<div className='mb-2'>
|
814 |
+
<button
|
815 |
+
className={`${
|
816 |
+
modelSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
817 |
+
}`}
|
818 |
+
onClick={() => setModelSelectionTab(true)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
819 |
>
|
820 |
+
Model Selection
|
821 |
+
</button>
|
822 |
+
<button
|
823 |
+
className={`${
|
824 |
+
modelSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
825 |
+
}`}
|
826 |
+
onClick={() => setModelSelectionTab(false)}
|
827 |
+
>
|
828 |
+
Custom Model
|
829 |
+
</button>
|
830 |
+
</div>
|
831 |
+
<div>
|
832 |
+
{modelSelectionTab ? (
|
833 |
+
<>
|
834 |
+
<label htmlFor='model'>Select a Model</label>
|
835 |
+
<select
|
836 |
+
id='model'
|
837 |
+
className='calculator-select'
|
838 |
+
onChange={(e) => {
|
839 |
+
setModelParams(Number(e.target.value))
|
840 |
+
setHiddenSize(
|
841 |
+
Number(
|
842 |
+
e.target.options[e.target.selectedIndex].getAttribute(
|
843 |
+
'data-hiddenSize',
|
844 |
+
),
|
845 |
+
),
|
846 |
+
)
|
847 |
+
setNumLayers(
|
848 |
+
Number(
|
849 |
+
e.target.options[e.target.selectedIndex].getAttribute('data-numLayers'),
|
850 |
+
),
|
851 |
+
)
|
852 |
+
}}
|
853 |
>
|
854 |
+
<option value=''>None selected</option>
|
855 |
+
{MODELS.map((model) => (
|
856 |
+
<option
|
857 |
+
key={model.name}
|
858 |
+
value={model.params}
|
859 |
+
data-hiddenSize={model.hidden_size}
|
860 |
+
data-numLayers={model.num_hidden_layers}
|
861 |
+
>
|
862 |
+
{model.name}
|
863 |
+
</option>
|
864 |
+
))}
|
865 |
+
</select>
|
866 |
+
</>
|
867 |
+
) : (
|
868 |
+
<>
|
869 |
+
<label htmlFor='modelParams'>Model Parameters (in billions)</label>
|
870 |
+
<input
|
871 |
+
type='number'
|
872 |
+
id='modelParams'
|
873 |
+
className='calculator-input mb-2'
|
874 |
+
placeholder='e.g. 7 (for LLaMA-7B)'
|
875 |
+
value={modelParams || ''}
|
876 |
+
min={0}
|
877 |
+
onChange={(e) => setModelParams(Number(e.target.value))}
|
878 |
+
/>
|
879 |
+
<label htmlFor='hiddenSize'>Hidden Size</label>
|
880 |
+
<input
|
881 |
+
type='number'
|
882 |
+
id='hiddenSize'
|
883 |
+
className='calculator-input mb-2'
|
884 |
+
placeholder='e.g. 4096 (for LLaMA-7B)'
|
885 |
+
value={hiddenSize || ''}
|
886 |
+
min={1}
|
887 |
+
onChange={(e) => setHiddenSize(Number(e.target.value))}
|
888 |
+
/>
|
889 |
+
<label htmlFor='numLayers'>Number of Layers</label>
|
890 |
+
<input
|
891 |
+
type='number'
|
892 |
+
id='numLayers'
|
893 |
+
className='calculator-input'
|
894 |
+
placeholder='e.g. 32 (for LLaMA-7B)'
|
895 |
+
value={numLayers || ''}
|
896 |
+
min={1}
|
897 |
+
onChange={(e) => setNumLayers(Number(e.target.value))}
|
898 |
+
/>
|
899 |
+
</>
|
900 |
+
)}
|
901 |
+
</div>
|
902 |
+
</div>
|
903 |
</div>
|
|
|
|
|
904 |
|
905 |
+
{/* Device Selection */}
|
906 |
+
<div className='calculator-input-box'>
|
907 |
+
<div className='text-2xl calculator-input-title'>Device</div>
|
908 |
+
<div className='calculator-input-content'>
|
909 |
+
<div className='mb-2'>
|
910 |
+
<button
|
911 |
+
className={`${
|
912 |
+
deviceSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
913 |
+
}`}
|
914 |
+
onClick={() => {
|
915 |
+
setDeviceSelectionTab(true)
|
916 |
+
setDeviceMemory(null)
|
917 |
+
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
918 |
>
|
919 |
+
Device Selection
|
920 |
+
</button>
|
921 |
+
<button
|
922 |
+
className={`${
|
923 |
+
deviceSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
924 |
+
}`}
|
925 |
+
onClick={() => {
|
926 |
+
setDeviceSelectionTab(false)
|
927 |
+
setDeviceMemory(null)
|
928 |
+
}}
|
929 |
+
>
|
930 |
+
Custom Device
|
931 |
+
</button>
|
932 |
+
</div>
|
933 |
+
<div>
|
934 |
+
{deviceSelectionTab ? (
|
935 |
+
<>
|
936 |
+
<label htmlFor='device'>Select a Device</label>
|
937 |
+
<select
|
938 |
+
id='device'
|
939 |
+
className='calculator-select'
|
940 |
+
onChange={(e) => setDeviceMemory(Number(e.target.value))}
|
941 |
+
>
|
942 |
+
<option value=''>None selected</option>
|
943 |
+
{DEVICES.map((device) => (
|
944 |
+
<option key={device.name} value={device.size}>
|
945 |
+
{device.name}
|
946 |
+
</option>
|
947 |
+
))}
|
948 |
+
</select>
|
949 |
+
</>
|
950 |
+
) : (
|
951 |
+
<>
|
952 |
+
<label htmlFor='deviceMemory'>Device RAM (in GB)</label>
|
953 |
+
<input
|
954 |
+
type='number'
|
955 |
+
id='deviceMemory'
|
956 |
+
className='calculator-input'
|
957 |
+
placeholder='e.g. 24'
|
958 |
+
value={deviceMemory || ''}
|
959 |
+
min={0}
|
960 |
+
onChange={(e) => setDeviceMemory(Number(e.target.value))}
|
961 |
+
/>
|
962 |
+
</>
|
963 |
+
)}
|
964 |
+
</div>
|
965 |
+
</div>
|
966 |
</div>
|
|
|
|
|
967 |
|
968 |
+
<div className='calculator-box'>
|
969 |
+
<div className='text-2xl ml-5 mb-4'>Backend Precision Table</div>
|
970 |
+
<div className='ml-5 mb-4'>
|
971 |
+
<BackendPrecisionTable />
|
972 |
+
</div>
|
973 |
+
<div className='ml-5'>
|
974 |
+
This table shows the precision used by each Takeoff backend for CPUs and GPUs, as well
|
975 |
+
as their accuracy preservation.
|
976 |
+
</div>
|
977 |
</div>
|
|
|
978 |
|
979 |
+
<div className='calculator-box'>
|
980 |
+
<div className='text-2xl ml-5 mb-4'>Input parameters</div>
|
981 |
+
<div className='ml-5 mb-4'>
|
982 |
+
<strong>Sequence Length</strong>: The combined length of input tokens and output
|
983 |
+
tokens. To restrict the maximum sequence length for inference on Takeoff, use the API
|
984 |
+
parameters <code>prompt_new_tokens</code> for input tokens and{' '}
|
985 |
+
<code>max_new_tokens</code> for output tokens when making a request.
|
986 |
+
</div>
|
987 |
+
<div className='ml-5'>
|
988 |
+
<strong>Batch Size</strong>: The number of sequences that can be processed in
|
989 |
+
parallel. To set a maximum batch size for inference on Takeoff, set the environment
|
990 |
+
variable <code>TAKEOFF_MAX_BATCH_SIZE</code> to your desired value.
|
991 |
+
</div>
|
992 |
</div>
|
993 |
</div>
|
994 |
|
995 |
+
{/* Prefill Chunking Settings */}
|
996 |
+
{isPrefillChunking && (
|
997 |
+
<div className='calculator-input-box mb-6'>
|
998 |
+
<div className='text-2xl calculator-input-title'>Prefill Chunking Settings</div>
|
999 |
+
<div className='calculator-input-content'>
|
1000 |
+
<label htmlFor='maxChunkSize'>Max Chunk Size</label>
|
1001 |
+
<input
|
1002 |
+
type='number'
|
1003 |
+
id='maxChunkSize'
|
1004 |
+
className='calculator-input mb-2'
|
1005 |
+
placeholder='e.g. 512'
|
1006 |
+
value={maxChunkSize || ''}
|
1007 |
+
min={1}
|
1008 |
+
onChange={(e) => setMaxChunkSize(Number(e.target.value))}
|
1009 |
+
/>
|
1010 |
+
<label htmlFor='intermediateSize'>Intermediate Size</label>
|
1011 |
+
<input
|
1012 |
+
type='number'
|
1013 |
+
id='intermediateSize'
|
1014 |
+
className='calculator-input'
|
1015 |
+
placeholder='e.g. 2048'
|
1016 |
+
value={intermediateSize || ''}
|
1017 |
+
min={1}
|
1018 |
+
onChange={(e) => setIntermediateSize(Number(e.target.value))}
|
1019 |
+
/>
|
1020 |
+
</div>
|
1021 |
+
</div>
|
1022 |
+
)}
|
1023 |
+
|
1024 |
+
{/* Charts Section */}
|
1025 |
+
{isPrefillChunking ? (
|
1026 |
+
<PrefillChunkingCalculator
|
1027 |
+
deviceMemory={deviceMemory!}
|
1028 |
+
modelParams={modelParams!}
|
1029 |
+
hiddenSize={hiddenSize!}
|
1030 |
+
numLayers={numLayers!}
|
1031 |
+
batchSize={batchSize}
|
1032 |
+
seqLength={seqLength}
|
1033 |
+
maxChunkSize={maxChunkSize}
|
1034 |
+
intermediateSize={intermediateSize}
|
1035 |
/>
|
1036 |
+
) : (
|
1037 |
+
hiddenSize &&
|
1038 |
+
numLayers &&
|
1039 |
+
deviceMemory &&
|
1040 |
+
modelParams && (
|
1041 |
+
<>
|
1042 |
+
{/* Model Footprint Chart */}
|
1043 |
+
<div className='chart mb-8'>
|
1044 |
+
<div className='text-2xl text-center mb-4'>Model Footprint</div>
|
1045 |
+
<div className='space-y-8'>
|
1046 |
+
<div className='chart-row'>
|
1047 |
+
<div className='chart-row-title'>FP32</div>
|
1048 |
+
<ModelSizeBarChart
|
1049 |
+
modelSize={calculateMemory(modelParams, 'fp32')}
|
1050 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
|
1051 |
+
modelPrecision='fp32'
|
1052 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
1053 |
+
/>
|
1054 |
+
<div className='chart-row-size ml-8'>
|
1055 |
+
{calculateMemory(modelParams, 'fp32')}{' '}
|
1056 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
1057 |
+
</div>
|
1058 |
+
</div>
|
1059 |
+
|
1060 |
+
{/* FP16 */}
|
1061 |
+
<div className='chart-row'>
|
1062 |
+
<div className='chart-row-title'>FP16</div>
|
1063 |
+
<ModelSizeBarChart
|
1064 |
+
modelSize={calculateMemory(modelParams, 'fp16')}
|
1065 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
|
1066 |
+
modelPrecision='fp16'
|
1067 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
1068 |
+
/>
|
1069 |
+
<div className='chart-row-size ml-8'>
|
1070 |
+
{calculateMemory(modelParams, 'fp16')}{' '}
|
1071 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
1072 |
+
</div>
|
1073 |
+
</div>
|
1074 |
+
|
1075 |
+
{/* INT8 */}
|
1076 |
+
<div className='chart-row'>
|
1077 |
+
<div className='chart-row-title'>INT8</div>
|
1078 |
+
<ModelSizeBarChart
|
1079 |
+
modelSize={calculateMemory(modelParams, 'int8')}
|
1080 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
|
1081 |
+
modelPrecision='int8'
|
1082 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
1083 |
+
/>
|
1084 |
+
<div className='chart-row-size ml-8'>
|
1085 |
+
{calculateMemory(modelParams, 'int8')}{' '}
|
1086 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
1087 |
+
</div>
|
1088 |
+
</div>
|
1089 |
+
|
1090 |
+
{/* INT4 */}
|
1091 |
+
<div className='chart-row'>
|
1092 |
+
<div className='chart-row-title'>INT4</div>
|
1093 |
+
<ModelSizeBarChart
|
1094 |
+
modelSize={calculateMemory(modelParams, 'int4')}
|
1095 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
|
1096 |
+
modelPrecision='int4'
|
1097 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
1098 |
+
/>
|
1099 |
+
<div className='chart-row-size ml-8'>
|
1100 |
+
{calculateMemory(modelParams, 'int4')}{' '}
|
1101 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
1102 |
+
</div>
|
1103 |
+
</div>
|
1104 |
</div>
|
1105 |
</div>
|
1106 |
|
1107 |
+
{/* Maximum Batch Size / Sequence Length Chart */}
|
1108 |
+
<div className='chart mb-8'>
|
1109 |
+
<div className='text-2xl text-center mb-4'>
|
1110 |
+
Maximum Batch Size / Sequence Length
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1111 |
</div>
|
1112 |
+
<div className='flex flex-row items-left'>
|
1113 |
+
<InferenceRuntimeLineChart
|
1114 |
+
availableMemory={{
|
1115 |
+
int4: deviceMemory - calculateMemory(modelParams, 'int4'),
|
1116 |
+
int8: deviceMemory - calculateMemory(modelParams, 'int8'),
|
1117 |
+
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
1118 |
+
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
1119 |
+
}}
|
1120 |
+
memoryPerInput={calculateMemoryPerInput(hiddenSize, numLayers)}
|
1121 |
+
/>
|
1122 |
+
<div className='chart-side-panel ml-4 pt-4'>
|
1123 |
+
<div className='mb-2'>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1124 |
Memory/token:{' '}
|
1125 |
{(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
|
1126 |
</div>
|
|
|
1425 |
</>
|
1426 |
) : null}
|
1427 |
</div>
|
1428 |
+
</div>
|
1429 |
+
</div>
|
1430 |
</div>
|
1431 |
+
</>
|
1432 |
+
)
|
1433 |
+
)}
|
1434 |
+
</div>
|
1435 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
1436 |
)
|
1437 |
}
|
1438 |
|