Remove default model selection on startup #18
by junhyeokk
src/app.py +47 -33
src/app.py
CHANGED
@@ -35,23 +35,24 @@ model_list = [
     "tiiuae/falcon-180B",
     "tiiuae/falcon-180B-Chat",
 ]
+
 st.title("Can you run it? LLM version")
 
 percentage_width_main = 80
 st.markdown(
-
-
-
-
-
-
-
+    f"""<style>
+    .appview-container .main .block-container{{
+        max-width: {percentage_width_main}%;}}
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
 
 @st.cache_resource()
 def cache_model_list():
     model_list_info = {}
     for model_name in model_list:
-        if not "tiiuae/falcon" in model_name:
+        if not "tiiuae/falcon" in model_name:  # Exclude Falcon models
             model = get_model(model_name, library="transformers", access_token="")
             model_list_info[model_name] = calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
             del model
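For context, `@st.cache_resource()` is what keeps `cache_model_list` from re-fetching model metadata on every script rerun. A minimal sketch of the pattern, with a hypothetical loader standing in for the Space's `get_model`/`calculate_memory` pipeline:

```python
import streamlit as st

@st.cache_resource()
def load_memory_table():
    # Hypothetical stand-in: the body runs once per server process;
    # subsequent reruns (every widget interaction) reuse the cached dict.
    return {"example/model-7b": {"float32 (GB)": 28.0, "int8 (GB)": 7.0}}

model_info = load_memory_table()  # cheap after the first call
```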
@@ -85,7 +86,7 @@ def show_gpu_info(info, trainable_params=0, vendor=""):
         if var == 'LoRa Fine-tuning':
             msg += f" ({trainable_params}%)"
         else:
-            if _info['Number of GPUs']==1:
+            if _info['Number of GPUs'] == 1:
                 msg = f"You can run **{var}**"
                 func = st.success
                 icon = "✅"
@@ -95,7 +96,6 @@ def show_gpu_info(info, trainable_params=0, vendor=""):
             icon = "⛔"
     func(msg, icon=icon)
 
-
 def get_name(index):
     row = gpu_specs.iloc[index]
     return f"{row['Product Name']} ({row['RAM (GB)']} GB, {row['Year']})"
@@ -106,16 +106,20 @@ def custom_ceil(a, precision=0):
 gpu_specs = get_gpu_specs()
 model_list_info = cache_model_list()
 
-_, col, _ = st.columns([1,3,1])
+_, col, _ = st.columns([1, 3, 1])
 with col.expander("Information", expanded=True):
-    st.markdown(
+    st.markdown(
+        """- GPU information comes from [TechPowerUp GPU Specs](https://www.techpowerup.com/gpu-specs/)
     - Mainly based on [Model Memory Calculator by hf-accelerate](https://huggingface.co/spaces/hf-accelerate/model-memory-usage)
     using `transformers` library
     - Inference is calculated following [EleutherAI Transformer Math 101](https://blog.eleuther.ai/transformer-math/),
-    where is estimated as """
+    where it is estimated as """
+    )
 
     st.latex(r"""\text{Memory}_\text{Inference} \approx \text{Model Size} \times 1.2""")
-    st.markdown(
+    st.markdown(
+        """- For LoRa Fine-tuning, I'm assuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is"""
+    )
     st.latex(r"\text{Memory}_\text{LoRa} \approx \left(\text{Model Size} + \text{ \# trainable Params}_\text{Billions}\times\frac{16}{8} \times 4\right) \times 1.2")
 
 access_token = st.sidebar.text_input("Access token")
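The two `st.latex` formulas above translate directly into arithmetic. A hedged worked example with illustrative numbers (not from the app): a ~7B-parameter model in float16, with LoRA training roughly 1% of its parameters:

```python
model_size_gb = 14.0       # ~7B params at ~2 bytes each (float16)
trainable_params_b = 0.07  # LoRA trains ~1% of 7B parameters

# Memory_Inference ≈ Model Size × 1.2
inference_gb = model_size_gb * 1.2

# Memory_LoRa ≈ (Model Size + #trainable params (B) × 16/8 × 4) × 1.2
lora_gb = (model_size_gb + trainable_params_b * (16 / 8) * 4) * 1.2

print(f"inference ≈ {inference_gb:.1f} GB, LoRA ≈ {lora_gb:.1f} GB")
# inference ≈ 16.8 GB, LoRA ≈ 17.5 GB
```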
@@ -123,9 +127,9 @@ access_token = st.sidebar.text_input("Access token")
 if access_token:
     login(token=access_token)
 
-#model_name = st.sidebar.text_input("Model name", value="mistralai/Mistral-7B-v0.1")
 with st.sidebar.container():
-    model_name
+    model_name = stDatalist("Model name (Press Enter to apply)", model_list)
+
 if not model_name:
     st.info("Please enter a model name")
     st.stop()
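This hunk is the PR's core change: with the commented-out default and the old pre-selected value gone, `stDatalist` (the Space's datalist widget) yields an empty string until the user actually enters a model, and the guard below halts the run. A minimal sketch of that guard, assuming the widget's empty-on-start behavior:

```python
import streamlit as st

model_name = ""  # stand-in for stDatalist(...) before any user input
if not model_name:
    st.info("Please enter a model name")
    st.stop()  # ends this script run; nothing below executes until input arrives
```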
@@ -145,7 +149,6 @@ if model_name not in st.session_state:
     gc.collect()
     st.session_state['actual_model'] = model_name
 
-
 gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel", "Apple"])
 # year = st.sidebar.selectbox("Filter by Release Year", list(range(2014, 2024))[::-1], index=None)
 gpu_info = gpu_specs[gpu_specs['Vendor'] == gpu_vendor].sort_values('Product Name')
@@ -159,7 +162,7 @@ gpu_info = gpu_info[gpu_info["RAM (GB)"].between(ram[0], ram[1])]
 if len(gpu_info) == 0:
     st.sidebar.error(f"**{gpu_vendor}** has no GPU in that RAM range")
     st.stop()
-gpu = st.sidebar.selectbox("GPU", gpu_info['Product Name'].index.tolist(), format_func=lambda x
+gpu = st.sidebar.selectbox("GPU", gpu_info['Product Name'].index.tolist(), format_func=lambda x: gpu_specs.iloc[x]['Product Name'])
 gpu_spec = gpu_specs.iloc[gpu]
 gpu_spec.name = 'INFO'
 
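Note the completed `format_func` here: the selectbox options are DataFrame row indices, and `format_func` maps each index to a human-readable label. A small self-contained sketch of the same idiom (toy data):

```python
import pandas as pd
import streamlit as st

specs = pd.DataFrame({"Product Name": ["GPU A", "GPU B"], "RAM (GB)": [24, 48]})

# Option values are row indices; the user sees the formatted product name.
choice = st.selectbox(
    "GPU",
    specs.index.tolist(),
    format_func=lambda i: specs.iloc[i]["Product Name"],
)
st.write(specs.iloc[choice])  # the row for the selected GPU
```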
@@ -169,8 +172,8 @@ st.sidebar.dataframe(gpu_spec.T.astype(str))
 
 memory_table = pd.DataFrame(st.session_state[model_name]).set_index('dtype')
 memory_table['LoRA Fine-Tuning (GB)'] = (memory_table["Total Size (GB)"] +
-                                         (memory_table["Parameters (Billion)"]* lora_pct/100 * (16/8)*4)) * 1.2
-
+                                         (memory_table["Parameters (Billion)"] * lora_pct/100 * (16/8) * 4)) * 1.2
+
 _memory_table = memory_table.copy()
 memory_table = memory_table.round(2).T
 _memory_table /= gpu_spec['RAM (GB)']
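For reference, the `_memory_table` manipulation that follows divides each memory figure by the selected GPU's RAM and ceils it, yielding a per-dtype GPU count. A toy sketch of the same arithmetic (hypothetical 24 GB card):

```python
import numpy as np
import pandas as pd

ram_gb = 24.0  # hypothetical GPU RAM
mem = pd.Series({"float32": 28.0, "float16": 14.0}, name="Inference (GB)")
gpus_needed = np.ceil(mem / ram_gb).astype(int)
print(gpus_needed)  # float32 -> 2 GPUs, float16 -> 1 GPU
```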
@@ -178,13 +181,16 @@ _memory_table = _memory_table.apply(np.ceil).astype(int).drop(columns=['Paramete
 _memory_table.columns = ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']
 _memory_table = _memory_table.stack().reset_index()
 _memory_table.columns = ['dtype', 'Variable', 'Number of GPUs']
-col1, col2 = st.columns([1,1.3])
+col1, col2 = st.columns([1, 1.3])
 
 if gpu_vendor == "Apple":
-    col.warning(
-
+    col.warning(
+        """For M1/M2/M3 Apple chips, PyTorch uses [Metal Performance Shaders (MPS)](https://huggingface.co/docs/accelerate/usage_guides/mps) as backend.
+        Remember that Apple M1/M2/M3 chips share memory between CPU and GPU.""",
+        icon="⚠️",
+    )
 with col1:
-    st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3,0],1):.1f}B)")
+    st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3, 0], 1):.1f}B)")
 
 dtypes = memory_table.columns.tolist()[::-1]
 tabs = st.tabs(dtypes)
@@ -201,12 +207,20 @@ with col2:
     if gpu_vendor == "Apple":
         st.warning("This graph is irrelevant for M1/M2 chips as they can't run in parallel.", icon="⚠️")
         extra = "⚠️"
-    num_colors= 4
-    colors = [px.colors.sequential.RdBu[int(i*(len(px.colors.sequential.RdBu)-1)/(num_colors-1))] for i in range(num_colors)]
-    fig = px.bar(
-
-
-
-
-
-
+    num_colors = 4
+    colors = [px.colors.sequential.RdBu[int(i * (len(px.colors.sequential.RdBu) - 1) / (num_colors - 1))] for i in range(num_colors)]
+    fig = px.bar(
+        _memory_table,
+        x='Variable',
+        y='Number of GPUs',
+        color='dtype',
+        barmode='group',
+        color_discrete_sequence=colors,
+    )
+    fig.update_layout(
+        title=dict(text=f"{extra} Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25)),
+        xaxis_tickfont_size=14,
+        yaxis_tickfont_size=16,
+        yaxis_dtick='1'
+    )
+    st.plotly_chart(fig, use_container_width=True)
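The `colors` line above samples `num_colors` evenly spaced entries from plotly's RdBu scale, one per dtype in the grouped bar chart. The sampling in isolation:

```python
import plotly.express as px

scale = px.colors.sequential.RdBu  # a list of CSS color strings
n = 4
# Evenly spaced indices across the scale: first, two interior, last.
colors = [scale[int(i * (len(scale) - 1) / (n - 1))] for i in range(n)]
print(colors)
```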