Remove default model selection on startup

#18
Files changed (1)
  1. src/app.py +47 -33
src/app.py CHANGED
@@ -35,23 +35,24 @@ model_list = [
  "tiiuae/falcon-180B",
  "tiiuae/falcon-180B-Chat",
  ]
+
  st.title("Can you run it? LLM version")

  percentage_width_main = 80
  st.markdown(
- f"""<style>
- .appview-container .main .block-container{{
- max-width: {percentage_width_main}%;}}
- </style>
- """,
- unsafe_allow_html=True,
- )
+ f"""<style>
+ .appview-container .main .block-container{{
+ max-width: {percentage_width_main}%;}}
+ </style>
+ """,
+ unsafe_allow_html=True,
+ )

  @st.cache_resource()
  def cache_model_list():
  model_list_info = {}
  for model_name in model_list:
- if not "tiiuae/falcon" in model_name: # Exclude Falcon models
+ if not "tiiuae/falcon" in model_name: # Exclude Falcon models
  model = get_model(model_name, library="transformers", access_token="")
  model_list_info[model_name] = calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
  del model
@@ -85,7 +86,7 @@ def show_gpu_info(info, trainable_params=0, vendor=""):
  if var == 'LoRa Fine-tuning':
  msg += f" ({trainable_params}%)"
  else:
- if _info['Number of GPUs']==1:
+ if _info['Number of GPUs'] == 1:
  msg = f"You can run **{var}**"
  func = st.success
  icon = "✅"
@@ -95,7 +96,6 @@ def show_gpu_info(info, trainable_params=0, vendor=""):
  icon = "⛔"
  func(msg, icon=icon)

-
  def get_name(index):
  row = gpu_specs.iloc[index]
  return f"{row['Product Name']} ({row['RAM (GB)']} GB, {row['Year']})"
@@ -106,16 +106,20 @@ def custom_ceil(a, precision=0):
  gpu_specs = get_gpu_specs()
  model_list_info = cache_model_list()

- _, col, _ = st.columns([1,3,1])
+ _, col, _ = st.columns([1, 3, 1])
  with col.expander("Information", expanded=True):
- st.markdown("""- GPU information comes from [TechPowerUp GPU Specs](https://www.techpowerup.com/gpu-specs/)
+ st.markdown(
+ """- GPU information comes from [TechPowerUp GPU Specs](https://www.techpowerup.com/gpu-specs/)
  - Mainly based on [Model Memory Calculator by hf-accelerate](https://huggingface.co/spaces/hf-accelerate/model-memory-usage)
  using `transformers` library
  - Inference is calculated following [EleutherAI Transformer Math 101](https://blog.eleuther.ai/transformer-math/),
- where is estimated as """)
+ where is estimated as """
+ )

  st.latex(r"""\text{Memory}_\text{Inference} \approx \text{Model Size} \times 1.2""")
- st.markdown("""- For LoRa Fine-tuning, I'm asuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is""")
+ st.markdown(
+ """- For LoRa Fine-tuning, I'm asuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is"""
+ )
  st.latex(r"\text{Memory}_\text{LoRa} \approx \left(\text{Model Size} + \text{ \# trainable Params}_\text{Billions}\times\frac{16}{8} \times 4\right) \times 1.2")

  access_token = st.sidebar.text_input("Access token")
@@ -123,9 +127,9 @@ access_token = st.sidebar.text_input("Access token")
  if access_token:
  login(token=access_token)

- #model_name = st.sidebar.text_input("Model name", value="mistralai/Mistral-7B-v0.1")
  with st.sidebar.container():
- model_name = stDatalist("Model name (Press Enter to apply)", model_list, index=0)
+ model_name = stDatalist("Model name (Press Enter to apply)", model_list)
+
  if not model_name:
  st.info("Please enter a model name")
  st.stop()
@@ -145,7 +149,6 @@ if model_name not in st.session_state:
  gc.collect()
  st.session_state['actual_model'] = model_name

-
  gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel", "Apple"])
  # year = st.sidebar.selectbox("Filter by Release Year", list(range(2014, 2024))[::-1], index=None)
  gpu_info = gpu_specs[gpu_specs['Vendor'] == gpu_vendor].sort_values('Product Name')
@@ -159,7 +162,7 @@ gpu_info = gpu_info[gpu_info["RAM (GB)"].between(ram[0], ram[1])]
  if len(gpu_info) == 0:
  st.sidebar.error(f"**{gpu_vendor}** has no GPU in that RAM range")
  st.stop()
- gpu = st.sidebar.selectbox("GPU", gpu_info['Product Name'].index.tolist(), format_func=lambda x : gpu_specs.iloc[x]['Product Name'])
+ gpu = st.sidebar.selectbox("GPU", gpu_info['Product Name'].index.tolist(), format_func=lambda x: gpu_specs.iloc[x]['Product Name'])
  gpu_spec = gpu_specs.iloc[gpu]
  gpu_spec.name = 'INFO'

@@ -169,8 +172,8 @@ st.sidebar.dataframe(gpu_spec.T.astype(str))

  memory_table = pd.DataFrame(st.session_state[model_name]).set_index('dtype')
  memory_table['LoRA Fine-Tuning (GB)'] = (memory_table["Total Size (GB)"] +
- (memory_table["Parameters (Billion)"]* lora_pct/100 * (16/8)*4)) * 1.2
-
+ (memory_table["Parameters (Billion)"] * lora_pct/100 * (16/8) * 4)) * 1.2
+
  _memory_table = memory_table.copy()
  memory_table = memory_table.round(2).T
  _memory_table /= gpu_spec['RAM (GB)']
@@ -178,13 +181,16 @@ _memory_table = _memory_table.apply(np.ceil).astype(int).drop(columns=['Paramete
  _memory_table.columns = ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']
  _memory_table = _memory_table.stack().reset_index()
  _memory_table.columns = ['dtype', 'Variable', 'Number of GPUs']
- col1, col2 = st.columns([1,1.3])
+ col1, col2 = st.columns([1, 1.3])

  if gpu_vendor == "Apple":
- col.warning("""For M1/M2/M3 Apple chips, PyTorch uses [Metal Performance Shaders (MPS)](https://huggingface.co/docs/accelerate/usage_guides/mps) as backend.\\
- Remember that Apple M1/M2/M3 chips share memory between CPU and GPU.""", icon="⚠️")
+ col.warning(
+ """For M1/M2/M3 Apple chips, PyTorch uses [Metal Performance Shaders (MPS)](https://huggingface.co/docs/accelerate/usage_guides/mps) as backend.
+ Remember that Apple M1/M2/M3 chips share memory between CPU and GPU.""",
+ icon="⚠️",
+ )
  with col1:
- st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3,0],1):.1f}B)")
+ st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3, 0], 1):.1f}B)")

  dtypes = memory_table.columns.tolist()[::-1]
  tabs = st.tabs(dtypes)
@@ -201,12 +207,20 @@ with col2:
  if gpu_vendor == "Apple":
  st.warning("This graph is irrelevant for M1/M2 chips as they can't run in parallel.", icon="⚠️")
  extra = "⚠️"
- num_colors= 4
- colors = [px.colors.sequential.RdBu[int(i*(len(px.colors.sequential.RdBu)-1)/(num_colors-1))] for i in range(num_colors)]
- fig = px.bar(_memory_table, x='Variable', y='Number of GPUs', color='dtype', barmode='group', color_discrete_sequence=colors)
- fig.update_layout(title=dict(text=f"{extra} Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
- , xaxis_tickfont_size=14, yaxis_tickfont_size=16, yaxis_dtick='1')
- st.plotly_chart(fig, use_container_width=True)
-
-
-
+ num_colors = 4
+ colors = [px.colors.sequential.RdBu[int(i * (len(px.colors.sequential.RdBu) - 1) / (num_colors - 1))] for i in range(num_colors)]
+ fig = px.bar(
+ _memory_table,
+ x='Variable',
+ y='Number of GPUs',
+ color='dtype',
+ barmode='group',
+ color_discrete_sequence=colors,
+ )
+ fig.update_layout(
+ title=dict(text=f"{extra} Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25)),
+ xaxis_tickfont_size=14,
+ yaxis_tickfont_size=16,
+ yaxis_dtick='1'
+ )
+ st.plotly_chart(fig, use_container_width=True)
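The change that gives this PR its name is the sidebar model picker: dropping index=0 from the stDatalist call means no model is pre-selected when the app starts, so the existing empty-input guard now runs on first load instead of the app immediately computing tables for the first entry of model_list. A minimal sketch of the before/after behavior, assuming the widget comes from the streamlit-datalist package (import path is an assumption) and returns an empty value until the user types a name and presses Enter:

import streamlit as st
from streamlit_datalist import stDatalist  # assumed import path for the datalist widget

model_list = ["mistralai/Mistral-7B-v0.1", "tiiuae/falcon-180B"]  # abridged for illustration

# Before: index=0 pre-selected the first entry, so results were rendered
# for that model as soon as the page loaded.
# model_name = stDatalist("Model name (Press Enter to apply)", model_list, index=0)

# After: nothing is pre-selected; model_name starts empty and the guard
# below halts the script until the user picks a model.
model_name = stDatalist("Model name (Press Enter to apply)", model_list)

if not model_name:
    st.info("Please enter a model name")
    st.stop()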