Update

app.py CHANGED
```diff
@@ -13,25 +13,16 @@ from llmcompressor.transformers import oneshot, wrap_hf_model_class
 class CommitInfo:
     repo_url: str
 
-def get_model_class(class_name: str):
-    """Dynamically import and return the specified model class from transformers"""
-    try:
-        if class_name == "AutoModelForCausalLM":
-            from transformers import AutoModelForCausalLM
-            return AutoModelForCausalLM
-        exec(f"from transformers import {class_name}")
-        return eval(class_name)
-    except Exception as e:
-        raise ValueError(f"Failed to import model class {class_name}: {str(e)}")
+# def get_model_class(class_name: str):
+#     """Dynamically import and return the specified model class from transformers"""
+#     try:
+#         exec(f"from transformers import {class_name}")
+#         return eval(class_name)
+#     except Exception as e:
+#         raise ValueError(f"Failed to import model class {class_name}: {str(e)}")
 
 def parse_ignore_list(ignore_str: str) -> List[str]:
     """Parse comma-separated ignore list string into list"""
-    if not ignore_str:
-        return ["lm_head"]  # Default ignore list
     return [item.strip() for item in ignore_str.split(',') if item.strip()]
 
 def create_quantized_model(
```
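With the `["lm_head"]` fallback removed, `parse_ignore_list` now returns an empty list for empty input instead of a default. A quick illustration (the function body is copied from the diff; the example calls are ours):

```python
from typing import List

def parse_ignore_list(ignore_str: str) -> List[str]:
    """Parse comma-separated ignore list string into list"""
    return [item.strip() for item in ignore_str.split(',') if item.strip()]

# Whitespace around items is trimmed and empty segments are dropped.
parse_ignore_list("re:.*lm_head, re:vision_model.*")
# -> ['re:.*lm_head', 're:vision_model.*']

# With the old ["lm_head"] default removed, empty input now yields [].
parse_ignore_list("")
# -> []
```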
```diff
@@ -46,7 +37,8 @@ def create_quantized_model(
     errors = []
     try:
         # Get the appropriate model class
-        model_class = get_model_class(class_name)
+        exec(f"from transformers import {class_name}")
+        model_class = eval(class_name)
         wrapped_model_class = wrap_hf_model_class(model_class)
 
         # Load model with ZeroGPU
```
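The helper is now inlined: the class name supplied by the user is imported with `exec` and resolved with `eval`. If one wanted to avoid passing user input to `exec`/`eval`, an equivalent lookup can be done with `getattr` on the `transformers` module. This is a sketch of that alternative, not code from the Space:

```python
import transformers

def resolve_model_class(class_name: str):
    """Resolve a transformers class by name without exec/eval (illustrative helper)."""
    try:
        # transformers exposes its public classes as module attributes,
        # so getattr performs the same lookup as the dynamic import.
        return getattr(transformers, class_name)
    except AttributeError as e:
        raise ValueError(f"Failed to import model class {class_name}: {e}")

model_class = resolve_model_class("AutoModelForCausalLM")
```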
```diff
@@ -257,17 +249,16 @@ The steps are:
 2. Enter the model ID you want to quantize
 3. (Optional) Customize ignored layers and model class
 4. Click "Submit"
-5. You'll get a link to your new quantized model repository! 🚀
+5. You'll get a link to your new quantized model repository on your profile! 🚀
 
 ## Advanced Options:
 - **Ignore List**: Comma-separated list of layer patterns to ignore during quantization. Examples:
   - Llama: `lm_head`
   - Phi3v: `re:.*lm_head,re:model.vision_embed_tokens.*`
-  - Pixtral: `re:.*lm_head,re:multi_modal_projector.*`
   - Llama Vision: `re:.*lm_head,re:multi_modal_projector.*,re:vision_model.*`
 - **Model Class**: Specific model class from transformers (default: AutoModelForCausalLM). Examples:
+  - `AutoModelForCausalLM`
   - `MllamaForConditionalGeneration`
-  - `Qwen2VLForConditionalGeneration`
   - `LlavaForConditionalGeneration`
 
 Note:
```
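The `re:` prefix in these examples marks an entry as a regular expression rather than an exact layer name. As a rough mental model of how such a list is applied to module names (the matching logic below is an illustrative assumption; llm-compressor implements its own resolution):

```python
import re
from typing import List

def is_ignored(module_name: str, ignore: List[str]) -> bool:
    """Illustrative matcher: 're:'-prefixed entries are regexes, others exact names."""
    for pattern in ignore:
        if pattern.startswith("re:"):
            if re.match(pattern[3:], module_name):
                return True
        elif module_name == pattern:
            return True
    return False

ignore = ["re:.*lm_head", "re:vision_model.*"]
is_ignored("lm_head", ignore)               # True  (".*lm_head" matches)
is_ignored("vision_model.encoder", ignore)  # True
is_ignored("model.layers.0.mlp", ignore)    # False
```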
```diff
@@ -299,8 +290,8 @@ with gr.Blocks(title=title) as demo:
         ignore_str = gr.Text(
             max_lines=1,
             label="ignore_list (comma-separated)",
-            placeholder="lm_head,re:vision_model.*",
-            value="lm_head"
+            placeholder="re:.*lm_head,re:vision_model.*",
+            value="re:.*lm_head"
         )
         model_class_name = gr.Text(
             max_lines=1,
```
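The UI defaults move in step with the docs: the placeholder and initial value now use the regex form. A minimal standalone version of the updated textbox (`gr.Text` is Gradio's alias for `gr.Textbox`; the surrounding Blocks and title are illustrative):

```python
import gradio as gr

with gr.Blocks(title="LLM quantization demo") as demo:
    ignore_str = gr.Text(
        max_lines=1,
        label="ignore_list (comma-separated)",
        placeholder="re:.*lm_head,re:vision_model.*",  # regex-style hint
        value="re:.*lm_head",                          # new default, was "lm_head"
    )

demo.launch()
```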
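For context, the import line at the top of the diff (`oneshot`, `wrap_hf_model_class`) suggests the overall flow around these fragments. The sketch below follows typical llm-compressor FP8 examples; the recipe, scheme, and placeholder paths are assumptions, not code from this Space:

```python
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot, wrap_hf_model_class
from transformers import AutoModelForCausalLM

# Wrap the class as app.py does, then load the model through the wrapper.
wrapped_model_class = wrap_hf_model_class(AutoModelForCausalLM)
model = wrapped_model_class.from_pretrained("<model-id>", torch_dtype="auto")

# Assumed recipe: dynamic FP8 quantization of Linear layers,
# skipping whatever parse_ignore_list() produced.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["re:.*lm_head"],
)

# One-shot quantization (data-free for FP8_DYNAMIC), saved to output_dir.
oneshot(model=model, recipe=recipe, output_dir="<output-dir>")
```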