Upload folder using huggingface_hub

#4
by sharpenb - opened
Files changed (3)
  1. README.md +4 -4
  2. config.json +3 -23
  3. smash_config.json +10 -25
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 thumbnail: "https://assets-global.website-files.com/646b351987a8d8ce158d1940/64ec9e96b4334c0e1ac41504_Logo%20with%20white%20text.svg"
-base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
+base_model: ORIGINAL_REPO_NAME
 metrics:
 - memory_disk
 - memory_inference
@@ -52,7 +52,7 @@ tags:
 
 You can run the smashed model with these steps:
 
-0. Check requirements from the original repo HuggingFaceTB/SmolLM2-1.7B-Instruct installed. In particular, check python, cuda, and transformers versions.
+0. Check requirements from the original repo ORIGINAL_REPO_NAME installed. In particular, check python, cuda, and transformers versions.
 1. Make sure that you have installed quantization related packages.
 ```bash
 pip install hqq
@@ -67,7 +67,7 @@ You can run the smashed model with these steps:
     model = HQQModelForCausalLM.from_quantized("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed", device_map='auto')
 except:
     model = AutoHQQHFModel.from_quantized("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed")
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("ORIGINAL_REPO_NAME")
 
 input_ids = tokenizer("What is the color of prunes?,", return_tensors='pt').to(model.device)["input_ids"]
 
@@ -81,7 +81,7 @@ The configuration info are in `smash_config.json`.
 
 ## Credits & License
 
-The license of the smashed model follows the license of the original model. Please check the license of the original model HuggingFaceTB/SmolLM2-1.7B-Instruct before using this model which provided the base model. The license of the `pruna-engine` is [here](https://pypi.org/project/pruna-engine/) on Pypi.
+The license of the smashed model follows the license of the original model. Please check the license of the original model ORIGINAL_REPO_NAME before using this model which provided the base model. The license of the `pruna-engine` is [here](https://pypi.org/project/pruna-engine/) on Pypi.
 
 ## Want to compress other models?
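The loading flow touched by the second and third hunks is only partially visible in the diff, so a runnable sketch of it follows for reference. The import paths, the `generate`/`decode` calls, and the `max_new_tokens` value are assumptions (they sit outside the changed lines); the repo id, the prompt, and the tokenizer source come from the hunks, with the tokenizer pointed at the concrete SmolLM2 repo that `ORIGINAL_REPO_NAME` stands in for here.

```python
# Sketch of the README's loading snippet, filled in so it runs end to end.
# Import paths are assumed from the hqq package layout; they are not part
# of the changed lines above.
from transformers import AutoTokenizer
from hqq.engine.hf import HQQModelForCausalLM   # assumed import
from hqq.models.hf.base import AutoHQQHFModel   # assumed import

repo_id = "PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed"

try:
    # Preferred path in the README: hqq's HF engine wrapper.
    model = HQQModelForCausalLM.from_quantized(repo_id, device_map="auto")
except Exception:
    # Fallback used by the README when the wrapper cannot load the repo.
    model = AutoHQQHFModel.from_quantized(repo_id)

# The diff swaps the concrete repo for ORIGINAL_REPO_NAME; for this model the
# placeholder corresponds to the SmolLM2 instruct repo.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")

input_ids = tokenizer("What is the color of prunes?,", return_tensors="pt").to(model.device)["input_ids"]
outputs = model.generate(input_ids, max_new_tokens=64)  # token budget chosen arbitrarily
print(tokenizer.decode(outputs[0]))
```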
 
config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "/home/ubuntu/.cache/pruna/tmp5u4v4d_o/tmpzr51qsp0",
+  "_name_or_path": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -21,38 +21,18 @@
   "num_key_value_heads": 32,
   "pad_token_id": 2,
   "pretraining_tp": 1,
-  "quantization_config": {
-    "quant_config": {
-      "offload_meta": false,
-      "scale_quant_params": null,
-      "weight_quant_params": {
-        "axis": 1,
-        "channel_wise": true,
-        "group_size": 64,
-        "nbits": 8,
-        "optimize": true,
-        "round_zero": false,
-        "view_as_float": false
-      },
-      "zero_quant_params": null
-    },
-    "quant_method": "hqq",
-    "skip_modules": [
-      "lm_head"
-    ]
-  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 130000,
   "tie_word_embeddings": true,
-  "torch_dtype": "float16",
+  "torch_dtype": "bfloat16",
   "transformers.js_config": {
     "kv_cache_dtype": {
       "fp16": "float16",
       "q4f16": "float16"
     }
   },
-  "transformers_version": "4.46.3",
+  "transformers_version": "4.48.2",
   "use_cache": true,
   "vocab_size": 49152
 }
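After this change the HQQ details (weight-quant parameters, `quant_method`, `skip_modules`) no longer live in `config.json`; they are carried by `smash_config.json` and the hqq save/load path instead. A quick way to confirm what the shipped config contains is the sketch below; the repo id is taken from the README, `hf_hub_download` is the standard `huggingface_hub` call, and the rest is plain `json`.

```python
# Inspect the shipped config.json after the change: the inline
# "quantization_config" block should be gone, while torch_dtype and
# transformers_version reflect the new values in the diff.
import json
from huggingface_hub import hf_hub_download

repo_id = "PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed"
path = hf_hub_download(repo_id, "config.json")

with open(path) as f:
    cfg = json.load(f)

print("quantization_config present:", "quantization_config" in cfg)  # expected: False
print("torch_dtype:", cfg.get("torch_dtype"))                        # expected: "bfloat16"
print("transformers_version:", cfg.get("transformers_version"))      # expected: "4.48.2"
```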
smash_config.json CHANGED
@@ -1,34 +1,19 @@
 {
-  "comp_cgenerate_active": false,
-  "comp_ctranslate_active": false,
-  "comp_cwhisper_active": false,
-  "comp_diffusers2_active": false,
-  "comp_flux_caching_active": false,
-  "comp_ifw_active": false,
-  "comp_ipex_llm_active": false,
-  "comp_onediff_active": false,
-  "comp_step_caching_active": false,
-  "comp_torch_compile_active": false,
-  "comp_ws2t_active": false,
-  "comp_x-fast_active": false,
-  "prune_torch-structured_active": false,
-  "prune_torch-unstructured_active": false,
-  "quant_aqlm_active": false,
-  "quant_awq_active": false,
-  "quant_gptq_active": false,
-  "quant_half_active": false,
-  "quant_hqq_active": true,
-  "quant_llm-int8_active": false,
-  "quant_quanto_active": false,
-  "quant_torch_dynamic_active": false,
-  "quant_torch_static_active": false,
+  "batchers": null,
+  "cachers": null,
+  "compilers": null,
+  "distillers": null,
+  "pruners": null,
+  "quantizers": "hqq",
+  "recoverers": null,
   "quant_hqq_backend": "torchao_int4",
   "quant_hqq_group_size": 64,
   "quant_hqq_weight_bits": 8,
   "max_batch_size": 1,
   "device": "cuda",
-  "cache_dir": "/home/ubuntu/.cache/pruna/tmp5u4v4d_o",
+  "cache_dir": "/tmp/models/tmpcdqz_pr1",
   "task": "",
   "save_load_fn": "hqq",
-  "save_load_fn_args": {}
+  "save_load_fn_args": {},
+  "api_key": null
 }
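The smash config moves from one `<algorithm>_active` boolean per method to named slots (`batchers`, `cachers`, `compilers`, `distillers`, `pruners`, `quantizers`, `recoverers`), each holding the algorithm in use or `null`. The sketch below reads the new-style file and lists what is active; it reuses the assumed repo id and download call from the `config.json` example, and the expected output is inferred from the diff.

```python
# Read the new-style smash_config.json and report the active compression
# slots plus the HQQ-specific settings kept from the old schema.
import json
from huggingface_hub import hf_hub_download

repo_id = "PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed"
path = hf_hub_download(repo_id, "smash_config.json")

with open(path) as f:
    smash_cfg = json.load(f)

slots = ["batchers", "cachers", "compilers", "distillers", "pruners", "quantizers", "recoverers"]
active = {slot: smash_cfg[slot] for slot in slots if smash_cfg.get(slot) is not None}
print("active slots:", active)  # expected: {'quantizers': 'hqq'}

hqq_settings = {k: v for k, v in smash_cfg.items() if k.startswith("quant_hqq_")}
print("hqq settings:", hqq_settings)  # backend, group size, weight bits
```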