Upload folder using huggingface_hub

by sharpenb - opened 5 days ago

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

+17

-52

Files changed (3) hide show

README.md +4 -4
config.json +3 -23
smash_config.json +10 -25

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 thumbnail: "https://assets-global.website-files.com/646b351987a8d8ce158d1940/64ec9e96b4334c0e1ac41504_Logo%20with%20white%20text.svg"
-base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
 metrics:
 - memory_disk
 - memory_inference
@@ -52,7 +52,7 @@ tags:
 You can run the smashed model with these steps:
-0. Check that the requirements from the original repo HuggingFaceTB/SmolLM2-1.7B-Instruct are installed. In particular, check the Python, CUDA, and transformers versions.
 1. Make sure that you have installed quantization related packages.
     ```bash
     pip install hqq
@@ -67,7 +67,7 @@ You can run the smashed model with these steps:
      model = HQQModelForCausalLM.from_quantized("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed", device_map='auto')
     except:
      model = AutoHQQHFModel.from_quantized("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed")
-   tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
    input_ids = tokenizer("What is the color of prunes?,", return_tensors='pt').to(model.device)["input_ids"]
@@ -81,7 +81,7 @@ The configuration info is in `smash_config.json`.
 ## Credits & License
-The license of the smashed model follows the license of the original model. Before using this model, please check the license of the original model HuggingFaceTB/SmolLM2-1.7B-Instruct, which provided the base model. The license of the `pruna-engine` is available [here](https://pypi.org/project/pruna-engine/) on PyPI.
 ## Want to compress other models?

 ---
 thumbnail: "https://assets-global.website-files.com/646b351987a8d8ce158d1940/64ec9e96b4334c0e1ac41504_Logo%20with%20white%20text.svg"
+base_model: ORIGINAL_REPO_NAME
 metrics:
 - memory_disk
 - memory_inference
 You can run the smashed model with these steps:
+0. Check that the requirements from the original repo ORIGINAL_REPO_NAME are installed. In particular, check the Python, CUDA, and transformers versions.
 1. Make sure that you have installed quantization related packages.
     ```bash
     pip install hqq
      model = HQQModelForCausalLM.from_quantized("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed", device_map='auto')
     except:
      model = AutoHQQHFModel.from_quantized("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-HQQ-8bit-smashed")
+   tokenizer = AutoTokenizer.from_pretrained("ORIGINAL_REPO_NAME")
    input_ids = tokenizer("What is the color of prunes?,", return_tensors='pt').to(model.device)["input_ids"]
 ## Credits & License
+The license of the smashed model follows the license of the original model. Before using this model, please check the license of the original model ORIGINAL_REPO_NAME, which provided the base model. The license of the `pruna-engine` is available [here](https://pypi.org/project/pruna-engine/) on PyPI.
 ## Want to compress other models?

config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "/home/ubuntu/.cache/pruna/tmp5u4v4d_o/tmpzr51qsp0",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -21,38 +21,18 @@
   "num_key_value_heads": 32,
   "pad_token_id": 2,
   "pretraining_tp": 1,
-  "quantization_config": {
-    "quant_config": {
-      "offload_meta": false,
-      "scale_quant_params": null,
-      "weight_quant_params": {
-        "axis": 1,
-        "channel_wise": true,
-        "group_size": 64,
-        "nbits": 8,
-        "optimize": true,
-        "round_zero": false,
-        "view_as_float": false
-      },
-      "zero_quant_params": null
-    },
-    "quant_method": "hqq",
-    "skip_modules": [
-      "lm_head"
-    ]
-  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 130000,
   "tie_word_embeddings": true,
-  "torch_dtype": "float16",
   "transformers.js_config": {
     "kv_cache_dtype": {
       "fp16": "float16",
       "q4f16": "float16"
     }
   },
-  "transformers_version": "4.46.3",
   "use_cache": true,
   "vocab_size": 49152
 }

 {
   "_attn_implementation_autoset": true,
+  "_name_or_path": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "num_key_value_heads": 32,
   "pad_token_id": 2,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 130000,
   "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
   "transformers.js_config": {
     "kv_cache_dtype": {
       "fp16": "float16",
       "q4f16": "float16"
     }
   },
+  "transformers_version": "4.48.2",
   "use_cache": true,
   "vocab_size": 49152
 }

smash_config.json CHANGED Viewed

@@ -1,34 +1,19 @@
 {
-    "comp_cgenerate_active": false,
-    "comp_ctranslate_active": false,
-    "comp_cwhisper_active": false,
-    "comp_diffusers2_active": false,
-    "comp_flux_caching_active": false,
-    "comp_ifw_active": false,
-    "comp_ipex_llm_active": false,
-    "comp_onediff_active": false,
-    "comp_step_caching_active": false,
-    "comp_torch_compile_active": false,
-    "comp_ws2t_active": false,
-    "comp_x-fast_active": false,
-    "prune_torch-structured_active": false,
-    "prune_torch-unstructured_active": false,
-    "quant_aqlm_active": false,
-    "quant_awq_active": false,
-    "quant_gptq_active": false,
-    "quant_half_active": false,
-    "quant_hqq_active": true,
-    "quant_llm-int8_active": false,
-    "quant_quanto_active": false,
-    "quant_torch_dynamic_active": false,
-    "quant_torch_static_active": false,
     "quant_hqq_backend": "torchao_int4",
     "quant_hqq_group_size": 64,
     "quant_hqq_weight_bits": 8,
     "max_batch_size": 1,
     "device": "cuda",
-    "cache_dir": "/home/ubuntu/.cache/pruna/tmp5u4v4d_o",
     "task": "",
     "save_load_fn": "hqq",
-    "save_load_fn_args": {}
 }

 {
+    "batchers": null,
+    "cachers": null,
+    "compilers": null,
+    "distillers": null,
+    "pruners": null,
+    "quantizers": "hqq",
+    "recoverers": null,
     "quant_hqq_backend": "torchao_int4",
     "quant_hqq_group_size": 64,
     "quant_hqq_weight_bits": 8,
     "max_batch_size": 1,
     "device": "cuda",
+    "cache_dir": "/tmp/models/tmpcdqz_pr1",
     "task": "",
     "save_load_fn": "hqq",
+    "save_load_fn_args": {},
+    "api_key": null
 }