sharpenb committed on
Commit 1f465d5 · verified · 1 Parent(s): 48cf990

da65b3b1c34a40604ca2ed176b1de57c9c38f9ae973abd92e092d98335f13f9f

Files changed (5)
  1. README.md +4 -4
  2. config.json +3 -3
  3. generation_config.json +1 -1
  4. model.safetensors +2 -2
  5. smash_config.json +10 -25
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 thumbnail: "https://assets-global.website-files.com/646b351987a8d8ce158d1940/64ec9e96b4334c0e1ac41504_Logo%20with%20white%20text.svg"
-base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
+base_model: ORIGINAL_REPO_NAME
 metrics:
 - memory_disk
 - memory_inference
@@ -52,7 +52,7 @@ tags:
 
 You can run the smashed model with these steps:
 
-0. Check requirements from the original repo HuggingFaceTB/SmolLM2-1.7B-Instruct installed. In particular, check python, cuda, and transformers versions.
+0. Check requirements from the original repo ORIGINAL_REPO_NAME installed. In particular, check python, cuda, and transformers versions.
 1. Make sure that you have installed quantization related packages.
 ```bash
 pip install transformers accelerate bitsandbytes>0.37.0
@@ -63,7 +63,7 @@ You can run the smashed model with these steps:
 
 
 model = AutoModelForCausalLM.from_pretrained("PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-bnb-4bit-smashed", trust_remote_code=True, device_map='auto')
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("ORIGINAL_REPO_NAME")
 
 input_ids = tokenizer("What is the color of prunes?,", return_tensors='pt').to(model.device)["input_ids"]
 
@@ -77,7 +77,7 @@ The configuration info are in `smash_config.json`.
 
 ## Credits & License
 
-The license of the smashed model follows the license of the original model. Please check the license of the original model HuggingFaceTB/SmolLM2-1.7B-Instruct before using this model which provided the base model. The license of the `pruna-engine` is [here](https://pypi.org/project/pruna-engine/) on Pypi.
+The license of the smashed model follows the license of the original model. Please check the license of the original model ORIGINAL_REPO_NAME before using this model which provided the base model. The license of the `pruna-engine` is [here](https://pypi.org/project/pruna-engine/) on Pypi.
 
 ## Want to compress other models?
 
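Note: the diff above templates out the base repo name, but the usage it documents is the standard transformers loading path for the smashed checkpoint. A minimal runnable sketch under the assumption that the quantization packages from step 1 are installed; the `max_new_tokens` value and the final `generate`/`decode` lines are added here for illustration and are not part of the diff:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the 4-bit smashed checkpoint; bitsandbytes handles dequantization at runtime.
model = AutoModelForCausalLM.from_pretrained(
    "PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-bnb-4bit-smashed",
    trust_remote_code=True,
    device_map="auto",
)
# The tokenizer comes from the original (base) repository.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")

input_ids = tokenizer("What is the color of prunes?,", return_tensors="pt").to(model.device)["input_ids"]
outputs = model.generate(input_ids, max_new_tokens=64)  # assumed generation length
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```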
 
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/home/ubuntu/.cache/pruna/tmpbiu3po2h6i9trd70",
+  "_name_or_path": "/tmp/models/tmpwv8vhyngdrgkv1ch",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -41,14 +41,14 @@
   "rope_scaling": null,
   "rope_theta": 130000,
   "tie_word_embeddings": true,
-  "torch_dtype": "float16",
+  "torch_dtype": "bfloat16",
   "transformers.js_config": {
     "kv_cache_dtype": {
       "fp16": "float16",
       "q4f16": "float16"
     }
   },
-  "transformers_version": "4.46.3",
+  "transformers_version": "4.48.2",
   "use_cache": true,
   "vocab_size": 49152
 }
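The two changed fields (`torch_dtype` and `transformers_version`) can be checked from the hub without downloading weights. A small sketch; `AutoConfig` reads the same `config.json` shown above:

```python
from transformers import AutoConfig

# Inspect the fields touched by this commit.
config = AutoConfig.from_pretrained(
    "PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-bnb-4bit-smashed",
    trust_remote_code=True,
)
print(config.torch_dtype)           # expected: bfloat16 after this commit
print(config.transformers_version)  # expected: 4.48.2
```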
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 2,
-  "transformers_version": "4.46.3"
+  "transformers_version": "4.48.2"
 }
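Only the recorded `transformers_version` changes here; the special-token ids stay the same. A quick check, assuming the same repo name as above:

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained(
    "PrunaAI/HuggingFaceTB-SmolLM2-1.7B-Instruct-bnb-4bit-smashed"
)
# Unchanged token ids from the diff above.
print(gen_config.bos_token_id, gen_config.eos_token_id, gen_config.pad_token_id)  # 1 2 2
```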
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:705a3816f413e14fb47db8da7423c590c1972c0f4cd29ba6c60d3f1deab19bf7
-size 1107606904
+oid sha256:62d057cb8b22ccaed925cc822b17c891138c20e8189cb27d2098ed2e46b00015
+size 1107607120
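`model.safetensors` is tracked with Git LFS, so the diff only touches the pointer's `oid` and `size`; the new binary lives in LFS storage. A hedged sketch for verifying a locally downloaded file against the pointer's sha256 (the local path is hypothetical):

```python
import hashlib

# Hypothetical local path to the downloaded weights file.
path = "model.safetensors"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

# Should match the oid recorded in the LFS pointer after this commit.
print(sha256.hexdigest() == "62d057cb8b22ccaed925cc822b17c891138c20e8189cb27d2098ed2e46b00015")
```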
smash_config.json CHANGED
@@ -1,27 +1,11 @@
 {
-  "comp_cgenerate_active": false,
-  "comp_ctranslate_active": false,
-  "comp_cwhisper_active": false,
-  "comp_diffusers2_active": false,
-  "comp_flux_caching_active": false,
-  "comp_ifw_active": false,
-  "comp_ipex_llm_active": false,
-  "comp_onediff_active": false,
-  "comp_step_caching_active": false,
-  "comp_torch_compile_active": false,
-  "comp_ws2t_active": false,
-  "comp_x-fast_active": false,
-  "prune_torch-structured_active": false,
-  "prune_torch-unstructured_active": false,
-  "quant_aqlm_active": false,
-  "quant_awq_active": false,
-  "quant_gptq_active": false,
-  "quant_half_active": false,
-  "quant_hqq_active": false,
-  "quant_llm-int8_active": true,
-  "quant_quanto_active": false,
-  "quant_torch_dynamic_active": false,
-  "quant_torch_static_active": false,
+  "batchers": null,
+  "cachers": null,
+  "compilers": null,
+  "distillers": null,
+  "pruners": null,
+  "quantizers": "llm-int8",
+  "recoverers": null,
   "quant_llm-int8_compute_dtype": "bfloat16",
   "quant_llm-int8_double_quant": false,
   "quant_llm-int8_enable_fp32_cpu_offload": false,
@@ -31,8 +15,9 @@
   "quant_llm-int8_weight_bits": 4,
   "max_batch_size": 1,
   "device": "cuda",
-  "cache_dir": "/home/ubuntu/.cache/pruna/tmpbiu3po2h",
+  "cache_dir": "/tmp/models/tmpwv8vhyng",
   "task": "",
   "save_load_fn": "llm-int8",
-  "save_load_fn_args": {}
+  "save_load_fn_args": {},
+  "api_key": null
 }
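The rewritten `smash_config.json` replaces the per-algorithm boolean flags with one field per compression stage (`"quantizers": "llm-int8"` instead of `"quant_llm-int8_active": true`) while keeping the llm-int8 hyperparameters. As a rough illustration only, not Pruna's actual loading code, the quantizer settings above correspond to a bitsandbytes configuration along these lines:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Approximate bitsandbytes equivalent of the smash_config quantizer settings;
# the exact mapping pruna uses internally is an assumption here.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                       # "quant_llm-int8_weight_bits": 4
    bnb_4bit_compute_dtype=torch.bfloat16,   # "quant_llm-int8_compute_dtype": "bfloat16"
    bnb_4bit_use_double_quant=False,         # "quant_llm-int8_double_quant": false
    llm_int8_enable_fp32_cpu_offload=False,  # "quant_llm-int8_enable_fp32_cpu_offload": false
)

model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
)
```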