Spaces:

Tousifahamed
/

smol-lm2-demo

Sleeping

Tousifahamed committed on Jan 22

Commit

a7f53d7

verified ·

1 Parent(s): 7d3f5e9

Upload 2 files

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,15 +22,17 @@ def load_quantized_model(checkpoint_path):
         tie_word_embeddings=True,
     )
-    # Set the quantization configuration for the embedding layer
-    model.embed_tokens.qconfig = quantization.float_qparams_weight_only_qconfig
     # Apply static quantization to the rest of the model
     model.qconfig = quantization.default_qconfig
     model = quantization.prepare(model, inplace=False)
     model = quantization.convert(model, inplace=False)
-    # Load the quantized checkpoint
     checkpoint = torch.load(checkpoint_path, map_location="cpu")
     model.load_state_dict(checkpoint["model_state_dict"])

         tie_word_embeddings=True,
     )
+    # Set quantization config for ALL embedding layers
+    for name, module in model.named_modules():
+        if isinstance(module, nn.Embedding):
+            module.qconfig = quantization.float_qparams_weight_only_qconfig
     # Apply static quantization to the rest of the model
     model.qconfig = quantization.default_qconfig
     model = quantization.prepare(model, inplace=False)
     model = quantization.convert(model, inplace=False)
+    # Load the checkpoint
     checkpoint = torch.load(checkpoint_path, map_location="cpu")
     model.load_state_dict(checkpoint["model_state_dict"])