Spaces:

Tousifahamed
/

smol-lm2-demo

Sleeping

Tousifahamed committed on Jan 22

Commit

632a181

verified ·

1 Parent(s): a7f53d7

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import torch
 import torch.ao.quantization as quantization
 from transformers import AutoTokenizer
-from model import TransformerModel  # Replace with your model class
 import gradio as gr
 # Load the tokenizer
@@ -24,7 +25,7 @@ def load_quantized_model(checkpoint_path):
     # Set quantization config for ALL embedding layers
     for name, module in model.named_modules():
-        if isinstance(module, nn.Embedding):
             module.qconfig = quantization.float_qparams_weight_only_qconfig
     # Apply static quantization to the rest of the model
@@ -39,7 +40,6 @@ def load_quantized_model(checkpoint_path):
     model.eval()
     return model
-import gradio as gr
 # Load the quantized model
 model = load_quantized_model("checkpoint_quantized.pt")

 import torch
+import torch.nn as nn  # Added missing import
 import torch.ao.quantization as quantization
 from transformers import AutoTokenizer
+from model import TransformerModel
 import gradio as gr
 # Load the tokenizer
     # Set quantization config for ALL embedding layers
     for name, module in model.named_modules():
+        if isinstance(module, nn.Embedding):  # Now works because `nn` is imported
             module.qconfig = quantization.float_qparams_weight_only_qconfig
     # Apply static quantization to the rest of the model
     model.eval()
     return model
 # Load the quantized model
 model = load_quantized_model("checkpoint_quantized.pt")