Spaces:

Tousifahamed
/

smol-lm2-demo

Sleeping

Tousifahamed committed on Jan 22

Commit

7d3f5e9

verified ·

1 Parent(s): ad95929

Upload 2 files

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,14 +42,6 @@ import gradio as gr
 # Load the quantized model
 model = load_quantized_model("checkpoint_quantized.pt")
-# Set the quantization configuration for the embedding layer
-model.embed_tokens.qconfig = quantization.float_qparams_weight_only_qconfig
-# Apply static quantization to the rest of the model
-model.qconfig = quantization.default_qconfig
-model = quantization.prepare(model, inplace=False)
-model = quantization.convert(model, inplace=False)
 # Function to generate text
 def generate_text(prompt, max_length=50, temperature=1.0, top_k=50):
     input_ids = tokenizer.encode(prompt, return_tensors="pt")

 # Load the quantized model
 model = load_quantized_model("checkpoint_quantized.pt")
 # Function to generate text
 def generate_text(prompt, max_length=50, temperature=1.0, top_k=50):
     input_ids = tokenizer.encode(prompt, return_tensors="pt")