api-smollm135m

Sleeping

App Files Files Community

khurrameycon committed on Jan 1

Commit

9e09549

verified ·

1 Parent(s): 481c951

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 # Define the input schema
 class ModelInput(BaseModel):
@@ -10,12 +12,25 @@ class ModelInput(BaseModel):
 # Initialize FastAPI app
 app = FastAPI()
-# Load your model and tokenizer
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-# "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path)
 # Initialize the pipeline
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -24,17 +39,13 @@ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
     try:
-        # Format the input as chat messages if necessary
-        messages = [{"role": "user", "content": instruction}]
-        input_text = tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         # Tokenize and generate the output
-        inputs = tokenizer.encode(input_text, return_tensors="pt")
         outputs = model.generate(
-            inputs,
             max_new_tokens=max_new_tokens,
-            temperature=0.2,
             top_p=0.9,
             do_sample=True,
         )
@@ -58,4 +69,4 @@ def generate_text(input: ModelInput):
 @app.get("/")
 def root():
-    return {"message": "Welcome to the Hugging Face Model API!"}

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from safetensors.torch import load_file
+import torch
 # Define the input schema
 class ModelInput(BaseModel):
 # Initialize FastAPI app
 app = FastAPI()
+# Load the base model and tokenizer
+# NOTE(review): the base checkpoint is named "SmolLM2" while the adapter repo
+# below is named "SmolLM-135M-Instruct" — confirm the adapter was trained
+# against this exact base model.
+base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"  # Base model
+adapter_weights_path = "https://huggingface.co/khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs/resolve/main/adapter_model.safetensors"
+# Location of the adapter weights.
+# NOTE(review): this is a remote HTTPS URL rather than a local file path;
+# whatever consumes it must be able to fetch remote files — verify.
+tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+model = AutoModelForCausalLM.from_pretrained(base_model_path)
+# Load the adapter weights
+def load_adapter_weights(model, adapter_weights_path):
+    """Load a safetensors checkpoint into ``model`` and return the model.
+
+    Args:
+        model: Module whose ``load_state_dict`` receives the tensors.
+        adapter_weights_path: Local file path or ``http(s)://`` URL of a
+            ``.safetensors`` file.
+
+    Returns:
+        The same ``model`` object, updated in place.
+
+    Warns:
+        RuntimeWarning: If some or all tensors in the file have names the
+            model does not recognise and were therefore not applied.
+    """
+    import os
+    import tempfile
+    import urllib.request
+    import warnings
+
+    location = str(adapter_weights_path)
+    if location.startswith(("http://", "https://")):
+        # load_file() opens a local file, so a remote checkpoint has to be
+        # downloaded first. The tensors are read into memory before the
+        # temporary directory is removed.
+        with tempfile.TemporaryDirectory() as scratch_dir:
+            local_copy = os.path.join(scratch_dir, "adapter_model.safetensors")
+            urllib.request.urlretrieve(location, local_copy)
+            adapter_weights = load_file(local_copy)
+    else:
+        adapter_weights = load_file(adapter_weights_path)
+
+    # strict=False drops every tensor whose name the model does not know.
+    # Report what was skipped instead of silently serving unmodified weights.
+    # NOTE(review): LoRA-style adapter files presumably use wrapper-specific
+    # tensor names that a plain base model will not match — confirm whether
+    # this checkpoint needs a dedicated adapter loader (e.g. PEFT) instead.
+    outcome = model.load_state_dict(adapter_weights, strict=False)
+    skipped = list(outcome.unexpected_keys)
+    if skipped:
+        applied_count = len(adapter_weights) - len(skipped)
+        warnings.warn(
+            f"{len(skipped)} of {len(adapter_weights)} tensors from "
+            f"{location!r} did not match any model parameter and were "
+            f"ignored ({applied_count} applied); first ignored key: "
+            f"{skipped[0]!r}",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+    return model
+# Apply adapter weights to the model (runs once, when this module is imported)
+model = load_adapter_weights(model, adapter_weights_path)
+# Ensure the model is in evaluation mode
+model.eval()
 # Initialize the pipeline
 # The pipeline wraps the same model and tokenizer objects loaded above.
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
     try:
         # Tokenize and generate the output
+        inputs = tokenizer(instruction, return_tensors="pt")
+        inputs = {key: value.to(model.device) for key, value in inputs.items()}  # Move tensors to the model's device
         outputs = model.generate(
+            **inputs,
             max_new_tokens=max_new_tokens,
+            temperature=0.7,
             top_p=0.9,
             do_sample=True,
         )
 @app.get("/")
 def root():
+    """Return a static welcome message for GET requests to "/"."""
+    return {"message": "Welcome to the Hugging Face Model API with Adapter Support!"}