khurrameycon committed
Commit 8dc137a · verified · 1 Parent(s): be89a8a

Update app.py

Files changed (1):
  1. app.py +28 -35
app.py CHANGED
@@ -1,56 +1,48 @@
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
- from safetensors.torch import load_file
- import torch
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, PeftModel

- # Define the input schema
  class ModelInput(BaseModel):
      prompt: str
-     max_new_tokens: int = 50  # Optional: Defaults to 50 tokens
+     max_new_tokens: int = 50

- # Initialize FastAPI app
  app = FastAPI()

- # Load the base model and tokenizer
- base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"  # Base model
- adapter_weights_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs/resolve/main/adapter_model.safetensors"
-
- # Path to the adapter weights
+ # Load base model and tokenizer
+ base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+ adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"

+ # Initialize tokenizer from base model
  tokenizer = AutoTokenizer.from_pretrained(base_model_path)
- model = AutoModelForCausalLM.from_pretrained(base_model_path)
-
- # Load the adapter weights
- def load_adapter_weights(model, adapter_weights_path):
-     adapter_weights = load_file(adapter_weights_path)
-     model.load_state_dict(adapter_weights, strict=False)  # Apply the weights
-     return model

- # Apply adapter weights to the model
- model = load_adapter_weights(model, adapter_weights_path)
+ # Load base model
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_path,
+     device_map="auto",
+     trust_remote_code=True
+ )

- # Ensure the model is in evaluation mode
- model.eval()
+ # Load and merge adapter weights
+ model = PeftModel.from_pretrained(base_model, adapter_path)
+ model = model.merge_and_unload()

- # Initialize the pipeline
+ # Initialize pipeline
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

- # Helper function to generate a response
  def generate_response(model, tokenizer, instruction, max_new_tokens=128):
-     """Generate a response from the model based on an instruction."""
      try:
-         # Tokenize and generate the output
-         inputs = tokenizer(instruction, return_tensors="pt")
-         inputs = {key: value.to(model.device) for key, value in inputs.items()}  # Move tensors to the model's device
+         messages = [{"role": "user", "content": instruction}]
+         input_text = tokenizer.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
          outputs = model.generate(
-             **inputs,
+             inputs,
              max_new_tokens=max_new_tokens,
-             temperature=0.7,
+             temperature=0.2,
              top_p=0.9,
              do_sample=True,
          )
-         # Decode the output
          response = tokenizer.decode(outputs[0], skip_special_tokens=True)
          return response
      except Exception as e:

@@ -58,11 +50,12 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):

  @app.post("/generate")
  def generate_text(input: ModelInput):
-     """API endpoint to generate text."""
      try:
-         # Call the helper function
          response = generate_response(
-             model=model, tokenizer=tokenizer, instruction=input.prompt, max_new_tokens=input.max_new_tokens
+             model=model,
+             tokenizer=tokenizer,
+             instruction=input.prompt,
+             max_new_tokens=input.max_new_tokens
          )
          return {"generated_text": response}
      except Exception as e:

@@ -70,4 +63,4 @@ def generate_text(input: ModelInput):

  @app.get("/")
  def root():
-     return {"message": "Welcome to the Hugging Face Model API with Adapter Support!"}
+     return {"message": "Welcome to the Hugging Face Model API!"}