# api-smollm135m / app.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

class ModelInput(BaseModel):
    prompt: str
    max_new_tokens: int = 50


app = FastAPI()

# Load base model and tokenizer
base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
# Initialize tokenizer from the base model
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Load the base model (device_map="auto" requires the accelerate package)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",
    trust_remote_code=True,
)

# Load and merge adapter weights
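# merge_and_unload() folds the LoRA adapter weights into the base model and
# returns a plain transformers model, so no PEFT wrapper is needed at inference.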
model = PeftModel.from_pretrained(base_model, adapter_path)
model = model.merge_and_unload()

def generate_response(model, tokenizer, instruction, max_new_tokens=128):
    try:
        # Format the instruction with the model's chat template
        messages = [{"role": "user", "content": instruction}]
        input_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Tokenize with an attention mask to avoid generation warnings
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Decode only the newly generated tokens, not the echoed prompt
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
        )
        return response
    except Exception as e:
        raise ValueError(f"Error generating response: {e}") from e
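
# Hypothetical quick check outside the API (uncomment to test locally):
#   print(generate_response(model, tokenizer, "Summarize LoRA in one sentence."))
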
@app.post("/generate")
def generate_text(request: ModelInput):
    # Run generation and surface any failure as an HTTP 500
    try:
        response = generate_response(
            model=model,
            tokenizer=tokenizer,
            instruction=request.prompt,
            max_new_tokens=request.max_new_tokens,
        )
        return {"generated_text": response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
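
# Example request (hypothetical local deployment on port 8000):
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What is instruction tuning?", "max_new_tokens": 64}'
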
@app.get("/")
def root():
return {"message": "Welcome to the Hugging Face Model API!"}