# FastAPI inference service: SmolLM2-135M-Instruct with a merged PEFT adapter.
# (Hugging Face Hub page chrome — commit hashes, line-number gutter — removed;
# it was scrape residue, not part of the program.)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# PeftModel is provided by the `peft` package, not `transformers`; importing it
# from transformers raises ImportError before the app can start.
from peft import PeftModel
class ModelInput(BaseModel):
    """Request body schema for the POST /generate endpoint."""

    prompt: str  # user instruction; forwarded as the chat-message content
    max_new_tokens: int = 50  # generation budget for this request (API default: 50)
# --- Application and model setup: runs once at import time ---
app = FastAPI()
# Load base model and tokenizer
base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
# Initialize tokenizer from base model
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",  # let accelerate place weights on the available device(s)
    trust_remote_code=True
)
# Load and merge adapter weights
model = PeftModel.from_pretrained(base_model, adapter_path)
# Fold the LoRA weights into the base model so inference needs no peft wrapper.
model = model.merge_and_unload()
# Initialize pipeline
# NOTE(review): `generator` is never used by the endpoints visible in this
# file — generation goes through generate_response() instead. Candidate for
# removal; confirm no other module imports it.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
def generate_response(model, tokenizer, instruction, max_new_tokens=128):
    """Generate a chat-style completion for a single user instruction.

    Args:
        model: causal LM exposing ``generate()`` and ``.device``.
        tokenizer: matching tokenizer providing ``apply_chat_template()``.
        instruction: user prompt text.
        max_new_tokens: upper bound on newly generated tokens.

    Returns:
        The generated reply text only (the prompt is not echoed back).

    Raises:
        ValueError: wraps any failure in templating, generation, or decoding.
    """
    try:
        messages = [{"role": "user", "content": instruction}]
        input_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Tokenize via __call__ so we also get an attention_mask; plain
        # encode() omits it and generate() then has to guess padding.
        encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
        )
        # Decode only the newly generated tokens: decoding the whole sequence
        # would return the chat template + user prompt to the caller as well.
        prompt_len = encoded["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        return response
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(f"Error generating response: {e}") from e
@app.post("/generate")
def generate_text(payload: ModelInput):
    """POST /generate — run the model on the supplied prompt.

    Body: ModelInput (prompt, max_new_tokens).
    Returns: {"generated_text": <model reply>}.
    Raises: HTTPException 500 carrying the underlying error message.

    (Parameter renamed from ``input`` — which shadowed the builtin — to
    ``payload``; for a single Pydantic body model the name is not part of
    the HTTP interface.)
    """
    try:
        response = generate_response(
            model=model,
            tokenizer=tokenizer,
            instruction=payload.prompt,
            max_new_tokens=payload.max_new_tokens,
        )
        return {"generated_text": response}
    except Exception as e:
        # Boundary handler: surface any failure as HTTP 500. detail exposes
        # str(e) — fine for a demo Space; consider redacting in production.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/")
def root():
    """GET / — landing/health endpoint with a static welcome message."""
    # Stray trailing "|" (page-scrape artifact) removed; it made this line a
    # syntax error.
    return {"message": "Welcome to the Hugging Face Model API!"}