from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer


class ModelInput(BaseModel):
    prompt: str
    max_new_tokens: int = 50
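
# A request body matching this schema looks like the following (illustrative
# values, not from the original file):
#   {"prompt": "What is SmolLM?", "max_new_tokens": 64}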

app = FastAPI()

# Since we're getting config errors with PEFT, load the fine-tuned model
# directly instead of applying an adapter.
model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"

try:
    # Load the model and tokenizer directly from the fine-tuned checkpoint.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        trust_remote_code=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    raise


def generate_response(model, tokenizer, instruction, max_new_tokens=128):
    """Generate a response from the model for a single instruction."""
    try:
        # Format the instruction with the model's chat template.
        messages = [{"role": "user", "content": instruction}]
        input_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Generate a completion.
        inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
        outputs = model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
        )

        # Decode only the newly generated tokens so the prompt is not echoed
        # back in the response.
        response = tokenizer.decode(
            outputs[0][inputs.shape[-1]:], skip_special_tokens=True
        )
        return response
    except Exception as e:
        raise ValueError(f"Error generating response: {e}") from e
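
# Quick smoke test of the helper, independent of the HTTP layer (a minimal
# sketch; assumes the model loaded successfully above):
#   print(generate_response(model, tokenizer, "Say hello in one sentence."))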


@app.post("/generate")
async def generate_text(payload: ModelInput):
    """API endpoint to generate text from a prompt."""
    try:
        response = generate_response(
            model=model,
            tokenizer=tokenizer,
            instruction=payload.prompt,
            max_new_tokens=payload.max_new_tokens,
        )
        return {"generated_text": response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
async def root():
    return {"message": "Welcome to the Model API!"}