from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from llama_cpp import Llama
import os

# Define the FastAPI app
app = FastAPI()

# Path to the GGUF model file
MODEL_NAME = "SmolVLM-500M-Instruct-GGUF.Q4_K_M.gguf"
MODEL_PATH = f"./{MODEL_NAME}"

# Download the model from the Hub if it's not present
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download

    hf_hub_download(
        repo_id="ggml-org/SmolVLM-500M-Instruct-GGUF",
        filename=MODEL_NAME,
        local_dir=".",
        # Deprecated in recent huggingface_hub releases (where it is ignored);
        # kept here for compatibility with older versions.
        local_dir_use_symlinks=False,
    )

# Load the Llama model
try:
    llm = Llama(model_path=MODEL_PATH, n_ctx=2048, verbose=False)
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None

class InferenceRequest(BaseModel):
    prompt: str

@app.post("/generate")
def generate_text(request: InferenceRequest):
    if llm is None:
        # Returning a (dict, int) tuple is Flask idiom; in FastAPI the status
        # code must be set explicitly via JSONResponse (or an HTTPException).
        return JSONResponse(status_code=500, content={"error": "Model not loaded"})
    try:
        output = llm.create_completion(
            prompt=request.prompt,
            max_tokens=256,
            stop=["<|im_end|>", "</s>"],
            temperature=0.7,
        )
        return {"text": output["choices"][0]["text"].strip()}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

@app.get("/")
def health_check():
    return {"status": "ok", "model_loaded": llm is not None}