from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
import os

# Define the FastAPI app
app = FastAPI()

# Path to the GGUF model file
MODEL_NAME = "SmolVLM-500M-Instruct-GGUF.Q4_K_M.gguf"
MODEL_PATH = f"./{MODEL_NAME}"

# Download the model from the Hub if it's not present
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download
    hf_hub_download(
        repo_id="ggml-org/SmolVLM-500M-Instruct-GGUF",
        filename=MODEL_NAME,
        local_dir=".",
        local_dir_use_symlinks=False,
    )

# Load the Llama model; fall back to None so the endpoints can report the failure
try:
    llm = Llama(model_path=MODEL_PATH, n_ctx=2048, verbose=False)
except Exception as e:
    print(f"Error loading model: {e}")
    llm = None

# Request body schema for the generation endpoint
class InferenceRequest(BaseModel):
    prompt: str

# Expose generation over HTTP. The original code never registered this
# function as a route; a POST route is added here, and the "/generate"
# path is an assumption.
@app.post("/generate")
def generate_text(request: InferenceRequest):
    if llm is None:
        # Raise HTTPException instead of returning a (dict, int) tuple:
        # FastAPI would serialize the tuple as JSON rather than set a 500 status
        raise HTTPException(status_code=500, detail="Model not loaded")
    try:
        output = llm.create_completion(
            prompt=request.prompt,
            max_tokens=256,
            stop=["<|im_end|>", "</s>"],
            temperature=0.7,
        )
        return {"text": output["choices"][0]["text"].strip()}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Health probe reporting whether the model loaded successfully.
# As above, the "/health" path is an assumption.
@app.get("/health")
def health_check():
    return {"status": "ok", "model_loaded": llm is not None}