leotrieu committed on
Commit 9c08c69 · 1 Parent(s): a7a80ff

Initialize app

Files changed (3)
  1. Dockerfile +30 -0
  2. app.py +51 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM python:3.10-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Install system dependencies for llama-cpp-python
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential \
+     libopenblas-dev \
+     libssl-dev \
+     ca-certificates \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the requirements file
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code
+ COPY . .
+
+ # Set up the environment variables
+ ENV HF_HUB_ENABLE_HF_TOKEN=1
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,51 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from llama_cpp import Llama
+ import os
+
+ # Define the FastAPI app
+ app = FastAPI()
+
+ # Path to the GGUF model file
+ MODEL_NAME = "SmolVLM-500M-Instruct-GGUF.Q4_K_M.gguf"
+ MODEL_PATH = f"./{MODEL_NAME}"
+
+ # Download the model from the Hub if it's not present
+ if not os.path.exists(MODEL_PATH):
+     from huggingface_hub import hf_hub_download
+     hf_hub_download(
+         repo_id="ggml-org/SmolVLM-500M-Instruct-GGUF",
+         filename=MODEL_NAME,
+         local_dir=".",
+         local_dir_use_symlinks=False
+     )
+
+ # Load the Llama model
+ try:
+     llm = Llama(model_path=MODEL_PATH, n_ctx=2048, verbose=False)
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     llm = None
+
+ class InferenceRequest(BaseModel):
+     prompt: str
+
+ @app.post("/generate")
+ def generate_text(request: InferenceRequest):
+     if llm is None:
+         raise HTTPException(status_code=500, detail="Model not loaded")
+
+     try:
+         output = llm.create_completion(
+             prompt=request.prompt,
+             max_tokens=256,
+             stop=["<|im_end|>", "</s>"],
+             temperature=0.7
+         )
+         return {"text": output["choices"][0]["text"].strip()}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/")
+ def health_check():
+     return {"status": "ok", "model_loaded": llm is not None}
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi
+ uvicorn
+ llama-cpp-python
+ huggingface_hub
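
For quick verification, here is a minimal client sketch for the two endpoints defined in app.py. It assumes the container is reachable at localhost:7860 (the port exposed in the Dockerfile) and that the `requests` package is available on the client side; `requests` is not part of requirements.txt.

# Client-side sketch (assumptions: server reachable at localhost:7860,
# `requests` installed separately on the client).
import requests

BASE_URL = "http://localhost:7860"  # adjust to the deployed Space URL if needed

# Health check: confirms the server is up and whether the GGUF model loaded
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# Text generation: POST a JSON body matching the InferenceRequest schema
resp = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Explain what a GGUF file is in one sentence."},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["text"])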