leotrieu committed on
Commit 9c08c69 · 1 Parent(s): a7a80ff

Initialize app

Files changed (3)
  1. Dockerfile +30 -0
  2. app.py +51 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM python:3.10-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Install system dependencies for llama-cpp-python
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential \
+     libopenblas-dev \
+     libssl-dev \
+     ca-certificates \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the requirements file
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code
+ COPY . .
+
+ # Set up the environment variables
+ ENV HF_HUB_ENABLE_HF_TOKEN=1
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,51 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from llama_cpp import Llama
+ import os
+
+ # Define the FastAPI app
+ app = FastAPI()
+
+ # Path to the GGUF model file
+ MODEL_NAME = "SmolVLM-500M-Instruct-GGUF.Q4_K_M.gguf"
+ MODEL_PATH = f"./{MODEL_NAME}"
+
+ # Download the model from the Hub if it's not present
+ if not os.path.exists(MODEL_PATH):
+     from huggingface_hub import hf_hub_download
+     hf_hub_download(
+         repo_id="ggml-org/SmolVLM-500M-Instruct-GGUF",
+         filename=MODEL_NAME,
+         local_dir=".",
+         local_dir_use_symlinks=False
+     )
+
+ # Load the Llama model
+ try:
+     llm = Llama(model_path=MODEL_PATH, n_ctx=2048, verbose=False)
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     llm = None
+
+ class InferenceRequest(BaseModel):
+     prompt: str
+
+ @app.post("/generate")
+ def generate_text(request: InferenceRequest):
+     if llm is None:
+         raise HTTPException(status_code=500, detail="Model not loaded")
+
+     try:
+         output = llm.create_completion(
+             prompt=request.prompt,
+             max_tokens=256,
+             stop=["<|im_end|>", "</s>"],
+             temperature=0.7
+         )
+         return {"text": output["choices"][0]["text"].strip()}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/")
+ def health_check():
+     return {"status": "ok", "model_loaded": llm is not None}
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi
+ uvicorn
+ llama-cpp-python
+ huggingface_hub
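
For quick verification, here is a minimal client sketch for the two endpoints defined in app.py. It assumes the container is reachable at localhost:7860 (the port exposed in the Dockerfile) and that the `requests` package is available on the client side; `requests` is not part of requirements.txt.

# Client-side sketch (assumptions: server reachable at localhost:7860,
# `requests` installed separately on the client).
import requests

BASE_URL = "http://localhost:7860"  # adjust to the deployed Space URL if needed

# Health check: confirms the server is up and whether the GGUF model loaded
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# Text generation: POST a JSON body matching the InferenceRequest schema
resp = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Explain what a GGUF file is in one sentence."},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["text"])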