Spaces: Build error

Commit 613c8f7 · Parent: 16815f0

Update Dockerfile and improve error handling

Files changed:
- Dockerfile (+13 −3)
- app.py (+5 −0)
- llm.py (+19 −6)
Dockerfile
CHANGED

@@ -1,4 +1,4 @@
-FROM python:3.9
+FROM python:3.9
 
 WORKDIR /app
 
@@ -6,12 +6,21 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
+    git \
     software-properties-common \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy requirements first to leverage Docker cache
 COPY requirements.txt .
-
+
+# Install Python packages
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Pre-download the model
+RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; \
+    model_id='mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF'; \
+    tokenizer = AutoTokenizer.from_pretrained(model_id); \
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')"
 
 # Copy the rest of the application
 COPY . .
@@ -19,9 +28,10 @@ COPY . .
 # Set environment variables
 ENV HOST=0.0.0.0
 ENV PORT=7860
+ENV PYTHONUNBUFFERED=1
 
 # Expose the port HF Spaces expects
 EXPOSE 7860
 
 # Start the FastAPI app
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
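A note on the new pre-download step: it instantiates the model with AutoModelForCausalLM at build time, but the referenced repo is a GGUF conversion, and transformers generally cannot load a GGUF-only repo this way without extra arguments. That may be related to the Space's "Build error" status. A minimal alternative sketch (not part of this commit), assuming the goal is only to warm the build cache with the model files:

# Hedged sketch of an alternative pre-download step (not part of this commit).
# Assumption: the intent is only to cache the model files at build time.
# snapshot_download fetches the repo without instantiating the model.
from huggingface_hub import snapshot_download

snapshot_download(repo_id="mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")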
app.py
CHANGED

@@ -22,6 +22,11 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {"status": "healthy", "version": "1.0.0"}
+
 # Initialize Supabase client
 supabase_url = os.getenv("SUPABASE_URL")
 supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
llm.py
CHANGED

@@ -8,12 +8,25 @@ load_dotenv()
 class LLMPipeline:
     def __init__(self):
         model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
-
-
-
-
-
-
+        try:
+            # Try to use CUDA if available
+            if torch.cuda.is_available():
+                device = "cuda"
+                dtype = torch.float16
+            else:
+                device = "cpu"
+                dtype = torch.float32
+
+            self.pipeline = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=dtype,
+                device_map="auto" if device == "cuda" else None,
+                model_kwargs={"low_cpu_mem_usage": True}
+            )
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            raise
 
     async def generate(self, prompt: str, max_length: int = 100) -> str:
         """Generate text using the local Gemma model."""
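Since generate is declared async, callers need an event loop. A usage sketch under the assumption that llm.py imports torch and transformers.pipeline at module level (the __init__ shown in the diff references both):

# Usage sketch; assumes llm.py imports torch and transformers.pipeline,
# since __init__ references both. LLMPipeline() raises on load failure
# (the new try/except logs and re-raises).
import asyncio
from llm import LLMPipeline

async def main():
    llm = LLMPipeline()
    text = await llm.generate("Write a haiku about containers.", max_length=100)
    print(text)

if __name__ == "__main__":
    asyncio.run(main())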