ameliakris committed
Commit 613c8f7 · 1 parent: 16815f0

Update Dockerfile and improve error handling

Files changed (3):
  1. Dockerfile  +13 -3
  2. app.py      +5 -0
  3. llm.py      +19 -6
Dockerfile CHANGED
@@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.9
 
 WORKDIR /app
 
@@ -6,12 +6,21 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
+    git \
     software-properties-common \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy requirements first to leverage Docker cache
 COPY requirements.txt .
-RUN pip install -r requirements.txt
+
+# Install Python packages
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Pre-download the model
+RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; \
+    model_id='mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF'; \
+    tokenizer = AutoTokenizer.from_pretrained(model_id); \
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')"
 
 # Copy the rest of the application
 COPY . .
@@ -19,9 +28,10 @@ COPY . .
 # Set environment variables
 ENV HOST=0.0.0.0
 ENV PORT=7860
+ENV PYTHONUNBUFFERED=1
 
 # Expose the port HF Spaces expects
 EXPOSE 7860
 
 # Start the FastAPI app
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
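Note on the new pre-download step: instantiating the tokenizer and model during docker build bakes the weights into the image so cold starts skip the download, but the build machine then needs enough RAM to hold the full model. A lighter variant (a sketch only, not part of this commit) caches the repo files without loading the model, using huggingface_hub:

    # Sketch only: cache the model files at build time without loading them.
    # snapshot_download fetches the repo into the local HF cache, so this
    # needs disk space but not enough RAM to instantiate the model.
    from huggingface_hub import snapshot_download

    snapshot_download(repo_id="mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")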
app.py CHANGED
@@ -22,6 +22,11 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {"status": "healthy", "version": "1.0.0"}
+
 # Initialize Supabase client
 supabase_url = os.getenv("SUPABASE_URL")
 supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
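The new /health endpoint gives HF Spaces, or any load balancer, something cheap to poll. A quick way to exercise it, assuming app.py exposes the FastAPI instance as app (which the uvicorn command "app:app" in the Dockerfile indicates):

    # Sketch: exercising the new health endpoint with FastAPI's test client.
    from fastapi.testclient import TestClient
    from app import app

    client = TestClient(app)
    resp = client.get("/health")
    assert resp.status_code == 200
    assert resp.json() == {"status": "healthy", "version": "1.0.0"}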
llm.py CHANGED
@@ -8,12 +8,25 @@ load_dotenv()
 class LLMPipeline:
     def __init__(self):
         model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
-        self.pipeline = pipeline(
-            "text-generation",
-            model=model_id,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
+        try:
+            # Try to use CUDA if available
+            if torch.cuda.is_available():
+                device = "cuda"
+                dtype = torch.float16
+            else:
+                device = "cpu"
+                dtype = torch.float32
+
+            self.pipeline = pipeline(
+                "text-generation",
+                model=model_id,
+                torch_dtype=dtype,
+                device_map="auto" if device == "cuda" else None,
+                model_kwargs={"low_cpu_mem_usage": True}
+            )
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            raise
 
     async def generate(self, prompt: str, max_length: int = 100) -> str:
         """Generate text using the local Gemma model."""