# syntax=docker/dockerfile:1

# Slim Python base keeps the image small; tag is pinned for reproducibility.
FROM python:3.11-slim

# All build artifacts and the app live under /code (WORKDIR creates it).
WORKDIR /code

# Build tools needed to compile any wheels that ship without binaries.
# --no-install-recommends plus the apt-list cleanup keeps this layer minimal.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so this expensive layer is cached
# until requirements.txt itself changes. sentencepiece (tokenization) is not
# in requirements.txt, so it is installed in the same layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir sentencepiece

# NLTK data directory. World-writable (777) because the hosting platform
# (e.g. HF Spaces) runs the container as an arbitrary non-root UID that must
# be able to write here — NOTE(review): tighten if you control the runtime UID.
ENV NLTK_DATA=/code/nltk_data
RUN mkdir -p "$NLTK_DATA" \
    && python -m nltk.downloader -d "$NLTK_DATA" punkt punkt_tab \
    && chmod -R 777 "$NLTK_DATA"

# Hugging Face cache location. TRANSFORMERS_CACHE is deprecated in favour of
# HF_HOME but is kept so older transformers versions resolve the same path.
ENV HF_HOME=/code/transformers_cache \
    TRANSFORMERS_CACHE=/code/transformers_cache

# Pre-download the embedding and reranking models at build time so the
# container never hits the network (or rebuilds the cache) on first request.
# Create, populate, and permission the cache in one layer.
RUN mkdir -p "$HF_HOME" \
    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')" \
    && python -c "from sentence_transformers import CrossEncoder; CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')" \
    && chmod -R 777 "$HF_HOME"

# Application code last: source edits never invalidate the heavy layers above.
COPY . .

# Documentation only — the port uvicorn listens on (not auto-published).
EXPOSE 7860

# Exec-form CMD so uvicorn runs as PID 1 and receives SIGTERM on `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]