File size: 1,328 Bytes
ffda1b5
 
 
 
 
 
 
 
 
 
 
 
 
6209c8b
 
 
2b643f9
 
 
 
1dbe00a
 
e48e5e1
 
 
fe819e8
 
6209c8b
 
 
 
fe819e8
6209c8b
ffda1b5
 
 
 
cd779ad
 
6209c8b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
FROM python:3.9-slim

WORKDIR /code

# System build tools needed to compile Python wheels that ship no binary
# for this platform. --no-install-recommends keeps the layer minimal, and
# the apt list cache is removed in the same layer so it never persists.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching: this layer is reused until
# requirements.txt itself changes, even when application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# sentencepiece is installed outside requirements.txt; use --no-cache-dir
# here too so the pip download cache doesn't bloat this layer.
RUN pip install --no-cache-dir sentencepiece

# NLTK data directory. World-writable (777) on purpose: Hugging Face
# Spaces runs the container under an arbitrary non-root UID that must be
# able to write here at runtime.
RUN mkdir -p /code/nltk_data && chmod -R 777 /code/nltk_data

# Point NLTK at the prepared directory before downloading into it.
ENV NLTK_DATA=/code/nltk_data

# Pre-download the punkt tokenizer so the app doesn't fetch it on startup.
RUN python -c "import nltk; nltk.download('punkt')"

# Hugging Face / transformers cache; same world-writable rationale as
# nltk_data above.
RUN mkdir -p /code/transformers_cache && chmod -R 777 /code/transformers_cache

# HF_HOME routes the huggingface_hub download cache here as well, so the
# models baked in below land in a path the non-root runtime UID can read
# (the default /root/.cache is not accessible to it, which would force a
# re-download on every boot). TRANSFORMERS_CACHE is kept for backward
# compatibility with older transformers versions, though it is deprecated
# in newer releases in favor of HF_HOME.
ENV TRANSFORMERS_CACHE=/code/transformers_cache \
    HF_HOME=/code/transformers_cache

# Bake the sentence-transformers models into the image so they are not
# re-downloaded at container start.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
RUN python -c "from sentence_transformers import CrossEncoder; CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')"

# Copy application code and models last so code edits don't invalidate
# the dependency and model layers above.
COPY . .

# Documentation only (does not publish the port): the FastAPI app listens
# on 7860, the Hugging Face Spaces default.
EXPOSE 7860

# Run FastAPI via uvicorn. Exec (JSON-array) form so uvicorn is PID 1 and
# receives SIGTERM directly on `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]