# evals-for-every-language / Dockerfile.eval
# davidpomerenke's picture
# Upload from GitHub Actions: Merge pull request #9 from datenlabor-bmz/jn-dev
# 7c06aef verified
# Base image: slim variant of the official Python 3.12 image.
FROM python:3.12-slim

# Every relative path used below (COPY targets, .venv, start.sh) resolves under /app.
WORKDIR /app

# Install system dependencies.
#   --no-install-recommends keeps optional "Recommends" packages out of the image;
#   ca-certificates is listed explicitly so HTTPS via curl keeps working without them.
#   The apt package lists are deleted in the same layer, so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*
# Copy only the dependency manifests first, so this layer and the install below
# stay cached until pyproject.toml or uv.lock changes.
COPY pyproject.toml uv.lock ./

# Install uv, then build the project environment from uv.lock as-is (--frozen:
# the lockfile is not re-resolved or updated).
# --no-cache-dir (pip) and --no-cache (uv) keep download caches out of the image layer.
# NOTE(review): uv itself is unpinned; consider pinning a version for reproducible builds.
RUN pip install --no-cache-dir uv && uv sync --frozen --no-cache
# Copy the application code into /app.
# NOTE(review): this copies the whole build context. Confirm a .dockerignore
# excludes .venv, .git and any .env files, otherwise a local .venv could
# overwrite the environment built in the previous step.
COPY . .

# Build-time smoke test: fail the build early if any runtime dependency cannot
# be imported from the project's virtual environment.
# The continued lines are joined into a single `import a, b, ...; print(...)`
# statement before the shell sees it, so the extra whitespace is harmless.
RUN .venv/bin/python -c "import \
        pandas, \
        datasets, \
        evaluate, \
        fastapi, \
        uvicorn, \
        google.cloud.storage, \
        google.cloud.translate, \
        dotenv, \
        elevenlabs, \
        huggingface_hub, \
        joblib, \
        language_data, \
        openai, \
        requests, \
        scipy, \
        aiolimiter, \
        sentencepiece, \
        langcodes, \
        rich, \
        tqdm; \
    print('✅ All dependencies verified')"
# Conservative default limits for a run, declared in one ENV instruction.
# start.sh logs these values at startup; presumably evals/main_gcs.py reads
# them as well (verify against the application code).
ENV N_SENTENCES=20 \
    MAX_LANGUAGES=150 \
    COST_LIMIT_USD=20
# Generate /app/start.sh, the container entrypoint. The script:
#   1. enables unbuffered UTF-8 Python output so logs show up immediately;
#   2. starts a placeholder HTTP server on $PORT (default 8080) so Cloud Run sees
#      a listening port. It serves an EMPTY directory, so the application source
#      and any copied config files are not downloadable over HTTP;
#   3. runs the evaluation under a hard wall-clock limit (MAX_RUNTIME, default
#      24h) enforced by coreutils `timeout`, which signals its own child process
#      and therefore needs neither pkill nor a command-line pattern match;
#   4. stops the HTTP server and exits with the evaluation's status, so a failed
#      or timed-out run is not reported as a success (timeout exits 124 on expiry).
#
# printf with %b is used instead of echo so the \n escapes are expanded regardless
# of which shell runs this RUN step.
# Lines starting with `#` inside the RUN below are Dockerfile comments: the
# Dockerfile parser strips them, so they are NOT written into start.sh.
#
# NOTE(review): the container still runs as root; adding a non-root USER needs
# a check of which paths the evaluation writes to.
RUN printf '%b' '#!/bin/bash\n\
\n\
# Force immediate log flushing for Cloud Run visibility
export PYTHONUNBUFFERED=1\n\
export PYTHONIOENCODING=utf-8\n\
\n\
echo "🚀 Starting AI Language Evaluation..."\n\
echo "📊 Configuration: $N_SENTENCES sentences, $MAX_LANGUAGES languages"\n\
echo "💰 Cost limit: $COST_LIMIT_USD USD"\n\
echo "🛡️ Cost protection enabled"\n\
echo "🔧 Logging: Unbuffered Python output enabled"\n\
\n\
# Placeholder HTTP server for Cloud Run, rooted in an empty directory
mkdir -p /tmp/healthcheck\n\
python -m http.server "${PORT:-8080}" --directory /tmp/healthcheck &\n\
HTTP_SERVER_PID=$!\n\
\n\
# Run the evaluation; SIGTERM at the limit, SIGKILL 60s later if still alive
cd /app || exit 1\n\
timeout --kill-after=60s "${MAX_RUNTIME:-24h}" .venv/bin/python -u evals/main_gcs.py\n\
status=$?\n\
\n\
# Stop the HTTP server
kill "$HTTP_SERVER_PID" 2>/dev/null\n\
\n\
if [ "$status" -eq 124 ]; then\n\
    echo "⚠️ MAX RUNTIME REACHED! Stopping evaluation..."\n\
elif [ "$status" -eq 0 ]; then\n\
    echo "✅ Evaluation completed!"\n\
else\n\
    echo "❌ Evaluation failed with exit code $status"\n\
fi\n\
exit "$status"\n\
' > /app/start.sh && chmod +x /app/start.sh
# Port used by the HTTP server that start.sh launches (for Cloud Run requirements).
# EXPOSE is documentation only; it does not publish the port by itself.
EXPOSE 8080
# Default command: run the generated startup script. Exec (JSON-array) form is
# used, so the script is started directly rather than through `/bin/sh -c`.
# This instruction does not impose any resource limits; the runtime limit is
# handled inside start.sh.
CMD ["/app/start.sh"]