Spaces:

fair-forward
/

evals-for-every-language

Running

App Files Files Community

evals-for-every-language / Dockerfile.eval

davidpomerenke

Upload from GitHub Actions: Merge pull request #9 from datenlabor-bmz/jn-dev

7c06aef verified 1 day ago

raw

history blame contribute delete

2.25 kB

	FROM python:3.12-slim

	# All following instructions, and the container's default working directory,
	# are relative to /app.
	WORKDIR /app

	# Install system dependencies.
	# --no-install-recommends keeps optional recommended packages out of the image;
	# ca-certificates is listed explicitly so curl keeps a CA bundle for HTTPS
	# even though recommended packages are skipped.
	# The apt package lists are removed in the same layer so they never persist.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	    ca-certificates \
	    curl \
	    && rm -rf /var/lib/apt/lists/*

	# Copy only the dependency manifests first, so the install layer below is
	# reused from cache until pyproject.toml or uv.lock changes.
	COPY pyproject.toml uv.lock ./

	# Install uv, then build the project virtualenv (.venv) from the lockfile
	# without re-resolving it (--frozen).
	# --no-cache-dir (pip) and --no-cache (uv) keep download caches out of the layer.
	RUN pip install --no-cache-dir uv && uv sync --frozen --no-cache

	# Add the application source from the build context into /app
	COPY . .

	# Fail the build early unless every listed dependency can be imported
	# with the virtualenv interpreter
	RUN .venv/bin/python -c "import \
	    pandas, datasets, evaluate, fastapi, uvicorn, \
	    google.cloud.storage, google.cloud.translate, \
	    dotenv, elevenlabs, huggingface_hub, joblib, language_data, \
	    openai, requests, scipy, aiolimiter, sentencepiece, langcodes, \
	    rich, tqdm; \
	    print('✅ All dependencies verified')"

	# Conservative default limits, baked into the image as environment variables
	# (values supplied when the container is started take precedence)
	ENV N_SENTENCES=20 \
	    MAX_LANGUAGES=150 \
	    COST_LIMIT_USD=20

	# Create the startup script: placeholder HTTP server for Cloud Run, the
	# evaluation run itself, and a maximum-runtime watchdog.
	#
	# The script is written with printf, one argument per script line, so the
	# generated file does not depend on how the build shell's echo treats escapes.
	#
	# Behaviour of the generated /app/start.sh:
	#   * The HTTP server serves an empty temporary directory instead of /app, so
	#     the copied application files are not downloadable, and it listens on
	#     $PORT when set (8080 otherwise).
	#   * The evaluation runs in the background so its PID is known; the watchdog
	#     stops exactly that PID after 24 hours using the kill builtin. Matching on
	#     the command line with pkill cannot work here: the real command line has
	#     "-u" between "python" and the script path, and this Dockerfile does not
	#     install pkill.
	#   * The script exits with the evaluation's exit status, so a failed run is
	#     not reported as a success.
	RUN printf '%s\n' \
	    '#!/bin/bash' \
	    '' \
	    '# Force immediate log flushing for Cloud Run visibility' \
	    'export PYTHONUNBUFFERED=1' \
	    'export PYTHONIOENCODING=utf-8' \
	    '' \
	    'echo "🚀 Starting AI Language Evaluation..."' \
	    'echo "📊 Configuration: $N_SENTENCES sentences, $MAX_LANGUAGES languages"' \
	    'echo "💰 Cost limit: $COST_LIMIT_USD USD"' \
	    'echo "🛡️ Cost protection enabled"' \
	    'echo "🔧 Logging: Unbuffered Python output enabled"' \
	    '' \
	    '# Start a simple HTTP server to satisfy Cloud Run requirements.' \
	    '# It serves an empty directory so no application files are published.' \
	    'HTTP_ROOT=$(mktemp -d)' \
	    'python -m http.server "${PORT:-8080}" --directory "$HTTP_ROOT" &' \
	    'HTTP_SERVER_PID=$!' \
	    '' \
	    '# Run the evaluation with forced log flushing, in the background so the' \
	    '# watchdog below can signal it by PID' \
	    'cd /app || exit 1' \
	    '.venv/bin/python -u evals/main_gcs.py &' \
	    'EVAL_PID=$!' \
	    '' \
	    '# Runtime watchdog: stop the evaluation once it has run for 24 hours' \
	    '(' \
	    '    start_time=$(date +%s)' \
	    '    while kill -0 "$EVAL_PID" 2>/dev/null; do' \
	    '        current_time=$(date +%s)' \
	    '        elapsed_hours=$(( (current_time - start_time) / 3600 ))' \
	    '        if [ "$elapsed_hours" -ge 24 ]; then' \
	    '            echo "⚠️ MAX RUNTIME REACHED! Stopping evaluation..."' \
	    '            kill "$EVAL_PID"' \
	    '            break' \
	    '        fi' \
	    '        sleep 300 # Check every 5 minutes' \
	    '    done' \
	    ') &' \
	    'WATCHDOG_PID=$!' \
	    '' \
	    '# Block until the evaluation ends and remember how it ended' \
	    'wait "$EVAL_PID"' \
	    'EVAL_STATUS=$?' \
	    '' \
	    '# Stop the watchdog and the HTTP server' \
	    'kill "$WATCHDOG_PID" "$HTTP_SERVER_PID" 2>/dev/null' \
	    '' \
	    'if [ "$EVAL_STATUS" -eq 0 ]; then' \
	    '    echo "✅ Evaluation completed!"' \
	    'else' \
	    '    echo "❌ Evaluation exited with status $EVAL_STATUS"' \
	    'fi' \
	    'exit "$EVAL_STATUS"' \
	    > /app/start.sh && chmod +x /app/start.sh

	# Expose port (for Cloud Run requirements).
	# EXPOSE only documents the port; the HTTP server launched by /app/start.sh
	# is what actually listens on it.
	EXPOSE 8080

	# Default command: run the generated startup script. Exec (JSON array) form
	# starts /app/start.sh directly rather than through an intermediate shell.
	CMD ["/app/start.sh"]