File size: 2,039 Bytes
ee86994
 
ebea37f
 
 
 
ee86994
 
 
 
ebea37f
 
 
 
 
ee86994
 
 
 
ebea37f
 
 
 
 
 
 
 
ee86994
 
ebea37f
 
 
 
 
 
 
 
 
 
ee86994
 
 
ebea37f
ee86994
ebea37f
 
 
ee86994
 
 
ebea37f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash

# Announce startup, then launch the vLLM OpenAI-compatible API server in the
# background so the rest of the script can poll it for readiness.
echo "=== Starting NuMarkdown-8B-Thinking Space ==="
echo "Starting vLLM server with optimized settings..."

# Start vLLM with HF Spaces optimizations.
# The flags live in an array so each one is passed as exactly one word,
# without relying on backslash line continuations.
vllm_args=(
    --model numind/NuMarkdown-8B-Thinking
    --port 8000
    --host 0.0.0.0
    --max-model-len 8048
    --gpu-memory-utilization 0.9
    --disable-log-requests
    --tensor-parallel-size 1
    --trust-remote-code
)

# stdout and stderr both go to the log file; the path is quoted so a HOME
# containing whitespace cannot turn the redirect into an "ambiguous redirect".
python3 -m vllm.entrypoints.openai.api_server "${vllm_args[@]}" \
    > "$HOME/app/vllm.log" 2>&1 &

# PID of the background server, used later to detect an early crash.
VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"

# Poll vLLM until one of its HTTP endpoints answers successfully, giving up
# after 180 attempts. Each attempt is followed by a 2-second sleep, so the
# wait is roughly 6 minutes plus whatever time the probes themselves take.
echo "Waiting for vLLM server to start (this may take up to 6 minutes)..."
for i in {1..180}; do
    # -f makes curl exit non-zero on HTTP error statuses, so an error page is
    # not mistaken for readiness; --max-time bounds a probe that connects but
    # never receives a reply.
    if curl -sf --connect-timeout 5 --max-time 10 http://localhost:8000/health > /dev/null 2>&1; then
        echo "✓ vLLM health check passed!"
        break
    elif curl -sf --connect-timeout 5 --max-time 10 http://localhost:8000/v1/models > /dev/null 2>&1; then
        echo "✓ vLLM server is ready!"
        break
    fi

    # Fail fast: verify on every attempt that the server process still exists
    # instead of waiting for the next progress tick. kill -0 sends no signal;
    # it only reports whether the PID can be signalled.
    if ! kill -0 "$VLLM_PID" 2> /dev/null; then
        echo "❌ vLLM process died! Checking logs:"
        tail -20 "$HOME/app/vllm.log"
        exit 1
    fi

    # Show progress every 10 attempts (about every 20 seconds).
    if (( i % 10 == 0 )); then
        echo "Still waiting... ($i/180) - checking vLLM process"
    fi
    sleep 2
done

# Final check: confirm the models endpoint really answers before moving on.
# -f rejects HTTP error statuses, and the timeouts keep a stalled connection
# from hanging the script indefinitely.
if ! curl -sf --connect-timeout 5 --max-time 10 http://localhost:8000/v1/models > /dev/null; then
    echo "❌ vLLM server failed to start after 6 minutes!"
    echo "Last 50 lines of vLLM logs:"
    tail -50 "$HOME/app/vllm.log"
    exit 1
fi

# vLLM is up: report it, print some diagnostics, then run the Gradio app in
# the foreground.
echo "✅ vLLM server is ready!"
echo "=== Starting Gradio App ==="
echo "Port 7860 status before launching Gradio:"
# Only report the port as free when netstat actually ran; without this guard
# a missing netstat binary would also print "Port 7860 is free".
if command -v netstat > /dev/null 2>&1; then
    netstat -tuln | grep :7860 || echo "Port 7860 is free"
else
    echo "netstat not available; skipping port check"
fi

echo "Environment check:"
echo "PORT=${PORT:-7860}"
echo "PWD=$(pwd)"
echo "USER=$(whoami)"

# Launch Gradio with explicit error handling: the braces run only when the
# app exits with a non-zero status.
echo "Launching Gradio..."
python3 "$HOME/app/app.py" || {
    echo "❌ Gradio failed to start!"
    echo "Checking if port is in use:"
    if command -v netstat > /dev/null 2>&1; then
        netstat -tuln | grep :7860
    else
        echo "netstat not available; skipping port check"
    fi
    exit 1
}