File size: 989 Bytes
d187b57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
#
# Launches model/train.py as a background job under nohup so it keeps running
# after the invoking shell exits.
#
# Run this from the project root: every path below is relative to the current
# directory, and the current directory is appended to PYTHONPATH.
#
# Files written:
#   logs/error_<timestamp>.log  - stderr of the training process
#   logs/train_<timestamp>.pid  - PID of the launched process

# Basic configuration
export CUDA_VISIBLE_DEVICES="0,1"
export PYTHONWARNINGS="ignore"
# Add the current directory to the Python path. The ${VAR:+...} form emits the
# ":" separator only when PYTHONPATH is already non-empty, so an unset or empty
# PYTHONPATH no longer produces a stray leading ":" (an empty path entry).
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}"

# Create directories. Without logs/ neither the error log nor the PID file can
# be written, so stop here rather than report a launch that never happened.
if ! mkdir -p logs weights cache; then
  echo "Error: could not create logs/, weights/ or cache/ in ${PWD}" >&2
  exit 1
fi

# Get timestamp; it is shared by the error log and the PID file names
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
ERROR_LOG="logs/error_${TIMESTAMP}.log"
PID_FILE="logs/train_${TIMESTAMP}.pid"

# Print configuration
echo "Starting training with configuration:"
echo "======================================"
echo "Error log: $ERROR_LOG"
echo "PYTHONPATH: $PYTHONPATH"
echo "======================================"

# Start training with nohup. Only stderr is redirected explicitly; stdout is
# left alone, which means nohup itself appends it to nohup.out when stdout is
# a terminal.
echo "Starting training in background..."
nohup python model/train.py 2> "$ERROR_LOG" &

# Save process ID ($! is the PID of the background job started just above)
pid=$!
echo "$pid" > "$PID_FILE"
echo "Training process started with PID: $pid"
echo
echo "Monitor commands:"
echo "1. View error log:         tail -f $ERROR_LOG"
echo "2. Check process status:   ps -p $pid"
echo "3. Stop training:          kill $pid"