Skip to content

Nvidia:MemoryRestartBashExample

메모리 크기를 확인하여 재시작하는 예제.

Code

#!/usr/bin/env bash
#
# Memory watchdog: every SLEEP_SECONDS, read the "used" memory column (MiB)
# from `free -m`; once it reaches MAX_MEMORY_SIZE, SIGKILL the process(es)
# that `nvidia-smi` lists under the name "c2node" and append an alert line to
# LOG_PATH.
#
# NOTE(review): the script only kills the process; the actual restart is
# presumably done by an external supervisor - confirm.
#
# `set -e` / `pipefail` are deliberately not enabled: a failing probe
# (e.g. nvidia-smi unavailable for a moment) must not stop the watchdog.

readonly MAX_MEMORY_SIZE=15000            # threshold, in MiB as printed by `free -m`
readonly LOG_PATH="$HOME/rest_restart.log"
readonly SLEEP_SECONDS=10                 # pause between two checks

# Print a message to stdout and append it to the log file.
log() {
    printf '%s\n' "$*" | tee -a "$LOG_PATH"
}

# The loop below never terminates on its own, so the END marker is written
# from an EXIT trap; INT/TERM are turned into a regular exit so that the
# EXIT trap also fires when the watchdog is interrupted.
trap 'log "Restarter END: $(date)"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

log "Restarter BEGIN: $(date)"

while true; do
    # Third field of the "Mem" row of `free -m` (the "used" column).
    memory_size=$(free -m | awk '/Mem/ {print $3}')

    # Third field of every nvidia-smi row mentioning c2node. Only purely
    # numeric values are kept so that a placeholder such as "N/A" is never
    # handed to kill.
    # NOTE(review): which column holds the PID depends on the nvidia-smi
    # table layout of the installed driver - confirm on the target machine.
    mapfile -t pids < <(nvidia-smi | awk '/c2node/ && $3 ~ /^[0-9]+$/ {print $3}')
    pid_list="${pids[*]}"

    echo "PID(${pid_list}) MEMORY SIZE: $memory_size / $MAX_MEMORY_SIZE"

    if [[ ! $memory_size =~ ^[0-9]+$ ]]; then
        # An empty/garbled reading would otherwise be evaluated as 0 and
        # silently reported as healthy.
        echo "Warning: could not read memory size from 'free -m'." >&2
    elif [[ $memory_size -lt $MAX_MEMORY_SIZE ]]; then
        echo 'Ok.'
    else
        echo 'Error!'
        if (( ${#pids[@]} > 0 )); then
            kill -s SIGKILL "${pids[@]}"
            log "Restart (PID:${pid_list}) alert [${memory_size}/${MAX_MEMORY_SIZE}]: $(date)"
        else
            # Nothing to kill: record the condition instead of running
            # `kill` without arguments and logging a bogus restart.
            log "No c2node PID found, nothing killed [${memory_size}/${MAX_MEMORY_SIZE}]: $(date)"
        fi
    fi

    sleep "$SLEEP_SECONDS"
done

See also