Nvidia:MemoryRestartBashExample
시스템 메모리 사용량(MiB)을 주기적으로 확인하여, 임계값 이상이면 nvidia-smi에 표시된 c2node 프로세스를 강제 종료(SIGKILL)해 재시작하도록 하는 Bash 예제.
Code
#!/usr/bin/env bash
#
# Memory watchdog.
#
# Every SLEEP_SECONDS seconds this script reads the "used" column of the
# "Mem" row printed by `free -m` and compares it with MAX_MEMORY_SIZE (MiB).
# When the used memory is not below the limit, it sends SIGKILL to every PID
# taken from the nvidia-smi lines that mention "c2node" and appends an alert
# line to LOG_PATH.
#
# The script only kills the process; it never starts it again itself.
# NOTE(review): presumably an external supervisor restarts c2node - confirm.
#
# `set -e` is deliberately not used: a failing probe must not stop the loop.

readonly MAX_MEMORY_SIZE=15000              # limit for used memory, in MiB
readonly LOG_PATH="$HOME/rest_restart.log"  # BEGIN/END/alert lines go here
readonly SLEEP_SECONDS=10                   # pause between two checks

# Print a message to stdout and append the same line to LOG_PATH.
log() {
  printf '%s\n' "$*" | tee -a "$LOG_PATH"
}

# The loop below never finishes by itself, so the END line is written from an
# EXIT trap; INT/TERM are turned into a normal exit so the trap runs for them.
trap 'log "Restarter END: $(date)"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

log "Restarter BEGIN: $(date)"

while true; do
  # Third column of the "Mem" row is the used memory. The C locale is forced
  # in case free(1) prints a translated row label.
  memory_size=$(LC_ALL=C free -m | awk '/^Mem/ {print $3; exit}')

  # Third column of every nvidia-smi line mentioning c2node, kept only when
  # it is a plain number so that kill never receives garbage.
  # NOTE(review): the PID column position depends on the nvidia-smi table
  # layout - verify against the installed driver version.
  mapfile -t pids < <(nvidia-smi | awk '/c2node/ && $3 ~ /^[0-9]+$/ {print $3}')
  pid_list=${pids[*]}

  if [[ ! $memory_size =~ ^[0-9]+$ ]]; then
    # Without a numeric reading the comparison would silently report "Ok.".
    printf 'Cannot read the used memory from free; skipping this check.\n' >&2
  else
    echo "PID(${pid_list}) MEMORY SIZE: $memory_size / $MAX_MEMORY_SIZE"
    if (( memory_size < MAX_MEMORY_SIZE )); then
      echo 'Ok.'
    else
      echo 'Error!'
      if (( ${#pids[@]} > 0 )); then
        kill -s SIGKILL "${pids[@]}" \
          || printf 'Failed to kill PID(s): %s\n' "$pid_list" >&2
      else
        # kill without a PID argument would only print a usage error.
        printf 'No c2node PID found in nvidia-smi output; nothing to kill.\n' >&2
      fi
      log "Restart (PID:${pid_list}) alert [${memory_size}/${MAX_MEMORY_SIZE}]: $(date)"
    fi
  fi

  sleep "$SLEEP_SECONDS"
done