我在下面的脚本中使用了 sar,并尝试在各台服务器上运行它以进行 nagios 监控。有人可以建议我如何在下面的检查中得到 swap 的警告阈值(SWAP_WARN)和严重阈值(SWAP_CRIT)吗?我尝试用下面的命令在每台服务器上动态计算这两个值,但它报告错误。
(swap 的警告阈值和严重阈值需要分别按总 swap 大小的 20% 和 40% 计算)
请建议。
# 2 = CRITICAL (SWAP usage higher than CRITICAL)
# 3 = UNKNOWN (Wrong usage)
SWAP_WARN=
SWAP_CRIT=
SWAPOUT_ACTIVITY_TEST=
## GET SWAP Warning and Critical values from the Machine
# 2 = CRITICAL (SWAP usage higher than CRITICAL)
# 3 = UNKNOWN (Wrong usage)
SWAP_WARN=
SWAP_CRIT=
SWAPOUT_ACTIVITY_TEST=
**## GET SWAP Warning and Critical values from the Machine
# Take the third whitespace-separated field of the last line of `swapon -s`.
temp=$(swapon -s | tail -n 1 | awk '{print $3}' )
# NOTE(review): the single quotes below stop the shell from expanding anything,
# so bc is handed the literal text $(temp)*.20 instead of a number. Even inside
# double quotes, $(temp) would be command substitution (it runs a command named
# "temp"); the variable form is ${temp}. In addition, multiplying by .20/.40 in
# bc yields a decimal result, which the integer tests ([ -gt ], [ -lt ]) used
# later in this script cannot compare.
SWAP_WARN=$(echo '$(temp)*.20' | bc)
SWAP_CRIT=$(echo '$(temp)*.40' | bc)**
## FETCH ARGUMENTS
##while getopts "h:" OPTION; do
# case "${h}" in
# h)
# usage
# exit 3
# ;;
# ?)
# usage
# exit 3
# ;;
# esac
#done
## CHECK ARGUMENTS
if [ -z ${SWAP_WARN} ] || [ -z ${SWAP_CRIT} ] || [ ${SWAP_WARN} -gt ${SWAP_CRIT} ] ; then
usage
exit 3
fi
## GET SWAP INFO FROM MACHINE
cd /var/log/sa
FOR_VALUE=sa
FOR_DATE=$(date | awk '{print $3}')
SA=$FOR_VALUE$FOR_DATE
SWAPOUT_ACTIVITY_TEST=$(sar -S -f $SA | tail -n 2| head -n 1 | awk '{print $4}')
## CHECK SWAPPING ON MACHINE
if [ ${SWAPOUT_ACTIVITY_TEST} -lt ${SWAP_WARN} ]; then
## SWAP IS OK
LINE="OK! Swapout size in last 10 minutes : ${SWAPOUT_ACTIVITY_TEST} | swapout_size=${SWAPOUT_ACTIVITY_TEST}B;${SWAP_WARN};${SWAP_CRIT};"
echo $LINE
exit 0
elif [ ${SWAPOUT_ACTIVITY_TEST} -gt ${SWAP_WARN} ] && [ ${SWAPOUT_ACTIVITY_TEST} -lt ${SWAP_CRIT} ] || [ ${SWAPOUT_ACTIVITY_TEST} -eq ${SWAP_WARN} ]; then
## SWAP IS IN WARNING STATE
LINE="WARNING! Swapout size in last 10 minutes: ${SWAPOUT_ACTIVITY_TEST} | swapout_size=${SWAPOUT_ACTIVITY_TEST}B;${SWAP_WARN};${SWAP_CRIT};"
:
## FETCH ARGUMENTS
##while getopts "h:" OPTION; do
# case "${h}" in
# h)
# usage
# exit 3
# ;;
# ?)
# usage
# exit 3
# ;;
# esac
#done
## CHECK ARGUMENTS
if [ -z ${SWAP_WARN} ] || [ -z ${SWAP_CRIT} ] || [ ${SWAP_WARN} -gt ${SWAP_CRIT} ] ; then
usage
exit 3
fi
## GET SWAP INFO FROM MACHINE
cd /var/log/sa
FOR_VALUE=sa
FOR_DATE=$(date | awk '{print $3}')
SA=$FOR_VALUE$FOR_DATE
SWAPOUT_ACTIVITY_TEST=$(sar -S -f $SA | tail -n 2| head -n 1 | awk '{print $4}')
## CHECK SWAPPING ON MACHINE
if [ ${SWAPOUT_ACTIVITY_TEST} -lt ${SWAP_WARN} ]; then
## SWAP IS OK
LINE="OK! Swapout size in last 10 minutes : ${SWAPOUT_ACTIVITY_TEST} | swapout_size=${SWAPOUT_ACTIVITY_TEST}B;${SWAP_WARN};${SWAP_CRIT};"
echo $LINE
exit 0
elif [ ${SWAPOUT_ACTIVITY_TEST} -gt ${SWAP_WARN} ] && [ ${SWAPOUT_ACTIVITY_TEST} -lt ${SWAP_CRIT} ] || [ ${SWAPOUT_ACTIVITY_TEST} -eq ${SWAP_WARN} ]; then
## SWAP IS IN WARNING STATE
LINE="WARNING! Swapout size in last 10 minutes: ${SWAPOUT_ACTIVITY_TEST} | swapout_size=${SWAPOUT_ACTIVITY_TEST}B;${SWAP_WARN};${SWAP_CRIT};"
答案1
您不能使用 check_swap 这个 nagios 插件,有什么具体原因吗?
check_swap -w 80% -c 60%
如果您坚持使用 sar,为什么要计算阈值的绝对值?难道您不能直接使用 %swpused 列中的值来确定检查状态吗?
# Nagios-style swap check driven by a percentage read from `sar -S`.
# Expects SA to hold the name of the sar data file (it is not set in this snippet).
# Exit codes: 0 = OK, 1 = WARNING (more than 20% used),
#             2 = CRITICAL (more than 40% used),
#             3 = UNKNOWN (no usable numeric value could be read from sar).
#
# The value is field 5 of the second-to-last line of the sar output.
# NOTE(review): field 5 lines up with %swpused only when the timestamp takes two
# fields (e.g. "12:00:01 AM") - confirm against this host's sar output.
SWPUSED=$(sar -S -f "$SA" | tail -n 2 | head -n 1 | awk '{print $5}')

# Round to an integer for the [ -gt ] tests. An empty or non-numeric value
# (sar failed, data file missing, header-only output) must not be rounded:
# printf would turn it into 0 and the check would falsely report OK.
SWPUSED_ROUNDED=
case "$SWPUSED" in
  '' | *[!0-9.,]*)
    ;; # leave SWPUSED_ROUNDED empty so the UNKNOWN branch below fires
  *)
    SWPUSED_ROUNDED=$(printf "%.0f" "$SWPUSED" 2>/dev/null) || SWPUSED_ROUNDED=
    ;;
esac

if [ -z "$SWPUSED_ROUNDED" ]; then
  echo "UNKNOWN: could not read a numeric %swpused value from sar"
  exit 3
fi

if [ "$SWPUSED_ROUNDED" -gt 40 ]; then
  echo "CRITICAL: ${SWPUSED}% of swap space used"
  exit 2
elif [ "$SWPUSED_ROUNDED" -gt 20 ]; then
  echo "WARNING: ${SWPUSED}% of swap space used"
  exit 1
else
  echo "OK: ${SWPUSED}% of swap space used"
  exit 0
fi