241 lines
6.9 KiB
Bash
Executable File
241 lines
6.9 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
################################################################################
|
|
# #
|
|
# Copyright (C) 2011 Jack-Benny Persson <jack-benny@cyberinfo.se> #
|
|
# #
|
|
# This program is free software; you can redistribute it and/or modify #
|
|
# it under the terms of the GNU General Public License as published by #
|
|
# the Free Software Foundation; either version 2 of the License, or #
|
|
# (at your option) any later version. #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
# GNU General Public License for more details. #
|
|
# #
|
|
# You should have received a copy of the GNU General Public License #
|
|
# along with this program; if not, write to the Free Software #
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
|
# #
|
|
################################################################################
|
|
|
|
###############################################################################
|
|
# #
|
|
# Nagios plugin to monitor GPU temperature with aticonfig. #
|
|
# This only works on ATI cards with the proprietary driver (fglrx). #
|
|
# Written in Bash (and uses sed & awk). #
|
|
# Latest version of check_gputemp can be found at the below URL: #
|
|
# https://bitbucket.org/jackbenny/check_gputemp #
|
|
# #
|
|
###############################################################################
|
|
|
|
VERSION="Version 1.3"
|
|
AUTHOR="(c) 2011 Jack-Benny Persson (jack-benny@cyberinfo.se)"
|
|
|
|
# Sensor program
|
|
SENSORPROG=/usr/bin/aticonfig
|
|
|
|
# Exit codes
|
|
STATE_OK=0
|
|
STATE_WARNING=1
|
|
STATE_CRITICAL=2
|
|
STATE_UNKNOWN=3
|
|
|
|
shopt -s extglob
|
|
|
|
#### Functions ####
|
|
|
|
# Print version information
|
|
print_version()
|
|
{
|
|
printf "\n\n$0 - $VERSION\n"
|
|
}
|
|
|
|
#Print help information
|
|
print_help()
|
|
{
|
|
print_version
|
|
printf "$AUTHOR\n"
|
|
printf "Monitor GPU temperatur with the use of aticonfig (fglrx)\n"
|
|
/bin/cat <<EOT
|
|
|
|
Options:
|
|
-h
|
|
Print detailed help screen
|
|
-V
|
|
Print version information
|
|
-v
|
|
Verbose output
|
|
|
|
--adapter NUM
|
|
Set which GPU adapter to monitor, for example 0 or 1. Default is 0
|
|
|
|
-w INTEGER
|
|
Exit with WARNING status if above INTEGER degres
|
|
-c INTEGER
|
|
Exit with CRITICAL status if above INTEGER degres
|
|
EOT
|
|
}
|
|
|
|
|
|
###### MAIN ########
|
|
|
|
# Warning threshold
|
|
thresh_warn=
|
|
# Critical threshold
|
|
thresh_crit=
|
|
# Hardware to monitor
|
|
adapter=0
|
|
|
|
# See if we have the aticonfig program installed and can execute it
|
|
if [[ ! -x "$SENSORPROG" ]]; then
|
|
printf "\nIt appears you don't have aticonfig installed \
|
|
in $SENSORPROG\n"
|
|
exit $STATE_UNKOWN
|
|
fi
|
|
|
|
# Parse command line options
|
|
while [[ -n "$1" ]]; do
|
|
case "$1" in
|
|
|
|
-h | --help)
|
|
print_help
|
|
exit $STATE_OK
|
|
;;
|
|
|
|
-V | --version)
|
|
print_version
|
|
exit $STATE_OK
|
|
;;
|
|
|
|
-v | --verbose)
|
|
: $(( verbosity++ ))
|
|
shift
|
|
;;
|
|
|
|
-w | --warning)
|
|
if [[ -z "$2" ]]; then
|
|
# Threshold not provided
|
|
printf "\nOption $1 requires an argument"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
elif [[ "$2" = +([0-9]) ]]; then
|
|
# Threshold is an integer
|
|
thresh=$2
|
|
else
|
|
# Threshold is not an integer
|
|
printf "\nThreshold must be an integer"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
fi
|
|
thresh_warn=$thresh
|
|
shift 2
|
|
;;
|
|
|
|
-c | --critical)
|
|
if [[ -z "$2" ]]; then
|
|
# Threshold not provided
|
|
printf "\nOption '$1' requires an argument"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
elif [[ "$2" = +([0-9]) ]]; then
|
|
# Threshold is an integer
|
|
thresh=$2
|
|
else
|
|
# Threshold is not an integer
|
|
printf "\nThreshold must be an integer"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
fi
|
|
thresh_crit=$thresh
|
|
shift 2
|
|
;;
|
|
|
|
-\?)
|
|
print_help
|
|
exit $STATE_OK
|
|
;;
|
|
|
|
--adapter)
|
|
if [[ -z "$2" ]]; then
|
|
printf "\nOption $1 requires an argument"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
fi
|
|
adapter=$2
|
|
shift 2
|
|
;;
|
|
|
|
*)
|
|
printf "\nInvalid option '$1'"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
;;
|
|
esac
|
|
done
|
|
|
|
|
|
# Check if a sensor were specified
|
|
if [[ -z "$adapter" ]]; then
|
|
# No sensor to monitor were specified
|
|
printf "\nNo sensor specified"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
fi
|
|
|
|
|
|
#Get the temperature
|
|
TEMP=`${SENSORPROG} --adapter=${adapter} --od-gettemperature \
|
|
| grep "Temperature" | awk '{print $5}' | cut -c1-2`
|
|
|
|
|
|
# Check if the tresholds has been set correctly
|
|
if [[ -z "$thresh_warn" || -z "$thresh_crit" ]]; then
|
|
# One or both thresholds were not specified
|
|
printf "\nThreshold not set"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
elif [[ "$thresh_crit" -lt "$thresh_warn" ]]; then
|
|
# The warning threshold must be lower than the critical threshold
|
|
printf "\nWarning temperature should be lower than critical"
|
|
print_help
|
|
exit $STATE_UNKNOWN
|
|
fi
|
|
|
|
|
|
# Verbose outpu2t
|
|
if [[ "$verbosity" -ge 2 ]]; then
|
|
/bin/cat <<__EOT
|
|
Debugging information:
|
|
Warning threshold: $thresh_warn
|
|
Critical threshold: $thresh_crit
|
|
Verbosity level: $verbosity
|
|
Current GPU $adapter temperature: $TEMP
|
|
__EOT
|
|
printf "\n Temperature lines directly from aticonfig:\n"
|
|
${SENSORPROG} --adapter=${adapter} --od-gettemperature | grep "Temperature"
|
|
printf "\n\n"
|
|
fi
|
|
|
|
# Get performance data for Nagios "Performance Data" field
|
|
PERFDATA=`${SENSORPROG} --adapter=${adapter} --od-gettemperature | grep "Temperature"`
|
|
|
|
# And finally check the temperature against our thresholds
|
|
if [[ "$TEMP" -gt "$thresh_crit" ]]; then
|
|
# Temperature is above critical threshold
|
|
echo "GPU $adapter CRITICAL - Temperature is $TEMP | $PERFDATA"
|
|
exit $STATE_CRITICAL
|
|
|
|
elif [[ "$TEMP" -gt "$thresh_warn" ]]; then
|
|
# Temperature is above warning threshold
|
|
echo "GPU $adapter WARNING - Temperature is $TEMP | $PERFDATA"
|
|
exit $STATE_WARNING
|
|
|
|
else
|
|
# Temperature is ok
|
|
echo "GPU $adapter OK - Temperature is $TEMP | $PERFDATA"
|
|
exit $STATE_OK
|
|
fi
|
|
exit 3
|