#!/bin/sh
#
# Plugin to monitor CPU usage.
#
# Usage: Place in /etc/munin/node.d/ (or link it there using ln -s)
#
# Parameters understood:
#
# 	config   (required)
# 	autoconf (optional - used by munin-config)
#
# Magic markers - optional - used by installation scripts and
# munin-config:
#
#%# family=auto
#%# capabilities=autoconf
#
# Environment variables:
#
#      HZ      Some combinations of hardware and Linux (probably only
#              2.4 kernels) use 1000 units/second in /proc/stat,
#              corresponding to the system's HZ (see
#              /usr/src/linux/include/asm/param.h). Almost all
#              systems use 100 units/second, however, and this is our
#              default, even if Documentation/proc.txt in the kernel
#              source says otherwise. - Finding and fix by dz@426.ch
#
#      scaleto100  If set to "yes", the graph's upper limit is 100
#              instead of 100 times the number of CPUs, and every
#              field gets a cdef dividing it by the number of CPUs.
#
# $Id: cpu.in 1266 2007-01-04 21:26:59Z bjorn $

# "autoconf" request: tell the caller whether this plugin is usable here.
# The only requirement checked is a readable /proc/stat.
#   readable     -> prints "yes", exits 0
#   not readable -> prints "no",  exits 1
# Any other first argument falls through to the rest of the script.
case "$1" in
	autoconf)
		[ -r /proc/stat ] || { echo no; exit 1; }
		echo yes
		exit 0
		;;
esac

# Units per second used by the counters in /proc/stat.  Taken from the
# environment when set and non-empty, otherwise 100.
HZ=${HZ:-100}

# Optional counters present on the aggregate "cpu" line of /proc/stat.
# Both variables are reset here so that a value inherited from the
# environment can never switch an extra field on by accident.
#   extinfo     - "iowait irq softirq" when at least 7 numeric columns exist
#   extextinfo  - "steal" when at least 8 numeric columns exist
extinfo=""
extextinfo=""

# grep -E replaces the obsolescent egrep, which recent GNU grep releases
# complain about on stderr at every invocation.
if grep -E -q '^cpu( +[0-9]+){7}' /proc/stat; then
	extinfo="iowait irq softirq"
	if grep -E -q '^cpu( +[0-9]+){8}' /proc/stat; then
		extextinfo="steal"
	fi
fi

# "config" request: describe the graph and its data sources, then exit 0.
#
# Reads:
#   /proc/stat    - "cpuN" lines are counted to get the number of CPUs
#   $extinfo      - non-empty when iowait/irq/softirq should be declared
#   $extextinfo   - non-empty when steal should be declared
#   $scaleto100   - "yes" caps the graph at 100 and adds a cdef per field
#                   dividing it by the number of CPUs
# Outputs: Munin configuration lines on stdout.
if [ "$1" = "config" ]; then

	# Print the attributes shared by every plain stacked counter.
	#   $1 - field name
	#   $2 - text of the .info attribute
	cpu_stack_field() {
		printf '%s.label %s\n' "$1" "$1"
		printf '%s.draw STACK\n' "$1"
		printf '%s.min 0\n' "$1"
		printf '%s.max %s\n' "$1" "$MAX"
		printf '%s.type DERIVE\n' "$1"
		printf '%s.info %s\n' "$1" "$2"
	}

	# Print a cdef dividing each named field by the number of CPUs.
	#   $@ - field names
	cpu_scale_fields() {
		for cpu_field in "$@"; do
			printf '%s.cdef %s,%s,/\n' "$cpu_field" "$cpu_field" "$NCPU"
		done
	}

	NCPU=$(grep -c -E '^cpu[0-9]+ ' /proc/stat)
	# Without any "cpuN" line (or without a readable file) the count is 0
	# or empty; that would yield ".max 0" and a division by zero in the
	# cdefs, so assume a single CPU instead.
	[ "${NCPU:-0}" -ge 1 ] 2>/dev/null || NCPU=1

	# 100% per CPU: both the per-field maximum and the unscaled graph top.
	MAX=$((NCPU * 100))
	if [ "$scaleto100" = "yes" ]; then
		graphlimit=100
	else
		graphlimit=$MAX
	fi
	SYSWARNING=$((MAX * 30 / 100))
	SYSCRITICAL=$((MAX * 50 / 100))
	USRWARNING=$((MAX * 80 / 100))

	# List every declared field, including steal, so the stacking order
	# is fully specified.
	graph_order="system user nice idle"
	[ -z "$extinfo" ] || graph_order="$graph_order $extinfo"
	[ -z "$extextinfo" ] || graph_order="$graph_order $extextinfo"

	echo 'graph_title CPU usage'
	echo "graph_order $graph_order"
	echo "graph_args --base 1000 -r --lower-limit 0 --upper-limit $graphlimit"
	echo 'graph_vlabel %'
	echo 'graph_scale no'
	echo 'graph_info This graph shows how CPU time is spent.'
	echo 'graph_category system'
	echo 'graph_period second'

	# system and user carry alert thresholds, so they are spelled out.
	echo 'system.label system'
	echo 'system.draw AREA'
	echo "system.max $MAX"
	echo 'system.min 0'
	echo 'system.type DERIVE'
	echo "system.warning $SYSWARNING"
	echo "system.critical $SYSCRITICAL"
	echo "system.info CPU time spent by the kernel in system activities"
	echo 'user.label user'
	echo 'user.draw STACK'
	echo 'user.min 0'
	echo "user.max $MAX"
	echo "user.warning $USRWARNING"
	echo 'user.type DERIVE'
	echo 'user.info CPU time spent by normal programs and daemons'

	cpu_stack_field nice 'CPU time spent by nice(1)d programs'
	cpu_stack_field idle 'Idle CPU time'
	if [ "$scaleto100" = "yes" ]; then
		cpu_scale_fields system user nice idle
	fi

	if [ -n "$extinfo" ]; then
		cpu_stack_field iowait 'CPU time spent waiting for I/O operations to finish when there is nothing else to do.'
		cpu_stack_field irq 'CPU time spent handling interrupts'
		cpu_stack_field softirq 'CPU time spent handling "batched" interrupts'
		if [ "$scaleto100" = "yes" ]; then
			cpu_scale_fields iowait irq softirq
		fi
	fi

	if [ -n "$extextinfo" ]; then
		cpu_stack_field steal 'The time that a virtual CPU had runnable tasks, but the virtual CPU itself was not running'
		if [ "$scaleto100" = "yes" ]; then
			cpu_scale_fields steal
		fi
	fi
	exit 0
fi

# Fetch request (any argument not handled above): print one
# "<field>.value <n>" line per counter taken from the aggregate "cpu" line
# of /proc/stat.
#
# Note: counters/derive need to report integer values, and notations such
# as 10e+09 must be avoided; the %.0f format takes care of both.

# Field names in /proc/stat column order, starting at column 2.
if [ -n "$extextinfo" ]; then
	fields="user nice system idle iowait irq softirq steal"
elif [ -n "$extinfo" ]; then
	fields="user nice system idle iowait irq softirq"
else
	fields="user nice system idle"
fi

# Each raw counter is multiplied by 100/HZ.  HZ is quoted so an odd value
# cannot be split into extra awk arguments.
awk -v hz="${HZ:-100}" -v fields="$fields" '
	/^cpu / {
		n = split(fields, name, " ")
		for (i = 1; i <= n; i++)
			printf "%s.value %.0f\n", name[i], $(i + 1) * 100 / hz
	}
' < /proc/stat
