#!/bin/sh
#
#	Basic tests of sanity for a newly-built version of
#	linux-HA software (heartbeat)
#
#	Conditions for running:
#
#	Heartbeat must be installed.
#
#	Must be root.
#
#	CANNOT have a real heartbeat configuration.
#
#	Must have networking configured with one working
#		network interface.
#
#	should not have $TESTIP below used for anything ;-)
#
#	should have multicast address $MCASTIP port 694
#		available
#		(you don't need a multicast capable router).
#
#
# Note: you might have to change TESTIP and MCASTIP
#
#	Addresses the test cluster is configured with.
TESTIP='10.253.252.251'
MCASTIP='225.0.0.2'
#
#	Commands, directories and helper programs of the installation
#	under test.
IFCONFIG='/sbin/ifconfig '
HADIR='/etc/ha.d'
RSCDIR="$HADIR/resource.d"
INITDIR='/etc/rc.d/init.d'
STONITH='/usr/sbin/stonith'
LIBDIR='/usr/lib/'
HBLIB="$LIBDIR/heartbeat"
APPHBD="$HBLIB/apphbd"
APPHBTEST="$HBLIB/apphbtest"
IPCTEST="$HBLIB/ipctest"
#
#	Interface name used when none can be guessed.
DEFAULTINTERFACE='eth0'
#
#	Marker line written into every generated config file;
#	CheckConfigFile looks for it before a file is overwritten
#	or removed.
IDENTSTRING='Linux-HA TEST configuration file - REMOVEME!!'
#	Second cluster node named in the generated configuration; the
#	tests expect heartbeat to declare it dead.
DUMMYNODE='ImAlwaysDead.com'
LOCALNODE=`uname -n`
LOGFILE='/tmp/linux-ha.testlog'
#	Running total of detected failures; also the final exit status.
errcount=0

#	Work from the heartbeat configuration directory.  The end of
#	the script looks for a core file in $HADIR, and the config
#	file helpers operate on files below it, so carrying on from
#	some other directory would only produce misleading results.
cd "$HADIR" || {
  echo "Cannot change directory to $HADIR.  Is heartbeat installed?"
  exit 1
}
#	Lift the core file size limit so crashes leave a core behind.
ulimit -c unlimited

#
#	Print the name of every interface listed by $IFCONFIG, one
#	per line.
#
#	Interface stanzas start in column one; the name is the first
#	space-delimited word.  Newer ifconfig versions print
#	"eth0: flags=..." rather than "eth0      Link encap:...", so a
#	trailing colon is removed.  A colon inside the name (alias
#	interfaces such as eth0:1) is kept.
#
GetAllIFNames() {
  $IFCONFIG | grep '^[a-zA-Z]' | cut -d' ' -f1 | sed 's/:$//'
}

#
#	Print the first interface reported by GetAllIFNames whose name
#	does not start with "lo".  Prints nothing when there is none.
#
GuessIFname() {
  GetAllIFNames | awk '!/^lo/ { print; exit }'
}

#	Use the guessed interface; fall back to the default when the
#	guess came back empty.
INTERFACE=`GuessIFname`
if
  [ -z "$INTERFACE" ]
then
  INTERFACE=$DEFAULTINTERFACE
fi

#
#	Is it safe to overwrite (or remove) the config file named by $1?
#
#	Returns 0 when the file does not exist, or exists and contains
#	$IDENTSTRING, i.e. was generated by this script.
#	Returns 1 for any other existing file.
#
CheckConfigFile() {
  # Nothing there: nothing real can be clobbered.
  [ -f $1 ] || return 0
  # Only a file carrying our marker is ours to overwrite.
  grep "$IDENTSTRING" $1 >/dev/null 2>&1 && return 0
  return 1
}

#
#	Write the test authkeys file to the path given in $1.
#
#	The file holds the shared key, so it is created under a
#	restrictive umask (in a subshell, so the caller's umask is left
#	alone) instead of being readable until chmod runs.  The chmod
#	is kept for the case where the file already existed with wider
#	permissions.  Returns the status of chmod.
#
GenerateAuthKeys() {
  (
    umask 077
    cat <<-! >"$1"
	#	$IDENTSTRING
	#
	auth 1
	1 sha1 SuperSecretKey--SHHH!!!
	!
  )
  chmod 600 "$1"
}

#
#	Write the test ha.cf to the path given in $1 and make it
#	mode 644.
#
#	The generated file logs to $LOGFILE, talks multicast on
#	$INTERFACE/$MCASTIP port 694, and declares two nodes:
#	$LOCALNODE and $DUMMYNODE, with a "null" stonith device.
#	$1 is quoted so a path containing whitespace works.
#	Returns the status of chmod.
#
GenerateHaCf() {
  cat <<-! >"$1"
	#	$IDENTSTRING
	logfile   $LOGFILE
	debugfile $LOGFILE
	keepalive 500ms
	debug 1
	deadtime 10
	initdead 10
	auto_failback on
	stonith_host * null $LOCALNODE $DUMMYNODE
	mcast $INTERFACE $MCASTIP 694 0 0
	#bcast $INTERFACE
	node $LOCALNODE $DUMMYNODE
	!
  chmod 644 "$1"
}

#
#	Write the test haresources file to the path given in $1 and
#	make it mode 644.
#
#	The single resource line assigns IPaddr::$TESTIP/30 to
#	$DUMMYNODE.  $1 is quoted so a path containing whitespace
#	works.  Returns the status of chmod.
#
GenerateHaResources() {
  cat <<-! >"$1"
	#	$IDENTSTRING
	$DUMMYNODE IPaddr::$TESTIP/30
	!
  chmod 644 "$1"
}
#	Names, relative to $HADIR, of the config files this script
#	generates, saves and restores.
CONFIGFILES="ha.cf authkeys haresources"

#
#	Install the test configuration in $HADIR.
#
#	Exits the script with status 1 when heartbeat is already
#	running (before anything has been touched), or when a config
#	file that was not generated by this script is still in the way
#	after the existing files have been saved.
#	Otherwise the three test files are generated and any stale
#	$HADIR/core is removed so a later core file is known to be new.
#
SetUpConfigFiles() {
  if
    HBStatus
  then
    echo "Cannot run tests with heartbeat already running."
    # Swapping the configuration under a live heartbeat is not
    # safe, so stop here rather than merely complain.
    exit 1
  fi
  SaveConfigFiles
  # Saving normally moves every existing file out of the way, so
  # this check only trips when a file could not be moved.
  for j in $CONFIGFILES
  do
    if
      CheckConfigFile $HADIR/$j
    then
      : OK
    else
      echo "OOPS! $HADIR/$j already exists!"
      echo "Real configuration already set up."
      echo "Sorry..."
      exit 1
    fi
  done

  GenerateAuthKeys $HADIR/authkeys
  GenerateHaCf $HADIR/ha.cf
  GenerateHaResources $HADIR/haresources
  rm -f $HADIR/core
}

#
#	Delete the generated test config files from $HADIR, then put
#	the saved files back with RestoreConfigFiles.
#
#	A file that CheckConfigFile does not recognise as generated by
#	this script is left in place and reported instead.
#	Returns the status of RestoreConfigFiles.
#
RemoveConfigFiles() {
  for j in $CONFIGFILES
  do
    CheckConfigFile $HADIR/$j || {
      echo "OOPS! Cannot remove real config file $HADIR/$j!"
      continue
    }
    rm -f $HADIR/$j
  done
  RestoreConfigFiles
}

#
#	Move the existing config files named in $CONFIGFILES from
#	$HADIR into $HADIR/.cfsave, creating that directory if needed.
#
#	Files that do not exist are skipped, so a machine with no (or
#	only a partial) configuration produces no mv errors.
#	Leaves the current directory set to $HADIR.
#	Returns 0 on success, 1 when the directory cannot be entered
#	or created, or when a file cannot be moved.
#
SaveConfigFiles() {
  cd "$HADIR" || return 1
  if
    [ ! -d .cfsave ]
  then
    mkdir .cfsave || return 1
  fi
  for savename in $CONFIGFILES
  do
    if
      [ -f "$savename" ]
    then
      mv "$savename" .cfsave || return 1
    fi
  done
  return 0
}

#
#	Move everything saved in $HADIR/.cfsave back into $HADIR.
#
#	When nothing was saved the glob matches nothing and is left as
#	the literal pattern; that case is skipped instead of being
#	handed to mv.
#	Returns 0 when every saved file was restored (or there were
#	none), 1 when a move failed.
#
RestoreConfigFiles() {
  restorerc=0
  for savedfile in "$HADIR"/.cfsave/*
  do
    [ -e "$savedfile" ] || continue
    mv "$savedfile" "$HADIR" || restorerc=1
  done
  return $restorerc
}

#
#	Announce the action, then run the heartbeat init script with
#	"start".  Returns the init script's exit status.
#
HBStart() {
  hb_initscript=$INITDIR/heartbeat
  printf '%s\n' "Starting heartbeat"
  $hb_initscript start
}

#
#	Announce the action, then run the heartbeat init script with
#	"stop".  Returns the init script's exit status.
#
HBStop() {
  hb_initscript=$INITDIR/heartbeat
  printf '%s\n' "Stopping heartbeat"
  $hb_initscript stop
}

#
#	Ask the heartbeat init script to "reload", discarding its
#	output, then wait five seconds before returning.
#	Returns the exit status of the reload command (also left in
#	the variable rc).
#
HBReload() {
  echo "Reloading heartbeat"
  if
    $INITDIR/heartbeat reload >/dev/null 2>&1
  then
    rc=0
  else
    rc=$?
  fi
  sleep 5
  return $rc
}

#
#	Report whether heartbeat is running.
#
#	Returns 0 when the init script's "status" output (stdout and
#	stderr together) contains the word "running", 1 otherwise.
#	NOTE(review): any text containing "running" matches, so a
#	message such as "not running" would count as running too.
#
HBStatus() {
  hb_state=`$INITDIR/heartbeat status 2>&1`
  if
    printf '%s\n' "$hb_state" | grep running >/dev/null
  then
    return 0
  fi
  return 1
}

#
#	Search the log file for the given grep pattern
#
#	$1  grep pattern to look for in $LOGFILE
#	$2  optional: maximum number of attempts (default 60)
#
#	The log is checked once per attempt with a one second sleep in
#	between, so the default waits up to about a minute.  Matching
#	lines are written to stdout.
#	Returns 0 as soon as the pattern is found, 1 when every attempt
#	failed.
#
LookForString() {
  lfs_max=${2:-60}
  lfs_try=1
  until
    grep "$1" "$LOGFILE"
  do
    [ "$lfs_try" -lt "$lfs_max" ] || return 1
    lfs_try=`expr $lfs_try + 1`
    sleep 1
  done
  return 0
}

#	Check for the given count of the given string
#	Complain unless the right number are there.
#
#	$1  extended regular expression to count in $LOGFILE
#	$2  minimum number of matching lines
#	$3  optional: maximum number of matching lines (default: $2,
#	    i.e. exactly $2 matches are required)
#
#	On a mismatch two ERROR lines are printed and appended to
#	$LOGFILE, and errcount is incremented.  The return status is
#	always 0; failures are reported through errcount only.
CheckPat()
{
  count=`grep -E -c "$1" "$LOGFILE"`
  # grep prints nothing when the log cannot be read; treat as none.
  count=${count:-0}
  min=$2
  max=${3:-$2}
  if
    [ "$count" -lt "$min" ] || [ "$count" -gt "$max" ]
  then
    echo "ERROR: Did not find [$min:$max] occurances of $1 in $LOGFILE `date`" | tee -a "$LOGFILE"
    echo "ERROR: Found $count instead." | tee -a "$LOGFILE"
    errcount=`expr $errcount + 1`
  fi
  return 0
}

#
#	Record one failed heartbeat check: print the message in $1,
#	append it to $LOGFILE and add one to errcount.
#
TestHeartbeatFail() {
  echo "$1" | tee -a $LOGFILE
  errcount=`expr $errcount + 1`
}

#
#	Run the heartbeat tests against the generated configuration:
#
#	  - start heartbeat and make sure it is running
#	  - look in $LOGFILE for $DUMMYNODE being declared dead and
#	    STONITHed, and for takeover/ARP of $TESTIP
#	  - check the IPaddr resource with "status" and "monitor"
#	  - reload after touching ha.cf (a restart exec is expected)
#	  - reload after touching authkeys (no further exec expected)
#	  - stop heartbeat and verify it and the test address are gone
#
#	Every failed check adds one to errcount.
#	Returns 1 without running the remaining checks when heartbeat
#	cannot be started, so that the caller still gets to put the
#	real configuration back; returns 0 otherwise.
#
TestHeartbeat() {
  if
    HBStatus
  then
    echo "That's weird.  Heartbeat seems to be running..."
    errcount=`expr $errcount + 1`
    HBStop
  fi
  if
    HBStart
  then
    : started
  else
    TestHeartbeatFail "Heartbeat start operation returned $?"
  fi
  if
    HBStatus
  then
    : COOL!
  else
    TestHeartbeatFail "Heartbeat did not start."
    return 1
  fi

  #
  # Heartbeat seems to be running...
  #

  if
    LookForString "node $DUMMYNODE.*is dead" >/dev/null
  then
    : OK
  else
    TestHeartbeatFail "Does not look like we noticed $DUMMYNODE was dead"
  fi

  if
    LookForString "Resetting node $DUMMYNODE with" >/dev/null &&
    LookForString "node $DUMMYNODE now reset" >/dev/null
  then
    : OK
  else
    TestHeartbeatFail "Does not look like we STONITHed $DUMMYNODE"
  fi

  if
    LookForString "IPaddr.*$TESTIP" >/dev/null
  then
    :  Looks good
  else
    TestHeartbeatFail "Does not look like we took over the IP address"
  fi

  if
    LookForString "[Aa][Rr][Pp]" >/dev/null
  then
    :  Looks good
  else
    TestHeartbeatFail "Does not look like we ARPed the address"
  fi

  sleep 5

  if
    $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 &&
    $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1
  then
    : COOL!
  else
    TestHeartbeatFail "Looks like monitor operation failed"
  fi

  #
  #	Reload test -- ha.cf changed
  #
  echo "Performing ha.cf reload test" >> $LOGFILE
  touch $HADIR/ha.cf
  if
    HBReload
  then
    : OK! reload after touching ha.cf worked!
  else
    TestHeartbeatFail "Heartbeat reload operation returned $?"
  fi
  if
    LookForString "restart exec" >/dev/null
  then
    :  Looks good
  else
    TestHeartbeatFail "Does not look like we did a restart exec."
  fi
  if
    HBStatus
  then
    : "OK - reload didn't kill anything ;-)"
  else
    TestHeartbeatFail "Heartbeat is not running after the ha.cf reload"
  fi
  #
  #	Reload test -- authkeys changed
  #
  echo "Performing authkeys reload test" >> $LOGFILE
  touch $HADIR/authkeys
  if
    HBReload
  then
    : OK! reload after touching authkeys worked!
  else
    TestHeartbeatFail "Heartbeat reload operation returned $?"
  fi
  # CheckPat counts and logs a mismatch itself; the else branches
  # below only add to that if it also returns a failure status.
  if
    CheckPat "Signalling.* to reread config files" 2 >/dev/null
  then
    : OK
  else
    TestHeartbeatFail "Heartbeat did not reread config files exactly twice"
  fi
  sleep 2
  if
    CheckPat "restart exec" 1 >/dev/null
  then
    :  Looks good -- did not do another exec
  else
    TestHeartbeatFail "Looks like we did an extra exec"
  fi
  if
    HBStatus
  then
    : "OK - reload didn't kill anything ;-)"
  else
    TestHeartbeatFail "Heartbeat is not running after the authkeys reload"
  fi

  echo "Stopping heartbeat." >> $LOGFILE
  if
    HBStop
  then
    : OK!
  else
    TestHeartbeatFail "Heartbeat stop operation returned $?"
  fi
  if
    HBStatus
  then
    echo "Looks like heartbeat did not really stop." | tee -a $LOGFILE
    echo "You'll probably need to kill some processes yourself."
    errcount=`expr $errcount + 1`
  fi

  if
    $RSCDIR/IPaddr $TESTIP status >/dev/null 2>&1 &&
    $RSCDIR/IPaddr $TESTIP monitor >/dev/null 2>&1
  then
    echo "Looks like the test IP address is still live..."
    errcount=`expr $errcount + 1`
  fi

  return 0
}

#
#	Basic sanity checks of the $STONITH command using its "null"
#	device type:
#
#	  - "-h" succeeds and prints at least 100 lines of help
#	  - listing the hosts of a null device configured with
#	    "foo bar" succeeds and mentions both names
#	  - resetting host foo succeeds and reports
#	    "Host foo null-reset"
#
#	Each failed check is reported to stdout and $LOGFILE; the
#	number of failures is added to errcount.
#
StonithCheck() {

  serrcount=0

  echo "Checking STONITH basic sanity." | tee -a $LOGFILE
  if
    $STONITH -h >/dev/null
  then
    : OK
  else
    echo "$STONITH -h failed" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi
  wc=`$STONITH -h | wc -l`
  # $wc is left unquoted: some wc implementations pad the count.
  if
    [ $wc -lt 100 ]
  then
    echo "$STONITH -h help message is too short" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi
  if
    FOOBARLIST=`$STONITH -t null -p "foo bar" -l`
  then
    : FOOBARLIST OK
  else
    echo "$STONITH -t null list option failed" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi
  if
    echo $FOOBARLIST | grep foo >/dev/null &&
    echo $FOOBARLIST | grep bar >/dev/null
  then
    : OK null list
  else
    echo "$STONITH -t null list option incorrect" | tee -a $LOGFILE
    serrcount=`expr $serrcount + 1`
  fi

  if
    RESETOUT=`$STONITH -t null -p "foo bar" foo 2>&1`
  then
    case $RESETOUT in
     *"Host foo null-reset"*)	: NULL Stonith output OK;;
     *)				echo "NULL reset failed." | tee -a $LOGFILE
  				  serrcount=`expr $serrcount + 1`;;
    esac
  else
    echo "$STONITH -t null reset failed." | tee -a $LOGFILE
    # A reset command that exits non-zero is a failure like any
    # other and has to show up in the error count.
    serrcount=`expr $serrcount + 1`
  fi
  errcount=`expr $errcount + $serrcount`
}


#
#	Exercise the application heartbeat daemon ($APPHBD) with the
#	$APPHBTEST client.
#
#	A temporary apphbd config file logging to $LOGFILE is written,
#	apphbd is started with it, and CheckPat then verifies the
#	expected messages in $LOGFILE for a success run with
#	$clientcount clients and for a failure run (-F).  Finally
#	apphbd is told to stop and must no longer report itself as
#	running.  The temporary config file is removed before
#	returning.
#
AppHBCheck() {
	# Prefer a freshly created, unpredictable file name: this runs
	# as root in a world-writable directory.  Fall back to the
	# pid-based name when mktemp is not available.
	CFFILE=`mktemp /tmp/apphbd.cf.XXXXXX 2>/dev/null` || CFFILE=/tmp/$$.apphbd.cf
	clientcount=5
	cat <<-! >$CFFILE
		realtime yes
		debug_level	1
		debugfile	$LOGFILE
		logfile		$LOGFILE
	!
	# mktemp creates the file readable by its owner only; widen it
	# again to what a plain redirect would usually have produced.
	chmod 644 $CFFILE
	echo "Performing apphbd success case tests" | tee -a $LOGFILE
	if
	  $APPHBD -s >/dev/null 2>&1
	then
	  echo "That's odd, $APPHBD is already running."
	  # Two daemons get stopped, so two "stopped" messages are
	  # expected at the end.
	  killcount=2
	  $APPHBD -k >/dev/null 2>&1
	else
	  killcount=1
	fi
	$APPHBD -c $CFFILE
	sleep 5
	$APPHBTEST -i 1000 -p $clientcount -n 5 >> $LOGFILE 2>&1
	for pat in "apphb_client_register:" "type=setint" "info:.*apphb_client_remove:"
	do
	  CheckPat "$pat" $clientcount
	done
	CheckPat "failed to heartbeat|resumed heartbeats" 0

	echo "Performing apphbd failure case tests" | tee -a $LOGFILE
	# NOTE(review): the failure run's own output is appended to the
	# config file, not to $LOGFILE as in the success run -- confirm
	# whether that is intended before changing it.
	$APPHBTEST -F -i 1000 -p 1 -n 5 >>$CFFILE 2>&1
	for pat in "'failtest'.* failed to heartbeat" "'failtest'.* resumed heartbeats"
	do
	  CheckPat "$pat" 1 2
	done
	CheckPat "WARN:.*hangup" 1
	$APPHBD -k $CFFILE
	CheckPat "info:.*apphbd.*stopped" $killcount
	if
	  $APPHBD -s >/dev/null 2>&1
	then
	  echo "ERROR: $APPHBD is still running!" | tee -a $LOGFILE
	fi
	# Do not leave the temporary config file behind.
	rm -f $CFFILE
}

#
#	Run the IPC test program ($IPCTEST), sending all of its output
#	to $LOGFILE, and add its exit status to errcount.
#
IPCtest() {
	echo "Starting IPC tests" | tee -a $LOGFILE
	$IPCTEST >>$LOGFILE 2>&1
	ipcrc=$?
	errcount=`expr $errcount + $ipcrc`
}

#
#	Check our identity.
#	Set Up Config Files.
#	Run Tests.
#	Remove Config Files.
#

#	Only root may swap config files and start/stop heartbeat.
ID=`/usr/bin/whoami`
case $ID in
  root)	: OK;;
  *)	echo "Must be root to run this.  Sorry."
	exit 1;;
esac

SetUpConfigFiles
#	Start every run with an empty log: the tests count and search
#	for messages in it.
> $LOGFILE

TestHeartbeat
#	Put the real configuration back before the remaining tests.
RemoveConfigFiles
StonithCheck
AppHBCheck
IPCtest

#	A core file in $HADIR counts as a failure.
if
  [ -f $HADIR/core ]
then
  errcount=`expr $errcount + 1`
  echo "OOPS! We generated a core file!"
  ls -l $HADIR/core
  file $HADIR/core
fi

#	Any CRIT or ERROR message in the log counts as one more failure;
#	the matching lines are shown to the user.
if
  grep -E 'CRIT|ERROR' $LOGFILE
then
  echo "OOPS! Looks like we had some errors come up."
  errcount=`expr $errcount + 1`
fi

echo "$errcount errors. Log file is stored in $LOGFILE"
#	An exit status only carries 8 bits: 256 errors would otherwise
#	be reported to the caller as success.
if
  [ $errcount -gt 255 ]
then
  exit 255
fi
exit $errcount
