#!/bin/ksh

#
# pnm - Script to start up Public Network Management
#
# pnm <clustname> <cmmstep> <allnodes> <currnodes> <localnodeid>

#
# log_trace <words...>
#
# Writes a "begin" trace marker of the form "# + <cmd>: <words>" to file
# descriptor 3, but only when TRACE_PDB is non-empty.
# NOTE(review): neither $cmd nor descriptor 3 is set up in the visible part
# of this script; presumably both come from the caller -- confirm.
#
function log_trace
{
  # Tracing disabled: nothing to do.
  [ -z "$TRACE_PDB" ] && return 0

  echo "# + $cmd: $*" >&3
}
 
#
# log_trace_end <words...>
#
# Writes an "end" trace marker of the form "# - <cmd>: <words>" to file
# descriptor 3, but only when TRACE_PDB is non-empty.
# NOTE(review): neither $cmd nor descriptor 3 is set up in the visible part
# of this script; presumably both come from the caller -- confirm.
#
function log_trace_end
{
  # Tracing disabled: nothing to do.
  [ -z "$TRACE_PDB" ] && return 0

  echo "# - $cmd: $*" >&3
}

#
# usage
#
# Prints the command synopsis on standard error and terminates the script
# with exit status 2.
#
# The program name is taken from ${prog}; when that variable is still unset
# or empty (usage can be reached before it is assigned) the literal name
# "pnm" is printed instead, so the message never starts with a blank name.
#
function usage
{
  echo "usage: ${prog:-pnm} <clustname> <cmmstep> <allnodes> <currnodes> <localnodeid> " >&2
  exit 2
}

#
# init
#
# Sets the directory variables used by the rest of the script (mybin,
# myvar, myetc) and derives cdbfile, the path of the cluster configuration
# database, from ${clustname}.  ${clustname} must be set before the call.
# The work is bracketed by log_trace / log_trace_end markers.
#
function init
{
  log_trace init

  # Fixed installation directories.
  mybin=/opt/SUNWpnm/bin
  myvar=/var/opt/SUNWpnm
  myetc=/etc/opt/SUNWcluster

  # Per-cluster configuration database.
  cdbfile="${myetc}/conf/${clustname}.cdb"

  log_trace_end init
}

#
# getpid <name>
#
# Stores in the variable "pid" the first column of every "ps -e" line that
# contains <name> as a whole word.  The sed step strips leading blanks and
# then everything from the first remaining blank onward, which leaves the
# first field of each matching line.  Several matches yield one value per
# line; no match leaves pid empty.
#
getpid()
{
	pid=$(/usr/bin/ps -e |
		/usr/bin/grep -w $1 |
		/usr/bin/sed -e 's/^  *//' -e 's/ .*//')
}

#
# kill_daemons
#
# Terminates every process that getpid() reports for "pnmd": up to ten
# rounds of SIGTERM, two seconds apart, followed by one SIGKILL if anything
# is still reported afterwards.
#
kill_daemons()
{
  log_trace kill_daemons

  # getpid() assigns its result to "pid"; declare it here first.
  # NOTE(review): this relies on getpid() writing into this function's pid,
  # so keep the name() definition form -- confirm scoping on the target ksh.
  typeset pid

  #
  # Polite phase.  This covers the main pnmd and any children it forked to
  # do arp retransmissions.  The main daemon needs some time to clean up its
  # CCD rows before it dies, so the same pid may be signalled on several
  # rounds.  Per the original author this is harmless: pnmd blocks further
  # SIGTERMs after the first one, so the repeats have no effect, while the
  # repetition guarantees the daemon never misses the signal.
  #
  count=0
  while ((count < 10))
  do
    getpid pnmd

    # Nothing left to signal.
    [ -z "${pid}" ] && break

    # ${pid} is deliberately unquoted: it may hold several pids.
    kill -15 ${pid}

    # reconf_ener runs as an RT process; sleeping releases the cpu so that
    # the signalled process can actually get killed.  This matters most on
    # single-cpu machines.
    sleep 2

    ((count = count + 1))
  done

  #
  # Forceful phase.  Anything still present could not clean up and exit, so
  # send SIGKILL.  Per the original author, a CCD left unclean by this is
  # repaired during the next reconfiguration by the lowest numbered host
  # among the remaining cluster members.
  #
  getpid pnmd
  if [ -n "${pid}" ]; then
    kill -9 ${pid}
  fi

  log_trace_end kill_daemons
}

#
#	main()
#

# Record the invocation name first: usage() prints ${prog}, so it has to be
# set before the argument-count check can call it.
prog=$0

if [[ "$#" -lt 5 ]]; then
  usage
fi

clustname=$1
step=$2
# NOTE(review): allnodes and currnodes are not read in the visible part of
# this script; kept because they document the calling convention.
allnodes=$3
currnodes=$4
localnodeid=$5

PATH=/sbin:/usr/sbin:/bin:/opt/SUNWcluster/bin
pre="SUNWcluster.reconf.pnm"

# Start from "success" so the status checks below never test an unset
# variable or a value inherited from the environment.
retval=0

init
log_trace pnm

# NOTE(review): localhostname is not read in the visible part of this
# script; the lookup is kept in case something outside this view needs it.
localhostname=$(cdbmatch "cluster.node.${localnodeid}.hostname" "${cdbfile}")

if [[ "${step}" = "step1" ]]; then

	${mybin}/pnminit || retval=$?

	if [ "${retval}" -ne 0 ]; then
		log_info "${pre}.3009" "pnminit faced problems"
		exit 200
	fi

elif [[ "${step}" = "step2" ]]; then

	getpid pnmd

	if [ -z "${pid}" ]; then
		# No pnmd is running: clear the marker file and start one with -s.
		/usr/bin/rm -f "${myvar}/pnmd.non_sc"
		${mybin}/pnmd -s || retval=$?
	elif [ -f "${myvar}/pnmd.non_sc" ]; then
		# A pnmd is running and the marker file written by the stop/abort
		# step exists: replace that daemon with one started with -s.
		/usr/bin/rm -f "${myvar}/pnmd.non_sc"
		kill_daemons
		${mybin}/pnmd -s || retval=$?
	fi
	# Otherwise a pnmd is running without the marker file; it is left
	# alone and retval stays 0.

	if [ "${retval}" -ne 0 ]; then
		log_info "${pre}.3010" "Could not start pnmd"
		exit 200
	fi

#	Unbind pnmd from the highest processor.

	getpid pnmd
	# Skip pbind when no pnmd was found, so it is never run without a pid.
	# ${pid} is deliberately unquoted: it may hold several pids.
	if [ -n "${pid}" ]; then
		/usr/sbin/pbind -u ${pid}
	fi

#
# 	Add  the return step later for CCD purposes
#
# elif [[ "${step}" = "return" ]]; then

elif [[ "${step}" = "stop" || "${step}" = "abort" ]]; then

	kill_daemons
	# Create (or truncate) the marker file that step2 checks for, then
	# start pnmd without the -s option.
	> "${myvar}/pnmd.non_sc"
	${mybin}/pnmd

fi


# Close the trace opened by "log_trace pnm" above.
log_trace_end pnm
exit 0
