#!/bin/ksh
#
#pragma ident   "@(#)reconf_ener.sh 1.19   01/08/23  SMI"
#Copyright (C) 1997 Sun Microsystems, Inc.
#All rights reserved.
#

#
# reconf_ener - Energizer Cluster Reconfiguration Programs
#
# reconf_ener provides the framework for adding the "reconfiguration
# programs" required by the PDD modules and by other applications
# involved in the Cluster Framework.
#

# Message-id prefix for the log entries written by this script; it
# should be used only by the reconf framework.
pre="SUNWcluster.reconf"
# RECONF_DIR holds the rc?.d directories handed to execute_reconf,
# RECONF_SCRIPTS the helper scripts sourced below. Both are exported.
export RECONF_DIR=/opt/SUNWcluster/etc/reconf/conf.d
export RECONF_SCRIPTS=/opt/SUNWcluster/etc/reconf/scripts/
# File collecting the names of components that exited with status 200
# ("status ignore"); check_module_configured skips those components
# until cmmstart/cmmabort removes the file.
reconf_error_file=/opt/SUNWcluster/etc/reconf/.reconf_ener_error

# INCLUDE is the "." (source) command: the three helper files are read
# into the current shell. RECONF_SCRIPTS already ends in "/", so the
# resulting paths contain a harmless "//".
INCLUDE=.
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.disks
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.common
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.networks

#
# Exit status with which a reconf program requests a reconfiguration.
# This error number needs to be consistent with other components; a
# common include file should be used by energizer components.
RECONF_REQUIRED_ERR=205


##########################################################
# appmatch:                                              #
#    Report whether one application bit is set in the    #
#    application bit map read from the CDB.              #
#      $1 - bit map value                                #
#      $2 - bit position to test (0 is the lowest bit)   #
#    Prints 1 when the bit is set and 0 when it is not.  #
#    Exits with status 1 when fewer than two arguments   #
#    are supplied.                                       #
##########################################################
function appmatch
{
	typeset -i bitmap position mask

	# both the bit map and the bit position are mandatory
	(( $# >= 2 )) || exit 1

	bitmap=$1
	position=$2

	# build a mask with only the requested bit switched on
	mask=$(( 1 << position ))

	if (( (bitmap & mask) != 0 )); then
		print 1
	else
		print 0
	fi
}


######################################################################
# init - Initialize the Environment for execution of reconf programs.#
#                                                                    #
# Everything assigned between "set -a" and "set +a" is exported:     #
# the cluster directories, the CDB derived settings, the list of     #
# modules that are not configured (MODNOTCONFIG, also split into the #
# MODULES array) and the names of the flag files kept in tmpdir.     #
#                                                                    #
# Reads  : clustname and the application bit numbers CVM, VxVM, OPS, #
#          Netdisk, CCD and CCM.                                     #
# Calls  : enmatch (not defined in this file; presumably a CDB       #
#          lookup helper from one of the sourced files - confirm),   #
#          appmatch and ${CLUSTERBIN}/cdbmatch.                      #
######################################################################
function init
{
	# set required path/common directories for execution
	#
	# "set -a" automatically exports every variable assigned below.
	set -a

	LD_LIBRARY_PATH=/opt/SUNWcluster/lib
	CLUSTERBIN=/opt/SUNWcluster/bin
	CLUSTERETC=/etc/opt/SUNWcluster
	CLUSTERVAR=/var/opt/SUNWcluster
	SSACLI=${CLUSTERBIN}/pdbssa
	PATH=/usr/sbin:/usr/bin/:${CLUSTERBIN}:/usr/ccs/bin/:/bin/:/sbin

	# export CDB and LOGfile path.
	cdbfile=${CLUSTERETC}/conf/${clustname}.cdb
	ccdfile=${CLUSTERETC}/conf/ccd.database.init
	cdbfilter=${CLUSTERETC}/conf/cdb.filter
	tmpdir=`enmatch env.tmpdir`
	# Fall back to CLUSTERVAR when the CDB names no temporary
	# directory. This has to happen before the flag file names below
	# are derived from tmpdir, otherwise they would end up in "/".
	if [ -z "${tmpdir}" ]; then
		tmpdir=${CLUSTERVAR}
	fi
	admindir=${CLUSTERVAR}/admindir
	logfile=${CLUSTERVAR}/scadmin.log
	# needs to be in sync with pdbadmin

	# export variables w.r.t. nodes/network interface which are
	# statically determined from CDB/CCD file.

	numnodes=`enmatch cmm.nodes`
	net_if_type=`enmatch cluster.net_if_type`

	# set core SC applications available (pdbapps).
	# Convert the bit mapping into modules. At a later stage we can
	# use module names only and remove the bit mapping; a ccdmatch
	# could list the modules instead of doing multiple appmatches.

	# Assume all modules are configured.
	MODNOTCONFIG=""

	pdbapps=`enmatch cluster.pdbapps`

	# Volume manager: cvm when the CVM bit is set, else vxvm when the
	# VxVM bit is set; with neither bit "cvm" is listed as not
	# configured (and vm keeps the value printed by appmatch).
	vm=`appmatch ${pdbapps} ${CVM}`
	if [ "${vm}" = "1" ]; then
		vm=cvm
	else
		vm=`appmatch ${pdbapps} ${VxVM}`
		if [ "${vm}" = "1" ]; then
			vm=vxvm
		else
			MODNOTCONFIG="${MODNOTCONFIG} cvm"
		fi
	fi

	udlm=`appmatch ${pdbapps} ${OPS}`
	if [ "${udlm}" -eq 0 ]; then
		MODNOTCONFIG="${MODNOTCONFIG} udlm"
	fi

	nd=`appmatch ${pdbapps} ${Netdisk}`
	if [ "${nd}" -eq 0 ]; then
		MODNOTCONFIG="${MODNOTCONFIG} netdisk cti"
	fi

	ccd=`appmatch ${pdbapps} ${CCD}`
	if [ "${ccd}" -eq 0 ]; then
		MODNOTCONFIG="${MODNOTCONFIG} ccd"
	fi

	# Note that for cmm, ccm and quorum flags, the
	# components they represent are considered to be on
	# if they have a value of 0.
	ccm=`appmatch ${pdbapps} ${CCM}`
	if [ "${ccm}" -eq 1 ]; then
		MODNOTCONFIG="${MODNOTCONFIG} ccm"
	fi

	# Split the list into an array once so that
	# check_module_configured does not have to do it on every call.
	set -A MODULES ${MODNOTCONFIG}

	# Create static file pathnames used in the synchronization
	# of various reconf programs.
	# export CCM selected files.
	ccm_selected_net_file=`${CLUSTERBIN}/cdbmatch ccm.script.net.file ${cdbfile}`
	ccm_top_net_file=`${CLUSTERBIN}/cdbmatch ccm.script.topnet.file ${cdbfile}`

	# export variables used by cluster applications.
	DOINGSTOPFLAG=${tmpdir}/didstopprogs
		# hooks for execution of user cluster applications
	RESERVEDCTLS=${tmpdir}/ssa_is_reserved
		# control disk reservation
	FORCESTARTFLAG=${tmpdir}/do_not_use_ssa_reservations
		# ok to continue without majority quorum?
	ISRUNNINGFLAG=${tmpdir}/cluster_is_running
		# is the cluster already online
	CMMSEQNUM=`${CLUSTERBIN}/cdbmatch cmm.sequencefile ${cdbfile}`
	if [ -z "$CMMSEQNUM" ]; then
		CMMSEQNUM=/var/opt/SUNWcluster/cmm/reconf-seqnum.${clustname}
	fi

	# create the temporary file location
	if [ ! -d $tmpdir ]; then
		mkdir -p $tmpdir
	fi
	set +a
}

#####################################################################
# validate_env_vars:                                                #
#  Publishes currnodes, localnodeid, seqnum and allnodes as exported#
#  variables.  Each one is copied from the matching upper case      #
#  variable (CURRNODES, LOCALNODEID, SEQNUM, ALLNODES) when clustd  #
#  supplied it; otherwise the value is requested from clustm.       #
#####################################################################
function validate_env_vars
{
	# export every assignment made until "set +a"
	set -a

	if [[ -n "${CURRNODES}" ]]; then
		currnodes=${CURRNODES}
	else
		currnodes=$(${CLUSTERBIN}/clustm getcurrmembers $clustname)
	fi

	if [[ -n "${LOCALNODEID}" ]]; then
		localnodeid=${LOCALNODEID}
	else
		localnodeid=$(${CLUSTERBIN}/clustm getlocalnodeid $clustname)
	fi

	if [[ -n "${SEQNUM}" ]]; then
		seqnum=${SEQNUM}
	else
		seqnum=$(${CLUSTERBIN}/clustm getseqnum $clustname)
	fi

	if [[ -n "${ALLNODES}" ]]; then
		allnodes=${ALLNODES}
	else
		allnodes=$(${CLUSTERBIN}/clustm getallnodes ${clustname})
	fi

	set +a
}

##########################################################
# Usage:  Echo the correct usage of the program.         #
#         Reads ${prog} for the program name, prints the #
#         public sub-commands on stdout and exits with   #
#         status 2.                                      #
##########################################################
_usage() {
        echo "Usage: ${prog} [-a] [-f] startnode cluster_name"
        echo "       ${prog} [-a]      stopnode  cluster_name"
        echo ""
        echo "       The [startnode|stopnode] sub-commands are invoked"
        echo "       manually by the system administrator or automatically"
        echo "       from /etc/rc3.d scripts via the 'pdbadmin' script."
        echo ""
        echo "       The reldisks sub-command is invoked manually by the"
        echo "       system administrator, usually by the 'pdbadmin' script."
        exit 2

        # The following are not public entrypoints, so they are kept
        # here as documentation only and are never printed:
        #
        #       ${prog} [cmmstart|cmmstop|cmmabort]  cluster_name
        #       ${prog} [cmmstep0-9|cmmreturn]       cluster_name
        #       ${prog} reldisks cluster_name
        #       ${prog} resdisks cluster_name
        #
        #       The [cmmstart|cmmstepN|cmmstop|cmmabort|cmmreturn]
        #       commands are invoked by the cluster membership
        #       monitor during cluster state transitions.
}

###################################################################
# Reconfiguration FrameWork for executing reconf programs         #
###################################################################
#
# check_execution_error - act on the exit status of one reconf program.
#   $1 - component name
#   $2 - exit status of the component
#
#   0                      report success on stdout, return 0
#   200                    log it, record the component in
#                          ${reconf_error_file}, return 0
#   ${RECONF_REQUIRED_ERR} exit 1 in cmmabort/stopnode; set
#                          reconf_required=1 unless the step is
#                          cmmstart, cmmreturn or startnode; return 0
#   anything else          log the failure and exit 1
#
function check_execution_error
{
	typeset who=$1
	typeset rc=$2

	case ${rc} in
	0)
		# success
		#log_info "$pre.4050"  \
		#        "$1 completed successfully in ${CURRSTEP}";
		# for now store at pdbadmin.log
		echo "$(/bin/date) $pre.1050 ${who} completed successfully in ${CURRSTEP}"
		return 0
		;;

	200)
		# status ignore
		log_info "$pre.5001" "${who} errors status ignored in ${CURRSTEP}"
		echo ${who} >> ${reconf_error_file} 2>/dev/null
		return 0
		;;

	${RECONF_REQUIRED_ERR})
		log_info "$pre.5002" "${who} requests reconfiguration in ${CURRSTEP}"
		case "${CURRSTEP}" in
		cmmabort|stopnode)
			# a reconfiguration cannot be requested while the
			# node is leaving: treat it as a failure
			log_info "$pre.4051" "${who} exited with 205 in ${CURRSTEP}"
			sleep 1
			exit 1
			;;
		cmmstart|cmmreturn|startnode)
			# request is ignored in these steps
			;;
		*)
			# Reconfigure status: after the step completes,
			# send 205 to cmm so that it can reconfigure.
			reconf_required=1
			;;
		esac
		return 0
		;;

	*)
		# fail all other cases.
		log_info "$pre.4051" "${who} exited with ${rc} in ${CURRSTEP}"
		# to make sure that it is logged in syslog.
		sleep 1
		exit 1
		;;
	esac
}

##############################################################
#  Check if the module needs to be executed.                 #
#    $1 - module (component) name                            #
#  return 1 if module needs to be executed.                  #
#  return 0 if module is not configured to be executed OR    #
#           is in reconfiguration error file OR no name was  #
#           given.                                           #
#  Reads the MODULES array and ${reconf_error_file}; leaves  #
#  errexit switched off and sets the global modname.         #
##############################################################
function check_module_configured
{
	typeset skipped failed

	# a return status of 1 is a normal answer here, so errexit must
	# not be active while callers test it
	set +e

	modname=$1
	if [ -z "${modname}" ]; then
		return 0
	fi

	# name found in the not configured list?
	for skipped in "${MODULES[@]}"
	do
		if [ "${skipped}" = "${modname}" ]; then
			return 0
		fi
	done

	# The error file holds one component name per line. Compare whole
	# lines: a substring match would also skip modules whose name is
	# merely contained in the name of a failed one.
	if [ -r "${reconf_error_file}" ]; then
		while read -r failed
		do
			if [ "${failed}" = "${modname}" ]; then
				return 0
			fi
		done < "${reconf_error_file}"
	fi

	return 1
}

########################################################################
#                                                                      #
# Execute Reconfiguration Programs:                                    #
#                                                                      #
# Parameter 1: Directory from which we have to execute the             #
#              Reconfiguration Programs (files named NN_component).    #
# Variables  :                                                         #
#   reconf_prog:                                                       #
#        Reconfiguration programs for step N                           #
#   current_reconf_prog:                                               #
#        Reconfiguration programs for the current execution sequence   #
#   cur_seqno,seqno: Execution sequence numbers.                       #
#   count: count is the number of programs for concurrent execution.   #
#                                                                      #
# Programs sharing a sequence number run concurrently, a single one    #
# runs in the foreground.  Every exit status is handed to              #
# check_execution_error.  Returns 0 with errexit unchanged when the    #
# directory does not exist, otherwise returns with errexit switched    #
# on.  Exits with RECONF_REQUIRED_ERR when a program requested a       #
# reconfiguration, and with 1 when the directory cannot be entered.    #
#                                                                      #
########################################################################
function execute_reconf
{
	typeset -i i
	typeset -i count

	reconfdir=$1

	#
	# check for the existence of the script directory. This is done
	# before errexit is switched off so that the early return does not
	# leave the caller running without errexit.
	#
	if [ ! -d "${reconfdir}" ]
	then
		#echo "No actions for this step"
		return 0
	fi

	# exit statuses are inspected by hand from here on
	set +e

	#
	# Get the complete listing of the actions to be
	# executed for step N. Never list or run programs from whatever
	# directory we happened to be in when the cd fails.
	#
	if cd "${reconfdir}"
	then
		:
	else
		echo "execute_reconf: cannot change directory to ${reconfdir}"
		exit 1
	fi
	CURDIR=${PWD}
	set -A reconf_prog `/usr/bin/ls ??_* 2>/dev/null`

	reconf_required=0

	# set the index to the start of the array
	i=0
	while [ -n "${reconf_prog[i]}" ]
	do
		prog=${reconf_prog[i]}
		#
		# extract sequence number and module name.
		#
		component=${prog#*_}
		cur_seqno=${prog%%_*}
		count=1
		current_reconf_prog=${reconf_prog[i]}
		i=i+1
		check_module_configured ${component}
		if [ $? -eq 0 ]; then
			# not configured, or recorded in the error file
			continue
		fi

		# collect the items with the same sequence number.
		while [ -n "${reconf_prog[i]}" ]
		do
			prog=${reconf_prog[i]}
			seqno=${prog%%_*}
			component=${prog#*_}
			if [ ${cur_seqno} -eq ${seqno} ]
			then
				# check for error in reconf_error_file.
				check_module_configured ${component}
				if [ $? -ne 0 ]; then
					current_reconf_prog="${current_reconf_prog} ${reconf_prog[$i]}"
					count=count+1
				fi
			else
				# No more reconf programs with same seqno.
				break
			fi
			i=i+1
		done

		# Now execute the reconf programs.
		if [ ${count} -eq 1 ]
		then
			component=${current_reconf_prog#*_}
			echo "`/bin/date`" "$pre.1340" \
				"${component} started in ${CURRSTEP}"
			eval ${CURDIR}/${current_reconf_prog}
			error=$?
			check_execution_error ${component} ${error}
		else
			# Multiple reconfiguration programs to be
			# executed.
			set -A execute_items ${current_reconf_prog}
			typeset -i execute=0
			while [ -n "${execute_items[execute]}" ]
			do
				prog=${execute_items[execute]}
				component=${prog#*_}
				# A background program reports a failure by
				# leaving its exit status in this file; remove
				# a stale one first.
				rm -rf ${tmpdir}/${component}.${CURRSTEP}
				echo "`/bin/date`" "$pre.1340" \
					"${component} started in ${CURRSTEP}"
				(eval ${CURDIR}/${prog} || \
					echo $? > ${tmpdir}/${component}.${CURRSTEP}) &
				execute=execute+1
			done
			# wait for the result
			wait
			# check for the results using the status files.
			execute=0
			while [ -n "${execute_items[execute]}" ]
			do
				prog=${execute_items[execute]}
				component=${prog#*_}
				error=0
				if [ -f ${tmpdir}/${component}.${CURRSTEP} ]
				then
					error=`cat ${tmpdir}/${component}.${CURRSTEP}`
				fi
				check_execution_error ${component} ${error}
				execute=execute+1
			done
		fi
		#
		# if reconfiguration was required by the component
		# then do not execute any more components in the step
		# and exit with RECONF_REQUIRED_ERR.
		#
		if [ ${reconf_required} -eq 1 ]; then
			exit ${RECONF_REQUIRED_ERR}
		fi

	done
	set -e
}

############################################################
#  Handle All CMM-Transactions requested by clustd         #
############################################################
# startnode_cmd - join the local node to the cluster.
#
# Refuses to start when the running flag exists and clustm still
# answers, runs the rcI.d programs, looks up this host in the CDB,
# removes stale state files, starts the networks and clustd and, unless
# ${async} is 1, waits until clustm reports the state "end".
# Messages for the interactive user are written to file descriptor 3.
# Exits with status 1 when clustm stops answering during the wait or
# when the running flag has disappeared.
function startnode_cmd
{
	log_trace  startnode
	
	#
	# XXX sanity checks for a well configured Node.
	# It could also include <timeout-Calculation>.
	# 
	# NOTE(review): the "exit 1" below runs inside ( ... ), so by
	# itself it only ends the subshell; the script stops here only
	# if errexit is in effect at this point - confirm.
	if [ -f ${ISRUNNINGFLAG} ]; then
           	# check if clustd is really running
                ${CLUSTERBIN}/timed_run -q 3 ${CLUSTERBIN}/clustm getstate \
			${clustname} >/dev/null 2>&1 &&  ( echo \
" This node is already running as part of the ${clustname} cluster" >&3
			/bin/rm -f ${tmpdir}/startcluster
                        exit 1)
        fi
	#
	#  Run the rcI.d programs.
	#  But if they are installed by packages then it must be 
	#  Ok.
	execute_reconf ${RECONF_DIR}/rcI.d/

	#
	# Make sure that the node is present in the database: find the
	# index i whose CDB hostname equals the local node name.
	# NOTE(review): when no hostname matches, the loop simply ends
	# with i equal to numnodes and the start continues - confirm
	# that this is intended.
	#
        nodename=`eval /bin/uname -n`
        integer i=0;
        while [ $i -lt $numnodes ]; do
                if [ `enmatch cluster.node.$i.hostname` = ${nodename} ]; then
                        break;
                fi
                i=i+1
        done

	touch ${ISRUNNINGFLAG}
	log_info "$pre.1150" "Starting Sun Cluster: node $i ($nodename) joining the ${clustname} cluster"
	echo "Starting Sun Cluster software - joining the ${clustname} cluster" >&3

	# Clean up stale files.
	if [ -f ${ccm_selected_net_file} ]; then
		/bin/rm -f ${ccm_selected_net_file}
	fi
	/usr/bin/rm -f ${CMMSEQNUM}
	/usr/bin/rm -f ${tmpdir}/nodelock.$i

	log_trace start_networks
	start_networks
	log_trace_end start_networks

	# stdout/err are redirected by caller to the logfile
	# ${newcluster} is "-n" when the -n option was given, else empty.
        ${CLUSTERBIN}/clustd ${newcluster} -f ${cdbfile}  # rjw

	/bin/rm -f ${DOINGSTOPFLAG}

	# unless the '-a' flag was specified, wait for the node to do
        # the first reconfiguration.
        if [ "${async}" != 1 ]; then
                while [ -f  ${ISRUNNINGFLAG} ]; do
                        # check if clustd is still running
                        state=`${CLUSTERBIN}/timed_run -q 20 ${CLUSTERBIN}/clustm getstate ${clustname} 2>/dev/null` ||\
                                 exit 1
                        if [ "$state" = "end" ]; then
                                break
                        else
                                sleep 3
                        fi
                done
        fi
	# the running flag vanished: the start did not succeed
        if [ ! -f  ${ISRUNNINGFLAG} ]; then
                exit 1
        fi
	log_trace_end startnode
}

# stopnode_cmd - pdb cluster graceful shutdown of the local node.
#
# Exits with status 1 when the running flag file is absent.  Otherwise
# it exports the membership reported by clustm, creates the "doing
# stop" flag, runs the rcK.d programs, asks clustm to stop this node
# and, unless ${async} is 1, waits for the running flag to disappear.
# Messages for the interactive user go to file descriptor 3.
function stopnode_cmd
{
	log_trace stopnode

	[ -f ${ISRUNNINGFLAG} ] || {
		echo "The Sun Cluster software is not currently running on this node." >&3
		exit 1  # for bugID 1166404
	}
	echo "Stopping the Sun Cluster software - leaving the ${clustname} cluster" >&3

	# publish the current membership to the rcK.d programs
	set -a
	currnodes=$(${CLUSTERBIN}/clustm getcurrmembers ${clustname})
	localnodeid=$(${CLUSTERBIN}/clustm getlocalnodeid ${clustname})
	allnodes=$(${CLUSTERBIN}/clustm getallnodes ${clustname})
	set +a
	touch ${DOINGSTOPFLAG}

	# handle rcK.d stopnode transaction.
	execute_reconf ${RECONF_DIR}/rcK.d/

	${CLUSTERBIN}/clustm stop ${clustname} this

	# unless the '-a' flag was specified, wait for the node to shutdown
	if [[ "${async}" != 1 ]]; then
		until [ ! -f ${ISRUNNINGFLAG} ]
		do
			sleep 1
		done
	fi

	# this node was the only member left
	if [[ "$currnodes" = "$localnodeid" ]]; then
		echo "The ${clustname} cluster has no active hosts." >&3
	fi
	#log_info "$pre.1070" "${clustname} cluster is stopped on this node"
	log_trace_end stopnode
}

########################################################################
# called from cluster membership monitor "abort" and "stop" transitions
#
# cmmabort_cmd - sets CURRSTEP to cmmabort, clears the reconf error
# file, runs the rcA.d programs, stops the networks, removes the
# running flag and starts the system reconfiguration script in the
# background with the arguments: ${cmd}, the sequence number and a
# YYYYMMDDhhmmss time stamp.
########################################################################
function cmmabort_cmd
{
	log_trace cmmabort_cmd

	log_info "${pre}.1200" \
		"Reconfiguration step abort started"
	validate_env_vars
	export CURRSTEP=cmmabort

	rm -rf ${reconf_error_file}

	# Handle rcA.d transactions
	execute_reconf ${RECONF_DIR}/rcA.d/

	log_trace stop_networks
	stop_networks
	log_trace_end stop_networks

	/bin/rm -rf ${ISRUNNINGFLAG}

	# start up post-reconfiguration scripts (e.g. user defined scripts)
	# Pass ${seqnum}, the value validate_env_vars resolved (from
	# SEQNUM or from clustm), so that the argument is not silently
	# dropped when SEQNUM is missing from the environment.
	priocntl -c TS -p 59 -e \
		${CLUSTERVAR}/system.scripts/${clustname}.reconfig.sys_script \
		${cmd} ${seqnum} `/usr/bin/date +\%Y\%m\%d\%H\%M\%S` &

	log_info "${pre}.1201" \
		"Reconfiguration step abort completed"
	log_trace_end cmmabort_cmd
}

# cmmreturn_cmd - handle the cmm "return" transition: refresh the
# membership variables and run the rcR.d programs.
function cmmreturn_cmd
{
	log_trace cmmreturn_cmd

	log_info "${pre}.1200" \
		"Reconfiguration step return started"

	validate_env_vars
	# XXX. Handle rcR.d
	execute_reconf ${RECONF_DIR}/rcR.d/

	# As in the other step handlers, completion is reported with
	# message id 1201 and the trace is closed with log_trace_end.
	log_info "${pre}.1201" \
		"Reconfiguration step return completed"

	log_trace_end cmmreturn_cmd
}

# cmmstart_cmd - handle the cmm "start" transition: refresh the
# membership variables, forget the errors recorded by the previous
# reconfiguration and run the rcS.d programs.
function cmmstart_cmd
{
	log_trace cmmstart_cmd

	validate_env_vars

	# start with an empty list of ignored components
	rm -rf ${reconf_error_file}

	log_info "${pre}.1200" "Reconfiguration step start started"

	# Handle rcS.d transactions
	execute_reconf ${RECONF_DIR}/rcS.d/

	log_info "${pre}.1201" "Reconfiguration step start completed"

	log_trace_end cmmstart_cmd
}

# cmmstep1_cmd - first reconfiguration step.
#
# Logs the host names of the current members, drops the top net file
# when this node is the only member, runs the rc1.d programs and
# finally kills the process recorded in ${admindir}/reserve.pid (when
# that file exists) and removes ${admindir}.
function cmmstep1_cmd
{
	validate_env_vars

	log_info "${pre}.1200" "Reconfiguration Step 1 started"

	# translate the ids of the current members into host names
	set -A nodes `echo ${currnodes}`
	typeset -i i=0
	names=""
	until [ -z ${nodes[i]} ]
	do
		thisname=$(enmatch cluster.node.${nodes[i]}.hostname)
		names="$names ${thisname}"
		i=$(( i + 1 ))
	done
	log_info "$pre.1120" "${clustname} reconfiguration ${seqnum} started on ${names}"

	if [ "${currnodes}" = "${localnodeid}" ]
	then
		# the cluster has only one node. We can no longer
		# tell which nets are active.
		/usr/bin/rm -f ${ccm_top_net_file}
	fi

	execute_reconf ${RECONF_DIR}/rc1.d/

	if [ -f ${admindir}/reserve.pid ]
	then
		pid=$(cat ${admindir}/reserve.pid)
		# a failing kill must not abort the step
		kill -KILL ${pid} || echo $? >/dev/null 2>&1
		/bin/rm -fr ${admindir}
	fi

	log_info "${pre}.1201" "Reconfiguration Step 1 completed"
}

# common_cmmstep - run one numbered reconfiguration step.
#   $1 - step name in the form cmmstepN
#
# Runs the programs in rcN.d and, when N equals the number of steps
# configured in the CDB (cmm.transition.steps), finishes with
# cmmend_step.
function common_cmmstep
{
	log_trace  common_cmmstep

	step=$1
	validate_env_vars

	#
	# Extract "number": strip the "cmmstep" prefix
	#
	CMMPREFIX=cmmstep
	step_number=${step#${CMMPREFIX}}

	log_info "${pre}.1200" "Reconfiguration Step ${step_number} started"

	#
	# Extract the number of steps from the CDB
	#
	last_step=$(enmatch cmm.transition.steps)

	reconfdir=${RECONF_DIR}/rc${step_number}.d/
	execute_reconf ${reconfdir}

	log_info "${pre}.1201" "Reconfiguration Step ${step_number} completed"

	# the last step also triggers the post-reconfiguration work
	if [ $step_number -eq $last_step ]; then
		cmmend_step
	fi

	log_trace_end common_cmmstep
}

# cmmend_step - work done after the last reconfiguration step.
#
# Starts the system reconfiguration script in the background with the
# arguments "up", the sequence number, a YYYYMMDDhhmmss time stamp and
# ${CLNODEUP}, then runs the rcP.d (post reconfiguration) programs.
# Leaves errexit switched on.
function cmmend_step
{
	set +e
	validate_env_vars

	# start up post-reconfiguration scripts (e.g. user defined scripts)
	# The time stamp uses the same compact format that cmmabort_cmd
	# hands to this script; the former "/%Y/%m/..." format produced a
	# stamp full of slashes.
	# NOTE(review): confirm that the sys_script expects the compact
	# form for the "up" call as well.
	priocntl -c TS -p 59 -e \
		${CLUSTERVAR}/system.scripts/${clustname}.reconfig.sys_script \
		up ${seqnum} `/usr/bin/date +%Y%m%d%H%M%S` ${CLNODEUP} &
	set -e
	# Handle rcP.d (post reconfiguration script).
	reconfdir=${RECONF_DIR}/rcP.d/
	execute_reconf ${reconfdir}
}

#
#  End of All Functions 
#
# Main program: parse the options, remember the sub-command and the
# cluster name, initialize the environment and dispatch.
#
# get program options
#   -a  asynchronous: do not wait for the start/stop to finish
#   -f  sets forcestart
#   -n  passes "-n" on to clustd and creates ${tmpdir}/startcluster
# getopt(1) normalizes the arguments and ends the options with "--".
set -- `getopt afn $*`  # rjw
if [ $? != 0 ]; then
        _usage
fi

# Consume the options; after the "--" marker only the sub-command and
# the cluster name are left in the positional parameters.
scluster=0
for i in  $*; do
        case $i in
        -a) async=1; shift ;;
        -f) forcestart=1; shift ;;
        -n) newcluster="-n"; scluster=1; shift ;;  # rjw
        --) shift; break;;
        esac
done
 
# prog is the name shown by _usage; CURRSTEP starts out as the
# sub-command and is exported together with clustname.
prog=$0
cmd=$1
export clustname=$2
export CURRSTEP=$cmd

# Cluster Application Bit Assignment. This is done here
# so that we can use symbolic application names throughout the
# reconfiguration script.
# XXX. Not really required ..
# OPS = bit 0, Informix = bit 1, Sybase = bit 2 , CVM = bit 3
# VxVM = bit 4,  SDS = bit5, Netdisk = bit6 , CCD = bit7
# CMM = bit 8,  CCM = bit 9  Quorum = bit 10
OPS=0
Informix=1
Sybase=2
CVM=3
VxVM=4
SDS=5
Netdisk=6
CCD=7
CMM=8
CCM=9
Quorum=10

# From here on an unchecked failing command ends the script.
set -e
init $*

# -n was given: leave a marker file in tmpdir.
if [ $scluster -eq  1 ]; then
	/bin/touch ${tmpdir}/startcluster
fi

#
# Dispatch the call.
#
# We are carefully passing file descriptor 2 from "startnode" to clustd
# and then from clustd to the "cmm???" calls.
# file descriptor #3 is the original stdout if you need to send
# messages to the interactive user.
# stdout and stderr of every handler are appended to ${logfile}.
#
case ${cmd} in
        # interactive commands
        startnode)              startnode_cmd    3>&1 1>>${logfile} 2>&1 ;;
        stopnode)               stopnode_cmd     3>&1 1>>${logfile} 2>&1 ;;
 
        # async commands - not interactive
        cmmstop | cmmabort)     cmmabort_cmd     3>&1 1>>${logfile} 2>&1 ;;

	# these sub-commands map directly onto a <cmd>_cmd function
	cmmstart | cmmreturn | cmmstep1) \
				eval ${cmd}_cmd  3>&1 1>>${logfile} 2>&1 ;;
	# every other numbered step shares one handler
	cmmstep[2-9]| cmmstep[0-9][0-9])
                                common_cmmstep $cmd 3>&1 1>>${logfile} 2>&1 ;;
        # reserve_all_shared_devs and release_all_shared_devs are not
        # defined in this file (presumably in a sourced helper - confirm)
        resdisks)               reserve_all_shared_devs 3>&1 1>>${logfile} 2>&1
;;
        reldisks)               release_all_shared_devs 3>&1 1>>${logfile} 2>&1
;;
 
        # unknown sub-command: print the usage and exit 2
        *)                      _usage                              ;;
esac
exit 0
