#!/bin/ksh
#
# @(#)pdbadmin 1.54 98/07/09 SMI
#
# 	Copyright (c) 1994 Sun Microsystems, Inc.
#
# scadmin - SPARCcluster SC administration script.
#

# Name this script was invoked as (used in messages).
prog=$(basename "$0")

# Standard SUNWcluster installation directories and tool paths.
B=/opt/SUNWcluster/bin
E=/etc/opt/SUNWcluster
V=/var/opt/SUNWcluster
PKGINFO=/bin/pkginfo
GREP=/bin/grep
SED=/bin/sed

# print usage and exit
# Print the command synopsis on stdout and terminate with status 2.
usage() {
	cat <<EOF
Usage:
	$0 [-a] [-f] startcluster localnodename clustname
	$0 [-a] [-f] startnode [clustname]
	$0 [-a] stopnode [clustname]
	$0 abortpartition localnodename clustname
	$0 continuepartition localnodename clustname
	$0 reldisks [clustname]
	$0 resdisks [clustname]
	$0 switch   clustname [-m] logical-hosts ...
	$0 switch   clustname dest-host logical-hosts ...
	$0 switch   clustname -r
EOF
	exit 2
}

# lookup a value in the configuration file
# Look up a key in the cluster configuration database file.
# Prints the matched value on stdout.  On lookup failure, reports the
# failed command on stderr and returns 1.
#
# Arguments: key(s) passed straight through to cdbmatch.
# Globals:   B (tool directory), cdbfile (cdb path) - set by caller.
enmatch() {
	# Use a { } command group rather than a subshell so 'return 1'
	# sets this function's exit status directly (in a subshell the
	# 'return' only terminated the subshell).  Quote "$@" so the
	# arguments are passed through unmodified.
	${B}/cdbmatch "$@" ${cdbfile} || {
		echo "enmatch" "cdbmatch $* ${cdbfile} failed" 1>&2
		return 1
	}
}

# find the nodeid of the local node
# Determine the cdb node id (0-3) of the local host and leave it in
# the global 'myid'.  Exits with status 1 if the local host is not
# configured as a member of the cluster.
get_nodeids () {
	myuname=`uname -n`
	for myid in 0 1 2 3
	do
		if [ "`enmatch cluster.node.${myid}.hostname`" = "${myuname}" ]
		then
			return 0
		fi
	done
	echo "Host ${myuname} is not a cluster member"
	exit 1
}
# check the nodename entered for a valid cluster member.
# Check that $1 names a configured (potential) cluster member.
# Scans every configured node; exits with status 1 if no node matches.
# On success returns 1 (historical quirk - callers ignore the return
# value and rely on the exit-on-failure behavior).
nodename_in_cluster () {
	typeset -i status
	typeset -i node
	typeset -i numofnodes

	nodename=$1
	let status=0
	let numofnodes=$(enmatch cluster.number.nodes)

	# Walk the full node list; a match just flips the flag.
	let node=0
	while (( node < numofnodes )); do
		if [ "`enmatch cluster.node.${node}.hostname`" = "${nodename}" ]; then
			let status=1
		fi
		let node=node+1
	done

	if (( status == 0 )); then
		echo "Error: Host ${nodename} is not a potential cluster member."
		exit 1
	fi
	return ${status}
}

# Verify via the CCD that a reconfiguration may proceed, then drive
# the cluster reconfiguration.  Exits 1 if the CCD is not ready.
# Globals: B, clustname.
function reconfigure_cluster
{
	${B}/scccd ${clustname} "" tryattach
	if (( $? != 0 )); then
		print "The CCD is not ready for cluster reconfiguration."
		print "Attempt the reconfiguration later."
		exit 1
	fi
	${B}/clustm reconfigure ${clustname}
}

# Serialize switchover attempts across the cluster by adding a
# LOCKNODE row to the CCD.  While another switchover holds the lock,
# retries every 5 seconds.  Returns 0 once the lock is held, or 1 if
# this node drops out of the cluster membership while waiting.
# Globals: B, clustname.
function obtain_switchover_lock
{
    let wait=1
    while (( wait != 0 )); do
        ${B}/scccd ${clustname} LOCKNODE add lnode ${clustname} > /dev/null
        if (( $? != 0 )); then
	    let wait=wait+1
	    # Use the full path to get_node_status, consistent with the
	    # rest of this script; a bare name would depend on the
	    # caller's PATH containing ${B}.
	    p=$(${B}/get_node_status | grep sc: | awk '{print $2}')
	    # Give up if this node is no longer an included member.
	    # (The "aborted" test is subsumed by != "included" but is
	    # kept for clarity.)
	    if [[ ${p} == "aborted"  || ${p} != "included" ]]; then
			return 1
	    fi
	    sleep 5
        else
	    let wait=0
        fi
    done
    return 0
}

# Remove the LOCKNODE row taken by obtain_switchover_lock, retrying
# until the CCD update succeeds.  Exits 1 (without aborting the node)
# if this node leaves the cluster membership before the lock can be
# released.  Globals: B, clustname.
function release_switchover_lock
{
	let wait=1
	while (( wait != 0 )); do
	   ${B}/scccd ${clustname} LOCKNODE remove lnode ${clustname}
	   if (( $? == 0 )); then
		let wait=0
	   fi
	   # Full path to get_node_status, consistent with the rest of
	   # this script (a bare name would depend on the caller's PATH).
	   p=$(${B}/get_node_status | grep sc: | awk '{print $2}')
	   if [[ ${p} != "included" ]]; then
		print "Unable to release switchover lock."
		print "Exiting switch attempt without abort."
		exit 1
	   fi
	done
	return 0
}

# Refuse to proceed with a switchover while the cluster is in the
# middle of a reconfiguration.  Parses the 'sc:' state field out of
# get_node_status output; if it reads "in_transition reconfiguration",
# releases the switchover lock and exits 1.
# Globals: B, clustname (via release_switchover_lock).
function check_for_reconfiguration
{
	# Fix: the original declared 'pdb_state' but assigned/tested
	# 'pdb_status', unintentionally making pdb_status a global.
	typeset node_status cmm_state pdb_status

	node_status=$(${B}/get_node_status)
	# Collapse the multi-line output onto a single line so the
	# prefix/suffix pattern stripping below works.
	node_status=$(print ${node_status})

	cmm_state=${node_status##sc: }
	cmm_state=${cmm_state%% node id:*}
	pdb_status=${cmm_state% \(*\)}

	if [[ "${pdb_status}" = "in_transition reconfiguration" ]]; then
		print "The cluster is undergoing a reconfiguration."
		print "The switchover cannot be performed at this time."
		release_switchover_lock
		exit 1
	fi

}

# Ensure the CCD's LOGHOST_MSTATE table uses the new 'lname:mmode'
# format.  When the old-style format (marked by 'ccd_nofreeze') is
# found, existing rows are saved, the format is replaced, and the
# rows are restored.  Always returns 0.
#
# NOTE(review): ${currnodes} and ${localnodeid} are not set anywhere
# in this script -- presumably provided by the caller's environment;
# confirm before relying on the "one node only" guard below.
function check_mstate_format
{
    typeset node fmt_present
    typeset mstatefile

    # Per-invocation scratch file for saved LOGHOST_MSTATE rows.
    mstatefile=/var/tmp/$$mstate

#
# make sure only one node tries to add the format: proceed only when
# the first entry in ${currnodes} is this node, otherwise defer to it.
#
    for node in ${currnodes}; do
        if [ "${node}" != "${localnodeid}" ]; then
                return 0
        else
                break
        fi
    done

#
# check if format already exists in the CCD. If it does
# not then we will:
#
# 1. store any existing rows from the current format
# 2. remove the current format
# 3. add the new format 
# 4. restore the rows
#
# NOTE(review): the guard below actually fires when the old-format
# 'ccd_nofreeze' marker IS present, despite the wording above.
#
    ccdfile=$(${B}/ccdadm ${clustname} -w)
    fmt_present=$(/bin/egrep 'LOGHOST_MSTATE_sync:.*ccd_nofreeze' ${ccdfile})
    if [[ -n ${fmt_present} ]]; then
	print "Modifying ccd to use new LOGHOST_MSTATE"
# But first, make a backup copy just in case.
	${B}/ccdadm -p ${ccdfile} >/dev/null 2>&1
	# Save all existing LOGHOST_MSTATE rows to the scratch file.
	${B}/scccd ${clustname} LOGHOST_MSTATE query lname '' \
		> ${mstatefile} 2>/dev/null
	# Remove each saved row (keyed by its middle, lname, field)
	# so the format itself can then be removed.
	while read row; do
		lname=$(print ${row} | sed 's/^.*:\(.*\):.*$/\1/')
		${B}/scccd ${clustname} LOGHOST_MSTATE remove \
			 lname "${lname}"
	done <${mstatefile}
	${B}/scccd ${clustname} LOGHOST_MSTATE remove_format
        ${B}/scccd ${clustname} LOGHOST_MSTATE add_format \
		'lname:mmode'  ${B}/mstate_sync
	# Re-insert the saved rows under the new format.
	while read row; do
		row=$(print ${row} |  sed 's/^.*:\(.*:.*\)/\1/')
		${B}/scccd ${clustname} LOGHOST_MSTATE add \
			'lname:mmode' "${row}"
	done < ${mstatefile}
    fi

    /bin/rm -rf ${mstatefile}

    return 0
}

# Perform the 'switch' sub-command: move one or more logical hosts to
# a destination node (or, with -m, into maintenance mode; with -r,
# just trigger a cluster reconfiguration).  The whole operation is
# serialized cluster-wide via obtain/release_switchover_lock.  On an
# unrecoverable CCD inconsistency the owning node is aborted to
# protect data services.
# Globals: B, clustname, cdbfile.
function switchover_cmd
{
typeset dest_host	# destination host
typeset l lhlist	# logical host list and iterator
typeset row		# row retrieved from the CCD
typeset n curr_members	# current cluster membership and iterator
typeset nodename curr_nodes curr_nodeid dest_nodeid
typeset nodenames
integer found
typeset node
typeset -i m_mode=0	# 1 = maintenance mode (-m)
typeset -i r_mode=0	# 1 = reconfigure only (-r)

# Build the list of hostnames currently in the cluster membership.
curr_nodes=""
curr_members=$(${B}/get_node_status | /bin/grep membership \
		| sed -e 's/membership: //')
if [[ "${curr_members}" = "unknown" ]]; then
  print "This node does not appear to be in the cluster membership."
  print "This command cannot be run from this node."
  exit 1
else
  for n in ${curr_members}; do
    nodename=$(${B}/cdbmatch cluster.node.${n}.hostname ${cdbfile})
    curr_nodes="${curr_nodes} ${nodename}"
  done
fi

while getopts mr c
do
 case $c in
    m) let m_mode=1
       ;;
    r) let r_mode=1
       ;;
    # NOTE(review): '2>&1' here looks reversed ('1>&2' intended) and
    # $Myname is never set in this file -- confirm.
    \?) echo 2>&1 "$Myname: $OPTARG is not a valid option."
        exit 2
        ;;
    esac
done

((positions_occupied_by_switches = OPTIND - 1))
shift $positions_occupied_by_switches

# -r takes no other arguments and is exclusive with -m.
if (( r_mode == 1 )); then
	if (( $# >= 1 )) || (( m_mode == 1 )); then
		print "No arguments are required with the '-r' option."
		usage
	else
		reconfigure_cluster
		exit $?
	fi
fi

# Normal mode needs a destination host plus at least one logical
# host; maintenance mode needs only the logical host list.
if ( (( m_mode == 0 )) && (( $# < 2 )) ) || 
	( (( m_mode == 1 )) && (( $# < 1 )) ); then
	print "$0: Required parameters missing."
	usage; exit 1
fi

dest_host=""
if (( m_mode == 0 )); then
  dest_host=$1
  shift
  # check whether destination host is a cluster member
  let found=0
  for n in ${curr_nodes}; do
    if [[ "${n}" = "${dest_host}" ]]; then
      let found=1
      break
    fi
  done

  if (( found == 0 )); then
    print "Destination host ${dest_host} is not a cluster member."
    exit 1
  fi
fi

lhlist=$*

#
# Obtain switchover lock to serialize switchover attempts
#
obtain_switchover_lock

if (( $? != 0 )); then
	print "Unable to obtain switchover lock."
	print "Exiting switchover attempt."
	exit 1
fi

# Process each requested logical host independently; problems with
# one logical host are reported and the loop continues with the next.
for l in ${lhlist}; do

  # check for the validity of the logical host
  # using a static query to the CCD.
  ccdfile=$(${B}/ccdadm ${clustname} -w)
  row=$(${B}/scccd -f ${ccdfile} ${clustname} LOGHOST query lname ${l})
  if [[ -z "${row}" ]]; then
    print "The logical host ${l} is not defined in the Cluster Configuration Database."
    print "Ignoring ${l}."
    continue
  fi

  # Get nnodelist and check if the destination node exists
  # only if we are not in maintenance mode.
  if (( m_mode == 0 )); then
        # Third ':'-separated field of the LOGHOST row is the
        # comma-separated list of nodes allowed to master this host.
        nodenames=`echo ${row} | /usr/bin/awk -F: '{ print $3}' | \
                          /usr/bin/tr ',' ' ' `
        found=0
        for node in ${nodenames}
        do
                if [ "${node}"  = "${dest_host}" ]; then
                        found=1
                        break;
                fi
        done
        if (( found == 0 )); then
           print "The specified destination host - ${dest_host} - is not configured to master "
           print "this logical host ${l}."
           continue
        fi
  fi

  # query for the current master of the logical host
  row=$(${B}/scccd ${clustname} LOGHOST_CM query lname ${l})
  # Strip everything through the second ':' to get the master name.
  curr_master=${row#*:*:}

  if [[ -n "${curr_master}" ]]; then

    if [[ -n "${dest_host}" && "${curr_master}" = "${dest_host}" ]]; then
      print "The specified destination host - ${dest_host} - is the current"
      print "master of logical host ${l}. Ignoring ${l}."
      continue
    fi


    # find the node id of the current master

    for n in ${curr_members}; do
      nodename=$(${B}/cdbmatch cluster.node.${n}.hostname ${cdbfile})
      if [[ "${nodename}" = "${curr_master}" ]]; then
	curr_nodeid=${n}
      fi
      if [[ "${nodename}" = "${dest_host}" ]]; then
	dest_nodeid=${n}
      fi
    done

    check_for_reconfiguration

    # Removing the LOGHOST_CM row brings the logical host down on
    # its current master.
    ${B}/scccd ${clustname} LOGHOST_CM remove lname ${l}

    if (( $? != 0 )); then
      #
      # Add it back so that the Logical Host
      # is brought to sane state again..
      #
      print "Unable to bring down logical host ${l} on ${curr_master}."
      print "Check system console logs on ${curr_master} for detailed"
      print "error messages. Trying to re-master it on the original"
      print "node"

      if (( m_mode == 1 )); then
         set_loghost_mstate ${l} 1
      fi

      ${B}/scccd ${clustname} LOGHOST_CM add "lname:curr_master" \
		"${l}:${curr_master}"

      if (( $? != 0 )); then
	print "Unable to re-master the logical host ${l} on ${curr_master}"
	print "Aborting node to prevent data service inconsistencies."
	release_switchover_lock
        ${B}/clustm abort ${clustname} ${curr_nodeid}
        exit 1
      fi

      # we got the logical host re-mastered and hence continue
      continue
    fi
    if (( m_mode == 1 )); then
         set_loghost_mstate ${l} 0
    fi
  fi

  # In normal (non-maintenance) mode, bring the logical host up on
  # the destination by adding the new LOGHOST_CM row.
  if [[ -n "${dest_host}" ]]; then

    if (( m_mode == 0 )); then
        set_loghost_mstate ${l} 1
    fi

    ${B}/scccd ${clustname} LOGHOST_CM add "lname:curr_master" \
		"${l}:${dest_host}"

    if (( $? != 0 )); then
      #
      # clean up the stuff if required, so that logical host is
      # not mastered anywhere.
      #

      print "Unable to bring up logical host ${l} on ${dest_host}."
      print "Check system console logs on ${dest_host} for detailed"
      print "error messages."

      ${B}/scccd ${clustname} LOGHOST_CM remove lname ${l}

      if (( $? != 0 )); then

	 print "Unable to clean up logical host ${l} on ${dest_host}."
	 print "Aborting node to prevent data service inconsistencies."

	 release_switchover_lock
         ${B}/clustm abort ${clustname} ${dest_nodeid}
         exit 1

      else

	#
	# we try to re-master the logical host back to its
	# original node where it was being mastered before.
	# If this fails , do a clean up and exit  the command.
	#

	if [[ ${m_mode} -eq 0  && -n "${curr_master}" ]]; then

     	    ${B}/scccd ${clustname} LOGHOST_CM add "lname:curr_master" \
		"${l}:${curr_master}"

       	    if [[ "$?" -ne 0 ]]; then
		print "Unable to Re-Master the Logical Host ${l} on ${curr_master}"
		print "Logical Host ${l} will not be mastered anywhere"

      		${B}/scccd ${clustname} LOGHOST_CM remove lname ${l}

		if [[ "$?" -ne 0 ]]; then
	 	    release_switchover_lock
         	    ${B}/clustm abort ${clustname} ${curr_nodeid}
         	    exit 1
		fi
	    fi
	fi
      fi
    fi
  fi
done

#
# Release lock so other switchover attempts can proceed.
#
release_switchover_lock
exit 0
}


# Set the maintenance-mode state of a logical host in the CCD.
# $1 = logical host name, $2 = new mmode value.
# The LOGHOST_MSTATE row is rewritten only when a stored mode exists
# and differs from the requested value.
set_loghost_mstate()
{
	typeset logname value

	logname=$1
	value=$2

	# Make sure the CCD carries the new LOGHOST_MSTATE format first.
	check_mstate_format

	mstate=$(${B}/scccd ${clustname} LOGHOST_MSTATE query lname ${logname})
	if (( $? != 0 )); then
		print "Unable to get the current state for logical host ${logname}."
		print "Check system logs for detailed error messages."
		return
	fi

	# Strip everything through the second ':' to get the stored mode.
	mode=${mstate#*:*:}
	if [[ -n ${mode} && ${mode} != ${value} ]]; then
		${B}/scccd ${clustname} LOGHOST_MSTATE remove lname ${logname}

		${B}/scccd ${clustname} LOGHOST_MSTATE add "lname:mmode" "${logname}:${value}"

		if (( $? != 0 )); then
			print "Unable to set the State of Logical Host to ${value}"
			print "Check system console logs for detailed error messages."
		fi
	fi
}



# get program options
# NOTE(review): this is the legacy getopt(1), which mishandles
# arguments containing whitespace; all expected arguments here are
# single words, so it is adequate.
set -- `getopt af $*`
if [ $? != 0 ]
then
        usage
fi
# -a = asynchronous operation, -f = force start; both are recorded
# here and passed through to reconf_ener during dispatch below.
for i in  $*
do
        case $i in
        -a) async="-a"; shift ;;
        -f) forcestart="-f"; shift ;;
        --) shift; break;;
        esac
done

# At least the sub-command name must remain after option processing.
if [ $# -lt 1 ]; then
 echo "Missing required parameter!"
 usage
 exit 2
fi

# Validate the sub-command name before any further argument parsing.
cmd=$1
case ${cmd} in
startnode | stopnode | reldisks | startcluster | \
continuepartition | abortpartition | resdisks | switch)
	;;
*)
	echo "invalid command: ${cmd}"
	usage
	exit 2
	;;
esac

# rjw +
# Commands that operate on a partition or create a cluster take an
# explicit local node name as argument 2 and the cluster name as
# argument 3; all other commands take the cluster name as argument 2.
nodename=""
if [ ${cmd} = "continuepartition" -o ${cmd} = "abortpartition" -o\
		${cmd} = "startcluster" ]; then
	nodename="$2"
	clustname="$3"
	if [ -z "${nodename}" -o -z "${clustname}" ]; then
		usage
	else
		echo "Node specified is ${nodename}"
		echo "Cluster specified is ${clustname}"
	fi
else
	clustname=$2
fi
# rjw -

# Fall back to the site default cluster name when none was given.
if [ -z "${clustname}" ]; then
	if [ -f ${E}/conf/default_clustername ]; then
		clustname=`cat ${E}/conf/default_clustername`
		echo "Assuming a default cluster name of ${clustname}"
	else
		usage
	fi
fi

# Path to the cluster configuration database for the chosen cluster;
# its existence is what validates the cluster name.
cdbfile=${E}/conf/${clustname}.cdb

if [ ! -f $cdbfile ]; then
	echo "Error: Invalid cluster name: $clustname"
	echo "       The file \"$cdbfile\" was not found"
	usage
fi

# set variables related to node ids (get_nodeids leaves the local
# node's id in the global 'myid', or exits if we are not a member)
get_nodeids
# NOTE(review): the 'eval' here is redundant; a plain command
# substitution would behave the same.
mynode=`eval enmatch cluster.node.${myid}.hostname`
### this is a throwback to the 2 node cluster days.  needs to be cleaned up

# check if nodename is the name of my node  rjw
if [ -n "${nodename}" ]; then
# return status from nodename_in_cluster really does not
# have to be checked since if nodename is not defined to be
# a cluster member we will not return.
        nodename_in_cluster $nodename
 	if [ ${nodename} != ${mynode} ]; then
		print "Error: Attempted to start Node ${nodename}."
	  	print "       This is node ${mynode}."
		print "       Cannot start a cluster from a remote node."
		exit 2
	fi
fi


# -f option is legal only with the 'startnode' and 'startcluster' commands
if [ "${forcestart}" = -f -a ${cmd} != "startnode" -a ${cmd} != "startcluster" ] ; then
	echo "Error: The -f option is only legal when used with the 'startcluster'"
	echo "or 'startnode' sub-command"
	usage
fi

# warn operator about the consequences of the -f option
if [ "${forcestart}" = -f ] ; then

	# XXX - allow running a customized script here

	echo "=========================== WARNING ================================="
	echo "=      Multiple Failures have been detected in this cluster         ="
	echo "====================================================================="
	echo
	echo "You are attempting to start up the cluster node '${mynode}' using"
	echo "the -f option.  The -f option allows '${mynode}' to come online when"
	echo "other node(s) and the quorum devices shared with them are not reachable."
	echo "This action could corrupt the database and/or otherwise compromise"
	echo "cluster integrity if used incorrectly. Please read the following"
	echo "instructions carefully and refer to XXX documentation."
	echo 
	echo "Before you proceed, you must verify that:"
	echo "	(1) that the other node(s) are offline (i.e. halted)"
	echo "	(2) the quorum device(s) is offline (i.e. power is off)"
	echo
	echo "Note that you *must* bring the node '${mynode}' offline before"
	echo "restarting the quorum device(s) or any of the other nodes'." \
		" This can be"
	echo "done by executing the command"
	echo
	echo "	/opt/SUNWcluster/bin/scadmin stopnode ${clustname}"
	echo
	echo "Please enter \"yes\" only after you have verified (1) and (2)."

	# Default answer is "no"; only an explicit no/n aborts here --
	# any other reply is treated as consent to continue.
	reply=`ckyorn -Q -d "no" -p " Do you want to continue"`
	case $reply in
			[Nn]|[Nn][Oo])
				echo "exiting..." >& 2 ; exit 0 ;;
	esac
fi

# rjw+
# warn operator about the consequences of the startcluster command  
if [ "${cmd}" = startcluster ] ; then
	# XXX - allow running a customized script here


	echo >& 2
	echo "=========================== WARNING =================================" >& 2
	echo "=                     Creating a new cluster                        =" >& 2
	echo "=====================================================================" >& 2
	echo >& 2
	echo "You are attempting to start up the cluster node '${mynode}' as the" >& 2
	echo "only node in a new cluster.  It is important that no other cluster" >& 2
	echo "nodes be active at this time.  If this node hears from other cluster" >& 2
	echo "nodes, this node will abort.  Other nodes may only join after this" >& 2
	echo "command has completed successfully.  Data corruption may occur if" >& 2
	echo "more than one cluster is active." >& 2
	echo >& 2

	# Default answer is "no"; only an explicit no/n aborts here --
	# any other reply is treated as consent to continue.
	reply=`ckyorn -Q -d "no" -p " Do you want to continue"`
	case $reply in
			[Nn]|[Nn][Oo])
				echo "exiting..." >& 2 ; exit 0 ;;
	esac
fi

# change current working directory to /opt/SUNWcluster/bin to avoid
# all kinds of problems with the 'rm(1)' command when filesystems
# disappear from underneath it or root does not have privileges to
# open the current working directory.

cd /opt/SUNWcluster/bin

# rjw-

# dispatch into reconf_ener
#
# The '|| (echo ...; exit 1) || exit 1' idiom prints a diagnostic in
# a subshell when the underlying command fails, then exits the whole
# script with status 1.

case ${cmd} in
	startnode | stopnode)
		/bin/rm -f ${V}/${clustname}/startcluster
		${B}/reconf_ener ${forcestart} ${async} ${cmd} \
		       ${clustname} \
		      || (echo "$0: errors encountered."; \
			exit 1) || exit 1;;
# rjw+
	startcluster)
		${B}/reconf_ener ${forcestart} ${async} -n startnode ${clustname} \
		      || (echo "$0: errors encountered."; \
			exit 1) || exit 1;;
	continuepartition)
		echo "*** Node ${mynode} will continue participating cluster ${clustname}"
		${B}/clustm continuepartition ${clustname} || \
			(echo "$0: errors encountered."; \
			exit 1) || exit 1;;
	abortpartition)
		echo "*** Node ${mynode} will abort from cluster ${clustname}"
		${B}/clustm abortpartition ${clustname} || \
			(echo "$0: errors encountered."; \
			exit 1) || exit 1;;
	resdisks | reldisks)
		${B}/reconf_ener ${cmd} ${clustname} || \
			(echo "$0: errors encountered."; \
			exit 1) || exit 1;;
	switch)
		if (( $# < 2 )); then
			print "$0: Required parameters missing."
			usage; exit 1
		else
			shift 2
			# Enable job control and run the switchover in
			# the background; this script returns (exit 0
			# below) while switchover_cmd proceeds on its own.
			set -m
			( switchover_cmd $* || \
			  (echo "$0: errors encountered."; exit 1) || exit 1)&
		fi
		;;
# rjw-
	*)
		usage; exit 1 ;;
esac
exit 0

