#!/bin/ksh
#
#pragma ident	"@(#)loghostreconfig.sh	1.21 98/02/23"
#Copyright (C) 1997 Sun Microsystems, Inc.
#
#

# loghostreconfig - Energizer Cluster Reconfiguration Programs for
#                   logical hosts.
#
# Input:
#               All environment variables like "currnodes"/
#               "allnodes"/"localnodeid", CURRSTEP
#
# Action:       Run Reconfiguration Programs based on current
#               "cmmstepN"|"cmmabort"|"cmmstart"|"cmmreturn".
#
# Output:       Return 0 if success.
#               Return 1 if failure
#               Return 200 if the reconfiguration program result is to
#               be ignored.
 
# This script should be used only by the reconf_framework.
# Prefix of the message identifiers handed to log_info by this script.
pre="SUNWcluster.reconf.loghost"

# Installation directories.  A non-empty value inherited from the
# environment is kept; otherwise the stock location is used.
: ${RECONF_SCRIPTS:=/opt/SUNWcluster/etc/reconf/scripts}
: ${CLUSTERBIN:=/opt/SUNWcluster/bin/}
: ${CLUSTERVAR:=/var/opt/SUNWcluster/}
: ${CLUSTERETC:=/etc/opt/SUNWcluster/}

# Inputs of set_retries_for_ccd_operation: fallback transition timeout,
# divisor applied to the timeout, and the lower bound for RETRIES.
DEFAULT_TIMEOUT=180
DEFAULT_WAIT=5
DEFAULT_RETRIES=35

# Retry count passed to "scccd -r"; recomputed per transition by
# set_retries_for_ccd_operation.
RETRIES=5

# "." is the shell's source command; it is kept in a variable and used
# below to pull in the helper script.
INCLUDE=.

# Required for failure fencing code
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.disks

#
# function set_retries_for_ccd_operation action
#
# action	- transition name ("stopnode", "step10", "step11", ...)
#
# Sets the global RETRIES used for CCD operations.  The value is the
# transition timeout divided by DEFAULT_WAIT, raised to DEFAULT_RETRIES
# when it would otherwise be smaller.  Also leaves the transition name in
# the global "action".
#
function set_retries_for_ccd_operation
{
  typeset timeout

  set +a

  action=$1

  # "stopnode" has no timeout entry in the cdb, and only this node may be
  # in that transition, so the lookup is skipped and the default timeout
  # below applies.
  [[ ${action} = "stopnode" ]] || \
	timeout=$(${CLUSTERBIN}/cdbmatch cmm.transition.${action}.timeout \
			 ${cdbfile})

  # Nothing configured (or nothing looked up): use the default timeout.
  timeout=${timeout:-${DEFAULT_TIMEOUT}}

  # retries = timeout / wait period (DEFAULT_WAIT, fixed at 5 seconds)
  RETRIES=$(expr ${timeout} / ${DEFAULT_WAIT})

  # Never go below the default number of retries.
  if [[ ${RETRIES} -lt ${DEFAULT_RETRIES} ]]; then
	RETRIES=${DEFAULT_RETRIES}
  fi
}


########################################################################
#
# function is_member i_curr_nodes i_nodename
#
# i_curr_nodes	- whitespace separated names of the nodes that make up
#		  the current cluster membership
#
# i_nodename	- the node name to look for
#
# Returns 0 (success) when i_nodename equals one of the names in
# i_curr_nodes, 1 (failure) otherwise.

function is_member
{
typeset wanted
typeset candidate

wanted=$2

# $1 is deliberately unquoted so that it splits into one name per word.
for candidate in $1; do
  [[ "${candidate}" != "${wanted}" ]] || return 0
done
return 1
}

########################################################################
#
# function first_backup_node i_curr_nodes i_nodelist o_fb_node
#
# i_curr_nodes	- list of space separated node names that are in the
#		  current membership.
#
# i_nodelist	- ordered list of space separated node names, usually the
#		  nodes on which a logical host can be mastered.
#
# o_fb_node	- NAME of the caller's variable that receives the result.
#
# The first backup node is the first entry of i_nodelist, other than its
# very first entry, that is also present in i_curr_nodes.  When there is
# no such entry the output variable is set to the empty string.
#
# All locals carry an "fbn_" prefix so that they cannot shadow the
# caller's output variable (an output variable called "n" or "m" would
# otherwise be assigned locally and the result lost).

function first_backup_node
{
typeset fbn_members fbn_out
typeset fbn_candidate fbn_member

fbn_members=$1
fbn_out=$3

# Load the ordered node list into the positional parameters and drop the
# first entry: it can never be a backup.  The guard keeps "shift" from
# being run on an empty list.
set -- $2
if (( $# > 0 )); then
  shift
fi

for fbn_candidate in "$@"; do
  for fbn_member in ${fbn_members}; do
    if [[ "${fbn_candidate}" = "${fbn_member}" ]]; then
      # Only the variable NAME is expanded before eval runs; the value is
      # expanded by eval itself and so is never re-parsed as shell code.
      eval "${fbn_out}=\${fbn_candidate}"
      return
    fi
  done
done

eval "${fbn_out}=\"\""
}

########################################################################
#
# function is_backup i_nodelist i_nodename
#
# i_nodelist	- ordered, space separated list of node names
#
# i_nodename	- given node name
#
# Returns 0 if i_nodename matches an entry of i_nodelist other than the
# first one.  Otherwise (not listed, listed only first, or empty list)
# returns 1 (failure).

function is_backup
{
typeset ib_wanted ib_node

ib_wanted=$2

# Load the list into the positional parameters and discard the first
# entry, so that only backup candidates remain.  The guard keeps "shift"
# from being run on an empty list.
set -- $1
if (( $# > 0 )); then
  shift
fi

for ib_node in "$@"; do
  if [[ "${ib_node}" = "${ib_wanted}" ]]; then
    return 0
  fi
done
return 1
}
########################################################################
#
# function exec_fault_methods method_type
#
# method_type - name of the data service method to run.  This file calls
#		it with "fm_init", "fm_start" and "fm_stop"; "start",
#		"start_net", "stop", "stop_net", "abort" and "abort_net"
#		are recognised as well.
#
# For every data service reported by hareg that has an executable method
# registered for method_type, the method is run through timed_run:
#
#   stop-type methods (stop, stop_net, abort, abort_net, fm_stop)
#	always run, regardless of the data service state, with an empty
#	first list and the list of all logical hosts offering the service.
#
#   all other methods
#	run only when the data service state is "on", with the list of
#	logical hosts mastered by this node and the list of those that
#	are not.
#
# Globals read:    CLUSTERBIN, clustname, localhostname
# Globals written: retval - exit status of the most recent method that
#		   failed (it is not reset when a method succeeds).

function exec_fault_methods
{
typeset method method_type
typeset ds_state dslist dsrow ds
typeset timeout default_timeout
typeset all_loghosts lm_loghosts nm_loghosts
typeset loghost_rows loghost_name
typeset cm_row cm
typeset ccdfile
typeset r

method_type=$1
default_timeout=30

ccdfile=$(${CLUSTERBIN}/ccdadm ${clustname} -w)

# hareg prints tab separated fields; they are joined with ':' so that
# every row is a single word for the loop below.
dslist=$(${CLUSTERBIN}/hareg | tr '\t' ':')
for dsrow in ${dslist}; do
  ds=${dsrow%:*}
  ds_state=${dsrow#*:}

  method=$(${CLUSTERBIN}/hareg -q ${ds} -M ${method_type})
  if [[ -n "${method}" && -x "${method}" ]]; then

    # find out all the logical hosts offering this data service
    # split this list into mastered and not-mastered lists

    all_loghosts=""
    lm_loghosts=""
    nm_loghosts=""
    loghost_rows=$(${CLUSTERBIN}/scccd -f ${ccdfile} ${clustname} LOGHOST_DS \
		query dsname ${ds})

    for r in ${loghost_rows}; do
      # the logical host name is the field between the first and the
      # last ':' of the row
      loghost_name=${r%:*}
      loghost_name=${loghost_name#*:}

      if [[ -z "${all_loghosts}" ]]; then
	all_loghosts=${loghost_name}
      else
	all_loghosts="${all_loghosts},${loghost_name}"
      fi

      #
      # retrieve the current master of the logical host
      # only if we are in start transition, for stop methods
      # we do not use current master.
      #

      if [[ "${method_type}" = "start" || "${method_type}" = "start_net" || \
	    "${method_type}" = "fm_init" || "${method_type}" = "fm_start" ]]; then

	# retrieve the current master of the logical host
	cm_row=$(${CLUSTERBIN}/scccd -f ${ccdfile} ${clustname} LOGHOST_CM \
		query lname ${loghost_name})
	cm=${cm_row#*:*:}

	if [[ -n "${cm}" && "${cm}" = "${localhostname}" ]]; then
	  if [[ -z "${lm_loghosts}" ]]; then
	    lm_loghosts=${loghost_name}
	  else
	    lm_loghosts="${lm_loghosts},${loghost_name}"
	  fi
	else
	  if [[ -z "${nm_loghosts}" ]]; then
	    nm_loghosts=${loghost_name}
	  else
	    nm_loghosts="${nm_loghosts},${loghost_name}"
	  fi
	fi
      fi
    done

    timeout=$(${CLUSTERBIN}/hareg -q ${ds} -T ${method_type})
    if [[ -z "${timeout}" ]]; then
      timeout=${default_timeout}
    fi

    if [[ "${method_type}" = "stop" || "${method_type}" = "stop_net" || \
	  "${method_type}" = "abort" || "${method_type}" = "abort_net" || \
	  "${method_type}" = "fm_stop" ]]; then

      # No need to check if ds_state is on or off and use
      # always all_loghosts. For stop/abort programs, run
      # in real time. Force timeout to be short for the stop methods.

      ${CLUSTERBIN}/timed_run 10 ${method} "" "${all_loghosts}" \
		${timeout} || retval=$?
    else
      # we handle only start methods.
      # call them only if the data service state is ON.
      #
      if [[ "${ds_state}" = "on" ]]; then
	# The status capture must be its own list element: with a
	# line continuation in front of it, "retval=..." would be handed
	# to the method as one more argument.
	/usr/bin/priocntl -c TS -e ${CLUSTERBIN}/timed_run ${timeout} \
		${method} "${lm_loghosts}" "${nm_loghosts}" ${timeout} \
		|| retval=$?
      fi
    fi
  fi
done
}

########################################################################
#
# function loghoststop_cmd
#
# This function is executed during the stop and abort transitions. It
# gives up all the logical hosts being mastered by this node.
#
# Abort case (CURRSTEP is "cmmabort"):
#   The LOGHOST_CM rows naming this node as curr_master are read from the
#   CCD file reported by "ccdadm -w".  fm_stop methods are run if there
#   are any rows, then loghost_sync CCDSYNC_PRE_REMOVE is started in the
#   background for every row.  Returns 0 on success; exits the script
#   with 1 if no CCD file is available on a node with CLNODEUP != 0, or
#   if giving up any logical host failed.
#
# Stop case (any other CURRSTEP):
#   fm_stop methods are run, then every LOGHOST_CM row naming this node
#   as curr_master is removed with scccd.  If a removal fails the row is
#   added back and the script exits with 1; if adding it back fails too,
#   the node is first aborted out of the cluster with clustm.
#
# Globals read:    CURRSTEP, CLNODEUP, CLUSTERBIN, CLUSTERVAR, clustname,
#		   localhostname, allnodes, currnodes, localnodeid,
#		   RETRIES, DOINGSTOPFLAG, pre
# Globals written: numnodes

function loghoststop_cmd
{
typeset r rows
typeset lhost
typeset ccdfile
if [[ "${CURRSTEP}" = "cmmabort" ]]; then

    #
    # There is every possibility that loghost entries may not have
    # been removed in stopnode transition and hence instead of
    # checking for this FILE, we could get the LOGHOST_CM entries
    # thru STATIC CCD and then if there are any entries then
    # proceed with FM_STOP methods.
    #
    # if [[ ! -f "${DOINGSTOPFLAG}" ]]; then
    #

    ccdfile=$(${CLUSTERBIN}/ccdadm ${clustname} -w)

    if [[ -z ${ccdfile} && "${CLNODEUP}" -ne 0 ]]; then
	log_info "${pre}.4100" "Could not get a valid static ccd file."
	exit 1
    elif [[ -z ${ccdfile} && "${CLNODEUP}" -eq 0 ]]; then
	# no valid ccd file and this is the first attempt to join.
	# in this case don't do anything since scccd will return
	# an error if the -f option is supplied without a filename.
	# and since we have not been a cluster member yet we have
	# nothing mastered so in turn nothing to give up.
	return 0
    fi

    # This is the abort case. We call loghost_sync directly and simulate
    # its expected arguments since we want to give up the mastered logical
    # hosts without updating the CCD
    
    # numnodes is the number of words in allnodes.
    numnodes=$(print ${allnodes} | wc -w)
    rows=$(${CLUSTERBIN}/scccd -f ${ccdfile}  ${clustname} LOGHOST_CM query \
		"curr_master" "${localhostname}")

    if [[ ! -z "${rows}" ]]; then
	# stop all fault monitors - abort case
	exec_fault_methods fm_stop
    fi

    # Start one background loghost_sync per mastered logical host.  A
    # failing loghost_sync records its exit status in ${lhost}.abort.
    for r in ${rows}; do
      # the logical host name is the field between the first and the
      # last ':' of the row
      lhost=${r%:*}
      lhost=${lhost#*:}

      # clear markers left over from an earlier run
      /bin/rm -rf ${CLUSTERVAR}/${lhost}.abort
      /bin/rm -rf ${CLUSTERVAR}/loghost.${lhost}
     ( ${CLUSTERBIN}/loghost_sync CCDSYNC_PRE_REMOVE "${r}" ${ccdfile} \
                "${numnodes}" "${currnodes}" "${localnodeid}" || \
	echo $? > ${CLUSTERVAR}/${lhost}.abort ) &
   done

   # wait for all background loghost_sync jobs to finish
   wait

   # Either marker file still being present means the give up failed.
   # NOTE(review): loghost.${lhost} is not written in this file;
   # presumably loghost_sync maintains it - confirm.
   for r in ${rows}; do
      lhost=${r%:*}
      lhost=${lhost#*:}

      if ( [ -f ${CLUSTERVAR}/${lhost}.abort ] || [ -f ${CLUSTERVAR}/loghost.${lhost} ] ) \
		then
	log_info "${pre}.4010" "Give up of logical host ${lhost} failed."
	/bin/rm -rf ${CLUSTERVAR}/${lhost}.abort
	exit 1
      fi 
   done
  return 0
fi

# stop all fault monitors - stop case
exec_fault_methods fm_stop

#
# There is a Possibility that other nodes are also executing
# the pdbadmin stopnode, in which case the LOGHOST_CM query
# might return NULL (since we are accessing dynamic CCD)
# and we would continue with stop and then abort
# XXX:
#   1. First get the LOGHOST_CM queries using static CCD.
#   (This should not do harm since LOGHOST_CM can be modified only
#   if someone else is doing a takeover).
#		OR
#   2. Increase the timeout when querying the LOGHOST_CM
#		OR
#   3. First Query using static CCD.
# 	If there are entries keep querying until query is retrieved.
#   while :
#    static-query;
#    if no entries break;
#    else
#    query-dynamic ccd
#	if rows="" then
#	  continue;
#    else
#        break;
#

rows=$(${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM query \
		"curr_master" "${localhostname}")

for r in ${rows}; do
  lhost=${r%:*}
  lhost=${lhost#*:}
  
  ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM remove "lname"\
		"${lhost}"
  if [[ "$?" -ne 0 ]]; then
      #
      # Recovery Action:
      # Add the Row Back to where it is being mastered.
      # If it is restored, exit with stopnode
      # XXX: When we execute rcK.d, we may have executed other programs
      # hence not sure if we need to exit with 1.

      log_info "${pre}.4010" "Give up of logical host ${lhost} failed."

      ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM add \
		"lname:curr_master" "${lhost}:${localhostname}"

      if [[ "$?" -ne 0 ]]; then
	  #
          # abort the node out of the cluster to prevent leaving the logical
          # host in an inconsistent state.
          #
          /bin/rm -f ${DOINGSTOPFLAG}
          ${CLUSTERBIN}/clustm abort ${clustname} this
      fi
      # whether or not the row could be restored, the stop has failed
      /bin/rm -f ${DOINGSTOPFLAG}
      exit 1
  fi
done
}

########################################################################
#
# function loghoststep1_cmd
#
# This function executes step 1 of the logical host reconfiguration. All hosts
# participating in the reconfiguration give up logical hosts that they are
# supposed to give up in preparation for the takeover during the next step.
#
# For every LOGHOST_CM row that names this node as curr_master:
#   - if CLNODEUP is 0 the row is simply removed;
#   - otherwise, if this node is a backup (not the first entry of the
#     logical host's node list):
#	* primary in the membership and m_mode 0: the row is removed; if
#	  the removal fails the row is added back, and the script exits
#	  with 1 if that fails as well;
#	* primary in the membership and m_mode non-zero: the disk group
#	  reservations are released;
#	* primary not in the membership: the disk groups are released if
#	  anyone_else_present succeeds and reserved otherwise;
#     and, unless the row was removed, scnfs mount is run for the
#     logical host.
# Finally the fm_init methods are run.
#
# failure_fencing and anyone_else_present are not defined in this file;
# presumably they come from the sourced reconf_ener.disks - confirm.
#
# Globals read: CLUSTERBIN, RETRIES, clustname, localhostname, CLNODEUP,
#		curr_members, pre

function loghoststep1_cmd
{
typeset r rows
typeset nodelist dglist m_mode
typeset lhost lhostrow
typeset nodelist_array
typeset primary_node
# retrieve all logical hosts for which this node is the current master

rows=$(${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM query \
		"curr_master" "${localhostname}")

for r in ${rows}; do
  # the logical host name is the field between the first and the last
  # ':' of the LOGHOST_CM row
  lhost=${r%:*}
  lhost=${lhost#*:}

  # retrieve information about the logical host

  lhostrow=$(${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST query \
		"lname" "${lhost}")
  # The expansions below pick colon separated fields out of the row,
  # assuming it has six fields (TODO confirm against the LOGHOST format):
  # field 4 is the comma separated disk group list ...
  dglist=${lhostrow%:*:*}
  dglist=${dglist#*:*:*:}
  dglist=$(print ${dglist} | tr ',' ' ')

  # ... field 3 is the comma separated node list, whose first entry is
  # taken as the primary node ...
  nodelist=${lhostrow%:*:*:*}
  nodelist=${nodelist#*:*:}
  nodelist=$(print ${nodelist} | tr ',' ' ')
  set -A nodelist_array ${nodelist}
  primary_node=${nodelist_array[0]}

  # ... and everything after the fifth ':' is the manual mode flag.
  m_mode=${lhostrow#*:*:*:*:*:}

  # If I own this logical host and I am just
  # joining the cluster, remove this LOGHOST_CM entry. This row will
  # be added in the subsequent step, but it serves to avoid confusing the
  # fault monitors. This row must have been left around from the last time
  # this node aborted or panicked.

  if [[  "${CLNODEUP}" -eq 0 ]]; then
    ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM remove "lname"\
		"${lhost}"
    #
    # No action needs to be taken since it should always succeed, since
    # this is a left over from previous cluster.
    #
  else
    if is_backup "${nodelist}" ${localhostname}; then

      # If the primary node is in the cluster membership and manual mode is
      # off, give up the logical host as the primary will take it over in
      # the next step. If manual mode is on, we should not give up the
      # logical host, but we should just release any reservations in case
      # the primary takes over at a later point in time.

      if is_member "${curr_members}" ${primary_node}; then
	if [[ "${m_mode}" -eq 0 ]]; then
	  ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM remove \
		 "lname" "${lhost}"
	  if [[ "$?" -ne 0 ]]; then
	    #
	    # Recovery:
	    # If removal of CCD ROW fails, we add it here so that
	    # this node will remaster the logical host. Add a message in
	    # the console log so that user knows about it and does not
	    # complain that in m_mode = 0, the logical host is not being
	    # moved.
	    # If Add fails again then it would be better to exit.
	    #
	    ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM add \
		"lname:curr_master" "${lhost}:${localhostname}"
	    if [[ "$?" -ne 0 ]]; then
	     log_info "${pre}.4010" "Give up of logical host ${lhost} failed."
	     #
	     # return transition failure which will cause cmm to abort the
	     # node out of the cluster
	     #
	     exit 1
	    else 
	     log_info "${pre}.1400" "Could not give up logical host ${lhost}, ${localhostname} will still master it"
	    fi
	  fi
	  # skip the scnfs mount below and go on to the next logical host
	  continue
	else
	  failure_fencing release private ${dglist}
	fi
      else
	# primary node of this logical host is not in the cluster membership.
	# Reserve the diskgroups if no other node that can master this logical
	# host is in the membership. Otherwise, release the reservations.

	if anyone_else_present "${nodelist}" "${curr_members}" ${localhostname}; then
	  failure_fencing release private ${dglist}
	else
	  failure_fencing reserve private ${dglist}
	fi
      fi
      #
      # we may need to mount the filesystem if there are any additions.
      # Hence call scnfs
      #
      ${CLUSTERBIN}/scnfs ${clustname} mount ${lhost}
    fi
  fi
done
# now that all logical hosts have been given away, calling FM_INIT
# so that it gets called on the logical hosts that are mastered
# on this node.(after this point we have the list of logical
# hosts that will remain mastered on this node)
# For logical hosts that will be taken over, loghost_sync will
# take care of it.
# XXX: Should we call START/START_NET here..
exec_fault_methods fm_init

}

########################################################################
#
# function loghoststep2_cmd
#
# This function executes step2 of the logical host reconfiguration. In
# this step, all nodes that are supposed to take over or master logical
# hosts will do so. The correctness of the cluster reconfiguration
# protocols should ensure that all logical hosts that are going to be
# taken over in this step should have been given up by their corresponding
# masters before this step.
#
# Every LOGHOST row of the CCD file reported by "ccdadm -w" is examined.
# Rows whose LOGHOST_MSTATE value is not 1 are skipped.  The remaining
# logical hosts are processed only when this node is their primary or
# their first backup node:
#   - no current master: this node adds itself as curr_master if it is
#     the primary, or if it is the first backup and the primary is not in
#     the membership;
#   - this node is the current master: the disk groups are released if
#     anyone_else_present succeeds and reserved otherwise;
#   - another node is the current master but is not in the membership:
#     under the same primary / first backup rule the row is removed and
#     re-added with this node as curr_master.
# The script exits with 1 when a failed "add" cannot be cleaned up.
#
# failure_fencing and anyone_else_present are not defined in this file;
# presumably they come from the sourced reconf_ener.disks - confirm.
#
# Globals read:    CLUSTERBIN, RETRIES, clustname, localhostname,
#		   curr_members, pre
# Globals written: curr_master (not declared with typeset)

function loghoststep2_cmd
{
typeset r rows
typeset nodelist dglist lhost
typeset nodelist_array primary_node fb_node
typeset mstate
typeset ccdfile
# retrieve all logical hosts in the cluster

ccdfile=$(${CLUSTERBIN}/ccdadm ${clustname} -w)

rows=$(${CLUSTERBIN}/scccd -f ${ccdfile}  ${clustname} LOGHOST query "lname" "")

for r in ${rows}; do
  # The expansions below pick colon separated fields out of the row,
  # assuming it has six fields (TODO confirm against the LOGHOST format):
  # field 2 is the logical host name ...
  lhost=${r%:*:*:*:*}
  lhost=${lhost#*:}

  # ... field 4 the comma separated disk group list ...
  dglist=${r%:*:*}
  dglist=${dglist#*:*:*:}
  dglist=$(print ${dglist} | tr ',' ' ')

  # ... and field 3 the comma separated node list, whose first entry is
  # taken as the primary node.
  nodelist=${r%:*:*:*}
  nodelist=${nodelist#*:*:}
  nodelist=$(print ${nodelist} | tr ',' ' ')
  set -A nodelist_array ${nodelist}
  primary_node=${nodelist_array[0]}

  #
  # If the Logical Host is in maintenance mode then we should not
  # touch it.
  # NOTE(review): the test below skips every row whose state is not 1,
  # so 1 presumably means "not in maintenance mode" - confirm.
  #
  mstate=$(${CLUSTERBIN}/scccd -f ${ccdfile} ${clustname} LOGHOST_MSTATE \
		query "lname" ${lhost})
  mstate=${mstate#*:*:}
  if [[ "${mstate}" -ne "1" ]]; then
		continue;
  fi



  # get the first backup node for this logical host

  fb_node=""
  first_backup_node "${curr_members}" "${nodelist}" fb_node

  # all processing for this step applies only if the local host is either
  # the primary or the first backup node for this logical host

  if [[ "${primary_node}" = "${localhostname}" || \
        "${fb_node}" = "${localhostname}" ]]; then

    # retrieve current master of the logical host
    curr_master=$(${CLUSTERBIN}/scccd -f ${ccdfile} ${clustname} LOGHOST_CM \
			query "lname" "${lhost}")

    # if there is no current master, the primary or the first backup (if
    # primary is not in the cluster membership) should take over this
    # logical host.

    if [[ -z "${curr_master}" ]]; then
      if ( [[ "${primary_node}" = "${localhostname}" ]] ) || \
         ( [[ "${fb_node}" = "${localhostname}" ]] && \
	    ! is_member "${curr_members}" ${primary_node} ) ; then

	${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM add \
		"lname:curr_master"  "${lhost}:${localhostname}"

	if [[ "$?" -ne 0 ]]; then
	  #
	  # Clean up the mess that would have been left by the LOGHOST_CM
	  # add failure;
	  ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM remove \
			lname "${lhost}"
	  if [[ "$?" -ne 0 ]]; then
		log_info "${pre}.4200" "Unable to to restore consistent data service status while taking over logical host ${lhost}"
		exit 1
	  fi
	  log_info "${pre}.4020" "Take over of logical host ${lhost} failed."
	fi
      fi
      # nothing more to do for a logical host that had no current master
      continue
    fi

    # If this host owns the logical host and is already a cluster
    # member, then we just need to release the reservations on the disks if
    # any other host that can master this logical host is in the membership.
    # Otherwise, we reserve the disks.

    # keep only the text after the second ':' of the LOGHOST_CM row
    curr_master=${curr_master#*:*:}
    if [[ "${curr_master}" = "${localhostname}" ]]; then
      if anyone_else_present "${nodelist}" "${curr_members}" ${localhostname}; then
	failure_fencing release private ${dglist}
      else
	failure_fencing reserve private ${dglist}
      fi
    else

      # some other host is the current master. If the current master is not in
      # the cluster membership, then the primary or the first backup (if the
      # primary is not in the membership) should take over this logical host
      # after first removing the row
      # the loghost_sync will take care of calling fm_init.

      if ! is_member "${curr_members}" ${curr_master} ; then
	if ( [[ "${primary_node}" = "${localhostname}" ]] ) || \
	   ( [[ "${fb_node}" = "${localhostname}" ]] && \
	        ! is_member "${curr_members}" ${primary_node} ) ; then

	    ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM remove \
			   lname "${lhost}"

	    if [[ "$?" -ne 0 ]]; then
		log_info "${pre}.5001" "Unable to giveup logical host ${lhost}, ignoring logical host ${lhost}"
		continue;
	    fi

	    ${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM add \
		"lname:curr_master" "${lhost}:${localhostname}"

	    if [[ "$?" -ne 0 ]]; then

		#
		# Clean up the mess left by the LOGHOST_CM add.
		#

		${CLUSTERBIN}/scccd -r ${RETRIES} -w 5 ${clustname} LOGHOST_CM \
				remove  lname "${lhost}"

		if [[ "$?" -ne 0 ]]; then
			log_info "${pre}.4200" \
			"Unable to to restore consistent data service status while taking over logical host ${lhost}"
			exit 1
		fi

		log_info "${pre}.4020" "Take over of logical host ${lhost} failed."
	    fi
	fi
      fi
    fi
  fi
done
}



# Main line: derive the host names used by the functions above from the
# node ids supplied in the environment, then run the handler for the
# current reconfiguration step.

# host name of this node, looked up in the cdb by its node id
localhostname=$(${CLUSTERBIN}/cdbmatch cluster.node.${localnodeid}.hostname \
			${cdbfile})


# curr_members: space separated host names of the node ids in currnodes
# (each name is appended with a leading space).
curr_members=""
for n in ${currnodes}; do
  nodename=$(${CLUSTERBIN}/cdbmatch cluster.node.${n}.hostname ${cdbfile})
  curr_members="${curr_members} ${nodename}"
done

# Dispatch on the current step.  Steps not listed here are a no-op.
case ${CURRSTEP} in
  cmmabort | stopnode)
	# both transitions compute RETRIES as for "stopnode"
	set_retries_for_ccd_operation stopnode;
	loghoststop_cmd ;;
  cmmstep4)
	exec_fault_methods fm_stop ;;
  cmmstep10)
	# first parameter should match the step number;
	set_retries_for_ccd_operation step10
	loghoststep1_cmd ;;
  cmmstep11)
	# first parameter should match the step number.
	set_retries_for_ccd_operation step11
	loghoststep2_cmd ;;
  cmmstep12)
	exec_fault_methods fm_start ;;
  *) ;;
esac

# Reached only when the selected handler did not call "exit" itself.
exit 0
