#! /bin/sh
#
#ident "@(#)nshttp_probe.shi   1.10     97/07/15 SMI"
#
#	Copyright 07/15/97 Sun Microsystems, Inc.  All Rights Reserved.
#

# Usage: nshttp_probe <instance name>
# Started up in the background via pmfd in nshttp_fm_start
# during reconfiguration.

#
# The probe is not invoked in the context of the methods, so the
# framework binary directories have to be appended to the search path here.
#
PATH="$PATH:/opt/SUNWhadf/bin:/opt/SUNWhadf/fault_progs"

# First command-line argument: the instance name (see Usage above).
INST_NAME="$1"

#
#	Copyright 11/18/96 Sun Microsystems, Inc.  All Rights Reserved.
#
#ident "@(#)ds_boiler		1.7	96/11/18 SMI"
#
# common boiler for HA Internet Pro data services
#
#


# Short program name, used in the usage message.
# "$0" is quoted so that a script path containing whitespace still works.
ARGV0=`basename "$0"`

# Logging settings.
# NOTE(review): nothing in this file reads LOGGER, HA_SLOGFACILITY or
# HA_SLOGTAG directly; presumably the logging helpers in ds_utilities
# consume them -- confirm there.
LOGGER=logger
HA_SLOGFACILITY=`haget -f syslog_facility`
HA_SLOGTAG=hadf

# Directory this program was started from.
prog_path=`dirname "$0"`

# Put the ha-service specific program directories in front of PATH, since
# they do not co-reside with the framework clust_progs:
#  - if this program already lives under a .../clust_progs path, its own
#    directory is enough;
#  - otherwise add its own directory plus the sibling ../clust_progs.
case "$prog_path" in
*/clust_progs*)
	PATH=${prog_path}:${PATH}
	;;
*)
	PATH=${prog_path}:${prog_path}/../clust_progs:${PATH}
	;;
esac

# source in ha-services common utilities (located through the PATH set above)
. ds_utilities

#
# Work out the names of the local host and of its sibling.
# REMOTEHOST is needed by a later hactl command.
#
LOCALHOST=`uname -n`
if [ $? -ne 0 ]; then
	logerr `gettext "Cannot obtain name of local host"`
	exit 1
fi

# compute hostname of sibling
PHYS_HOSTS="`/opt/SUNWhadf/bin/haget -f all_physical_hosts`"

# Exactly two physical hosts are expected.  The check is written as
# "is it 2? otherwise fail" so that an empty or non-numeric count (for
# example when haget or count_items itself failed) also ends up in the
# error branch: a failing test(1) expression is simply "not true".
host_count=`count_items $PHYS_HOSTS`
if [ ${host_count:-0} -eq 2 ]; then
	:
else
	logerr `gettext "Cannot compute hostname of sibling"`
	exit 1
fi

# The sibling is the entry of PHYS_HOSTS that is not the local host.
for i in $PHYS_HOSTS; do
	if [ "$i" != "$LOCALHOST" ]; then
		REMOTEHOST=$i
	fi
done

#! /bin/sh 
#
#	Copyright 12/20/96 Sun Microsystems, Inc.  All Rights Reserved.
#
#ident "@(#)do_service		1.13	96/12/20 SMI"
#
#

# Comma-separated lists taken from the first two positional parameters.
# NOTE(review): this script also stores $1 in INST_NAME (see Usage at the
# top); confirm which meaning applies when this section runs inside the probe.
ARG_MASTERED="$1"
ARG_NOT_MASTERED="$2"

# Turn each comma-separated list into a space-separated sh word list.
MASTERED=`echo $ARG_MASTERED | tr ',' ' '`
NOT_MASTERED=`echo $ARG_NOT_MASTERED | tr ',' ' '`

# Configuration file; it is replicated on both servers.
HAHTTP_CONFIG_FILE=/etc/opt/SUNWhahtt/hadsconf

#
# The configuration file must exist before the parser is called on it.
#
[ -f $HAHTTP_CONFIG_FILE ] || {
	logerr `gettext "$HAHTTP_CONFIG_FILE doesn't exist"`
	exit 1
}

# Parse the configuration.  source_env_file logs its own error message
# when it fails, so just exit without logging another one.
source_env_file $HAHTTP_CONFIG_FILE || exit 1

#
# Timeout for waiting for SIGTERM to stop a process.
# This should be in the config file.
#
STOP_TIMEOUT=15
#
# bundle_do_svc <action>
#
# Carries out <action> for one HTTP instance; it is called for each
# instance.  The instance is described by the _INST_* variables, which
# this function only reads (presumably they are set by the config file
# parser -- confirm against the caller).
#
# Actions:
#	start			hand the instance's start program to pmfadm
#	stop | abort		take the instance out of the pmfadm queue,
#				then run the instance's stop program
#	fm_start		start the probe program under pmfadm, unless
#				the logical host is in maintenance mode
#	fm_stop			stop the probe (TERM first, then KILL)
#	fm_check_this_host_ok	if this host masters the instance's logical
#				host, probe the service once via telnet
# Any other action does nothing.
#
# This function never returns to its caller: every path ends in exit.
# Exit status is 0 on success (or when there was nothing to do) and
# 1 on error.
#
bundle_do_svc ()
{
	action=$1

	HTTP_START=${_INST_BASE_DIR}/start
	HTTP_STOP=${_INST_BASE_DIR}/stop

	case "$action" in

	'start')

		# First do some error checking.

		if [ ! -x "$HTTP_START" ]; then
			logerr `gettext "<$HTTP_START> is not executable."`
			exit 1
		fi

		# The process monitor facility calls the start program,
		# passing to it the instance-specific information it needs.
		# Note that we're using pmf to start/stop, but not to probe.
		#
		# _INST_RETRY is quoted so that an empty or unset value is
		# simply "not n" instead of a test(1) syntax error.

		if [ "${_INST_RETRY}" = "n" ]; then
			pmfadm -c ${_INST_NAME} "$HTTP_START"
		else
			pmfadm -c ${_INST_NAME}          \
			       -n ${_INST_RETRY_TIMES}    \
			       -t ${_INST_RETRY_INTERVAL} \
			       -a ${_INST_PROBE_CALLBACK_1} \
			       "$HTTP_START"
		fi

		if [ $? -ne 0 ]; then
			logerr \
		`gettext "pmfadm failed to start HTTP instance ${_INST_NAME}"`
			exit 1
		else
			lognotice \
			    `gettext "Started HTTP instance ${_INST_NAME}"`

		fi
	;;

	'stop' | 'abort')

		# delete from queue, but don't kill
		pmfadm -s ${_INST_NAME}
		if [ $? -ne 0 ]; then
			logerr \
		`gettext "pmfadm failed to delete ${_INST_NAME} from queue"`
			exit 1
		fi

		# use Netscape's stop script to stop nshttp instance
		if [ ! -x "$HTTP_STOP" ]; then
			logerr `gettext "<$HTTP_STOP> is not executable"`
			exit 1
		fi

		"$HTTP_STOP"

		# A failing stop script is only logged; the action still
		# finishes with exit status 0.
		if [ $? -ne 0 ]; then
			logerr `gettext "$HTTP_STOP execution failed."`
		else
			lognotice \
			    `gettext "Stopped HTTP instance ${_INST_NAME}"`
		fi
	;;

	'fm_start')

		# pmf starts nshttp_probe
		# nshttp_probe runs until nshttp_fm_stop kills it.
		# Don't start probe if diskset is in maintenance mode.

		# If this HTTP instance's diskset is in maint mode, exit now.
		MAINT=`haget -f is_maint -h ${_INST_LOGICAL_HOST}`
		if [ "$MAINT" = "1" ]; then
			exit 0
		fi

		pmfadm -c ${_INST_NAME}.probe ${_INST_PROBE_PROG_1} \
		    ${_INST_NAME}

		if [ $? -ne 0 ]; then
			logerr \
`gettext "pmfadm failed to start HTTP probe for instance ${_INST_NAME}"`
			exit 1
		else
			lognotice \
		`gettext "Started HTTP probe instance ${_INST_NAME}.probe"`
		fi
	;;

	'fm_stop')

		# If probe not running, do nothing
		ha_svc_not_running ${_INST_NAME}.probe && exit 0

		# pmf kills nshttp_probe: ask with TERM and wait up to
		# STOP_TIMEOUT; fall back to KILL if that fails.
		pmfadm -s ${_INST_NAME}.probe -w ${STOP_TIMEOUT} TERM || \
			pmfadm -s ${_INST_NAME}.probe KILL
		if [ $? -ne 0 ]; then
			logerr \
`gettext "pmfadm failed to stop HTTP probe instance ${_INST_NAME}.probe"`
			exit 1
		else
			lognotice \
		`gettext "Stopped HTTP probe instance ${_INST_NAME}.probe"`
		fi
	;;

	'fm_check_this_host_ok')

		# If the HA-HTTP logical host for this instance
		# is not currently mastered by this machine, exit now.

		is_member "${_INST_LOGICAL_HOST}" "$MASTERED"
		if [ $? -ne 0 ]; then
			exit 0
		fi

		# Otherwise, probe Web service now.
		# If dead, request will time out in
		# ${_INST_PROBE_TIMEOUT_1} secs.

		HTTPPROBEFILE=/tmp/${_INST_NAME}.probe.$$

		# telnet gets a single empty line on stdin; everything it
		# prints is captured in the scratch file.
		hatimerun -t ${_INST_PROBE_TIMEOUT_1} \
		    /usr/bin/telnet ${_INST_LOGICAL_HOST} \
		    ${_INST_PORT} <<EOF > "$HTTPPROBEFILE" 2>&1

EOF
		probe_status=$?

		# Inspect the captured output while it still exists, then
		# remove the scratch file so that no invocation leaves a
		# file behind in /tmp, whichever way we exit below.
		grep refused "$HTTPPROBEFILE" > /dev/null 2>&1
		refused_status=$?
		rm -f "$HTTPPROBEFILE"

		# hatimerun status 99 is treated as "the request timed out".
		if [ $probe_status -eq 99 ]; then
			# timeout
logerr `gettext "This server is supposed to be providing HTTP service for instance <${_INST_NAME}>, but request timed out"`
			exit 1
		fi
		if [ $refused_status -eq 0 ]; then
logerr `gettext "This server is supposed to be providing HTTP service for instance <${_INST_NAME}>, but isn't"`
			exit 1
		fi
	;;

	esac

	exit 0
}
#include_boiler


# An instance name is mandatory.
[ -n "$INST_NAME" ] || {
	logerr `gettext "Usage: $ARGV0 <instance>"`
	exit 1
}

# Logical hosts reported by haget as mastered.
MASTERED_LOGICAL_HOSTS=`haget -f mastered`

# PORT: required parameter.
HTTP_PORT=`get_config_param $INST_NAME PORT`
[ -n "$HTTP_PORT" ] || {
	logerr `gettext "HTTP_PORT value not set for instance $INST_NAME"`
	exit 1
}

# LOGICAL_HOST: the parser requires this to be set, so no check here.
HTTP_HOST=`get_config_param $INST_NAME LOGICAL_HOST`

# PROBE_1_INTERVAL: the parser requires this to be set, but does not
# reject negative values; replace those by 60.
HTTP_PROBE_INTERVAL=`get_config_param $INST_NAME PROBE_1_INTERVAL`
if [ $HTTP_PROBE_INTERVAL -lt 0 ]; then
	lognotice `gettext "INTERVAL value is negative for instance $INST_NAME; using 60 seconds"`
	HTTP_PROBE_INTERVAL=60
fi

# PROBE_1_TIMEOUT: optional, and the parser does not reject values <= 0.
# Fall back to 60 when it is missing or not positive.
# (what timeout value is too low?)
HTTP_PROBE_TIMEOUT=`get_config_param $INST_NAME PROBE_1_TIMEOUT`
if [ -z "$HTTP_PROBE_TIMEOUT" ]; then
	lognotice `gettext "TIMEOUT value not set for instance $INST_NAME; using 60 seconds"`
	HTTP_PROBE_TIMEOUT=60
elif [ $HTTP_PROBE_TIMEOUT -le 0 ]; then
	lognotice `gettext "TIMEOUT is <= zero for instance $INST_NAME; resetting to 60 seconds"`
	HTTP_PROBE_TIMEOUT=60
fi

# PROBE_1_TAKEOVER: optional, defaults to 'y'.
HTTP_TAKEOVER=`get_config_param $INST_NAME PROBE_1_TAKEOVER`
if [ -z "$HTTP_TAKEOVER" ]; then
	lognotice `gettext "TAKEOVER value not set for instance $INST_NAME; using 'y'"`
	HTTP_TAKEOVER=y
fi

# LOCAL is "yes" when HTTP_HOST is among the logical hosts mastered
# here, i.e. the instance is running locally; otherwise "no".
if is_member "$HTTP_HOST" "$MASTERED_LOGICAL_HOSTS"; then
	LOCAL=yes
else
	LOCAL=no
fi

# Probe state:
#   HTTPGRACE      set to 1 once a failing sibling has been given one
#                  interval to recover by itself; a successful probe
#                  resets it to 0
#   HTTPPROBEFILE  file that receives the output of every probe
#   RETRY          number of local restarts attempted so far
# NOTE(review): HTTPPROBEFILE is a fixed name in /tmp that does not depend
# on the instance, and the code shown here never reads it back.
HTTPGRACE=0
HTTPPROBEFILE=/tmp/.nshttp_probe
RETRY=0

# Probe the server with a HEAD request once per HTTP_PROBE_INTERVAL.
# The loop has no exit condition of its own.
while : ; do
	if hatimerun -t $HTTP_PROBE_TIMEOUT tcpclnt -a $HTTP_HOST $HTTP_PORT \
	    "HEAD / HTTP/1.0" "" > $HTTPPROBEFILE 2>&1; then
		# Probe succeeded.
		HTTPGRACE=0
	elif [ $LOCAL = "yes" ]; then
		# The instance runs locally: restart it.  If the restart
		# does not succeed, the sibling will eventually take over
		# if TAKEOVER is set to 'y'.
		logerr `gettext "nshttp instance $INST_NAME failed locally"`
		RETRY=`expr $RETRY + 1`
		logerr `gettext "restarting nshttp instance $INST_NAME; restart number $RETRY"`
		nshttp_svc_start "$MASTERED_LOGICAL_HOSTS" ""
	else
		logerr `gettext "nshttp instance $INST_NAME failed on sibling"`
		if [ $HTTPGRACE -eq 0 ]; then
			# First failure in a row: give the sibling one
			# interval to restart nshttp before a takeover.
			HTTPGRACE=1
		elif [ "$HTTP_TAKEOVER" = "y" ]; then
			# $REMOTEHOST set in ds_boiler
			hactl -t -s nshttp -p $REMOTEHOST
		fi
	fi

	sleep $HTTP_PROBE_INTERVAL
done
