#! /usr/bin/sh
#
# ident	"@(#)bringup.sh	1.59	98/03/05 SMI"
#
# Copyright (c) 1996 by Sun Microsystems, Inc.
# All rights reserved.
#
# bringup
#
#	The bringup script boots up a domain identified by the
#	SUNW_HOSTNAME environment variable.
#
#	The process of booting a domain consists of:
#	- check that no other bringup is running on this domain
#	- check that domain is powered on.
#	- check that the domain is not currently up.
#	- kill the obp_helper and netcon_server running on the domain.
#	- execute hpost.
#	- start new obp_helper.
#	- start new netcon_server.
#	- update the Starfire MIB according to new domain configuration
#

#
# Procedure to check whether any domain other than $SUNW_HOSTNAME is UP.
#
# Globals read:    SSPVAR, SUNW_HOSTNAME, CH_ARGS
# Globals written: SUNW_HOSTNAME is temporarily set (and exported) to each
#                  other domain so that check_host examines that domain; it
#                  is always restored before returning.  domain_list, domain,
#                  cray_hostname, hpost_pid, val and other_up are scratch.
# Returns: 0 - no other domain is listed by domain_status, or none of the
#              other domains is active
#          1 - another domain is running hpost, is active, or check_host
#              failed for it
# Exits with status 1 (after calling cleanup) when the domain_config file is
# missing or empty, i.e. no domain has been created yet.
#
Any_other_domain_UP()
{
	if test ! -f "${SSPVAR}/.ssp_private/domain_config"
	then
		# No domain_config file, exit
		echo "No domain has been created!!"
		echo "Please create the domain and then bringup"
		cleanup
		exit 1
	fi

	# "test -s" checks the size of the file.  ("test -z" applied to the
	# path name only tests a string that is never empty.)
	if test ! -s "${SSPVAR}/.ssp_private/domain_config"
	then
		# Empty domain_config file, no domains have been created
		echo "No domain has been created!!"
		echo "Please run domain_create and then bringup"
		cleanup
		exit 1
	fi

	# Names of all domains reported by domain_status, minus the header
	# line and minus the current domain.  Kept in a variable so that no
	# predictable scratch file is needed in /tmp.
	domain_list=`domain_status | grep -v SYSBDS | grep -v OS |
		awk '{print $1}' | grep -v "^$SUNW_HOSTNAME$"`
	if test -z "$domain_list"
	then
		# No domain other than the current domain has been created.
		return 0
	fi

	# More than one domain exists, check if any of them is active.
	cray_hostname=$SUNW_HOSTNAME
	other_up=0
	for domain in ${domain_list}
	do
		# check_host works on the domain named by SUNW_HOSTNAME
		SUNW_HOSTNAME=$domain
		export SUNW_HOSTNAME

		# Is hpost running on this domain?
		if test -f "${SSPVAR}/adm/${domain}/hpost.lock"
		then
			hpost_pid=`cat "${SSPVAR}/adm/${domain}/hpost.lock"`
			if test -n "$hpost_pid" &&
				ps -ef | grep hpost | grep -v grep | grep " ${hpost_pid} " > /dev/null 2>&1
			then
				# Yes! hpost running
				if [ "$CH_ARGS" != "" ]
				then
					logger -p local0.info \
					-t "bringup-(SUNW_HOSTNAME:${cray_hostname})" \
					"INFO : domain $SUNW_HOSTNAME is active. hpost running"
				fi
				echo "Another hpost is in progress"
				other_up=1
				break
			fi
		fi

		# Is domain active?
		check_host -qb
		val=$?
		if [ "$val" -eq 0 ]
		then
			# Domain $SUNW_HOSTNAME is active
			if [ "$CH_ARGS" != "" ]
			then
				logger -p local0.info \
				-t "bringup-(SUNW_HOSTNAME:${cray_hostname})" \
				"INFO : domain $SUNW_HOSTNAME is active"
			fi
			echo "At least one domain is active"
			other_up=1
			break
		fi

		# Anything other than 0 or 1 is treated as a check_host failure
		# (an exit status can never be negative).
		if [ "$val" -gt 1 ]
		then
			if [ "$CH_ARGS" != "" ]
			then
				logger -p local0.info \
				-t "bringup-(SUNW_HOSTNAME:${cray_hostname})" \
				"INFO : check_host $SUNW_HOSTNAME failed"
			fi
			echo "WARNING: check_host failed for host $SUNW_HOSTNAME"
			other_up=1
			break
		fi

		# This domain is not active
		if [ "$CH_ARGS" != "" ]
		then
			logger -p local0.info \
			-t "bringup-(SUNW_HOSTNAME:${cray_hostname})" \
			"INFO : domain $SUNW_HOSTNAME is inactive"
		fi
	done

	# Single exit point: always hand the caller its own domain back.
	SUNW_HOSTNAME=$cray_hostname
	export SUNW_HOSTNAME
	return $other_up
}

#
# Procedure to clean up bringup when a signal is received or when
# bringup has to give up.
#
# Globals read: SSPVAR, SUNW_HOSTNAME, lock_pid (may be unset or 0 when no
#               lock-holding process has been started yet)
# Globals written: hpost_pid
# Side effects: sends TERM to the pid recorded in the domain's hpost.lock
#               file and removes that file, sends TERM to the process
#               holding the bringup lock, logs the termination.
# Never returns: always exits with status 1.
#
cleanup()
{
	# kill hpost if appropriate
	if test -f "${SSPVAR}/adm/${SUNW_HOSTNAME}/hpost.lock"
	then
		hpost_pid=`cat "${SSPVAR}/adm/${SUNW_HOSTNAME}/hpost.lock"`
		# An empty lock file must not turn into "kill -TERM" without a pid
		if test -n "$hpost_pid"
		then
			kill -TERM $hpost_pid > /dev/null 2>&1
		fi
		/bin/rm -f "${SSPVAR}/adm/${SUNW_HOSTNAME}/hpost.lock"
	fi

	# kill process holding lock for bringup
	# (default to 0 so an unset lock_pid is not a "test" syntax error)
	if [ "${lock_pid:-0}" -ne 0 ]
	then
		kill -TERM $lock_pid > /dev/null 2>&1
	fi

	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : BRINGUP TERMINATED!!"

	echo "bringup terminated!!"
	exit 1
}

#
# Procedure to obtain the bringup.lock file
#
# Asks scotty ("bringup_lock 2") whether the lock is free.  If it is, a
# background scotty ("bringup_lock 9") is started to hold the lock and its
# pid is stored in lock_pid.  Otherwise the procedure blocks in
# "bringup_lock 0" until the current holder is done, so that the caller
# can try again.
#
# Globals written: ans, lock_pid (pid of the lock holder, 0 if the holder
#                  could not be found after start-up)
# Side effects: installs "cleanup" as handler for INT, HUP and TERM.
# Returns: 0 - the lock is held, lock_pid is valid
#          1 - the lock is not held, the caller should retry
#          (A pid is not usable as a return value: the exit status only
#          keeps the low 8 bits.)
#
getLock()
{
	ans=`scotty -nc "bringup_lock 2"`
	if [ "$ans" -eq 0 ]
	then
		# Go ahead, grab the lock
		# KILL is deliberately not listed: it cannot be caught.
		trap "cleanup" INT HUP TERM
		scotty -nc "bringup_lock 9" &
		lock_pid=$!
		# Fix the problem with another bringup getting the lock
		# before the background task can be started
		sleep 10
		if ps -ef | grep scotty | grep -v grep | grep " ${lock_pid} " > /dev/null 2>&1
		then
			return 0
		fi
		# The background scotty is gone, so the lock is not ours.
		lock_pid=0
		return 1
	fi

	# This call hangs till lock is released by another bringup client
	echo "Another bringup is configuring the centerplane or a hpostdump is running."
	echo "Waiting for this operation to complete...\c"
	scotty -nc "bringup_lock 0"
	echo ""
	return 1
}

#
# Procedure to get a Y(es) or N(o) answer from the user
#
# Reads lines from standard input until one of them is y, yes, n or no
# (in any letter case), prompting again after every other answer.
#
# Globals written: selection, at_eof
# Returns: 0 - the answer was y(es)
#          1 - the answer was n(o), or standard input ended without a
#              valid answer
#
getSelection()
{
	while :
	do
		read selection
		at_eof=$?
		# Fold to lower case; tr needs both the source and target set
		selection=`echo "$selection" | tr '[A-Z]' '[a-z]'`
		case "$selection" in
		y | yes)
			return 0
			;;
		n | no)
			return 1
			;;
		*)
			# Nothing more to read: answer "no" rather than
			# prompting forever.
			if [ "$at_eof" -ne 0 ]
			then
				return 1
			fi
			echo "bringup: Please enter y(es) or n(o)  \c";
			;;
		esac
	done
}

#
#	Bringup starts here.
#
# Refuse to run (exit status 9) unless each of the SSP environment
# variables tested below is set to a non-empty value.
#
[ -n "${SSPVAR}" ] || {
	echo "No SSPVAR environment variable. Cannot proceed."
	exit 9
}

[ -n "${SUNW_HOSTNAME}" ] || {
	echo "No SUNW_HOSTNAME environment variable. Cannot proceed."
	exit 9
}

[ -n "${SSPETC}" ] || {
	echo "No SSPETC environment variable. Cannot proceed."
	exit 9
}


#
# Only one bringup per domain: the pid of the running bringup is kept in
# ${SSPVAR}/pids/bringup-${SUNW_HOSTNAME}.pid.  Exit if the pid recorded
# there still shows up as a bringup process, otherwise record our own pid.
#
if [ ! -d "${SSPVAR}/pids" ]
then
	echo "Directory: ${SSPVAR}/pids is missing. Cannot proceed."
	exit 1
fi

# Make sure the file exists so that the cat below cannot fail on a first run
touch ${SSPVAR}/pids/bringup-${SUNW_HOSTNAME}.pid
bringupid=`cat ${SSPVAR}/pids/bringup-${SUNW_HOSTNAME}.pid`

if [ -n "$bringupid" ] &&
	ps -ef | grep bringup | grep -v grep | grep " ${bringupid} " > /dev/null 2>&1
then
	echo "Another bringup command is running, sorry exiting!"
	exit 1
fi

# No live bringup owns the file: claim it with our PID.
echo $$ > ${SSPVAR}/pids/bringup-${SUNW_HOSTNAME}.pid

#
# Lock bringup.lock resource
#
# getLock stores the pid of the lock-holding process in lock_pid and
# leaves it at 0 while the lock could not be taken, so keep calling it
# (pausing one second after each attempt) until lock_pid is non-zero.
#
lock_pid=0
while :
do
	[ "$lock_pid" -eq 0 ] || break
	getLock
	sleep 1
done

#
# Defaults for everything the command line can influence.
#	POSTARGS	options handed to hpost
#	BOOTARGS	arguments handed to obp_helper
#	CH_ARGS		non-empty: send INFO messages to the log
#
POSTARGS=""
BOOTARGS=""
CH_ARGS="-B"
verbose=0
force=0
FORCE=0
cplane_config=0
unknowns=0
unk=""

#
# Go through the command line arguments.  An option is recognised by its
# first two characters; for options that carry a value, the value is
# whatever follows those two characters in the same argument.
#
for a in $*
do
	case $a in
	-B*)
		# Internal use, send info messages to domain's log
		CH_ARGS="-B"
		;;
	-v*)
		# Verbose: raise hpost's verbosity as well
		verbose=1
		POSTARGS="${POSTARGS} -v70"
		;;
	-f*)
		# skip check for domain UP|DOWN
		force=1
		;;
	-F*)
		# used by automatic recovery scripts
		FORCE=1
		;;
	-g*)
		# This is strictly for POST to maintain a log file.
		POSTARGS="${POSTARGS} -g"
		;;
	-p*)
		# Value is passed through to hpost as -p<value>
		pnum=`echo $a | sed -e 's/..\(.*\)/\1/'`
		POSTARGS="${POSTARGS} -p${pnum}"
		;;
	-J*)
		# Jam requested bus config with minimal testing.
		jammer=`echo $a | sed -e 's/..\(.*\)/\1/'`
		POSTARGS="${POSTARGS} -J${jammer}"
		;;
	-L*)
		# This is used to have hpost send everything to syslog.
		POSTARGS="${POSTARGS} -s -v10"
		;;
	-Q*)
		# quick reboot changed to use -l7 for patch update to bug 4059181
		POSTARGS="${POSTARGS} -l7"
		;;
	-l*)
		# Value is passed through to hpost as -l<value>
		level=`echo $a | sed -e 's/..\(.*\)/\1/'`
		POSTARGS="${POSTARGS} -l${level}"
		;;
	-C*)
		# Have hpost do initial centerplane configuration.
		cplane_config=1
		;;
	-X*)
		# Have hpost look at a different blacklist file.
		blacklistfile=`echo $a | sed -e 's/..\(.*\)/\1/'`
		POSTARGS="${POSTARGS} -X${blacklistfile}"
		;;
	-D | -A | on | off)
		# known args to obp_helper... just pass them on
		BOOTARGS="${BOOTARGS} ${a}"
		;;
	*)
		# Unknown argument, assumed to be a boot argument.
		# Collect them; they are appended to obp_helper's
		# arguments after the loop.
		unk="${unk} ${a}"
		unknowns=1
		;;
	esac
done

# Unknown arguments are handed to obp_helper behind the word "boot".
if [ "${unknowns}" -ne 0 ]
then
	unk="boot ${unk}"
	BOOTARGS="${BOOTARGS} ${unk}"
fi

# Announce the start of bringup in the log.
if [ "$CH_ARGS" != "" ]
then
	logger -p local0.info \
	-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : BRINGUP COMMAND STARTED"
fi

#
# Check that domain is powered ON
# NOTE: -F means it comes from automatic recovery scripts
#
# The check is skipped when either -f or -F was given.  Exit status 1
# from "power -v -q" aborts the bringup; any larger status only
# produces a warning.
#
if [ "$FORCE" -eq 0 ] && [ "$force" -eq 0 ]
then
	power -v -q
	status=$?
	if test "$status" -eq 1
	then
		if [ "$CH_ARGS" != "" ]
		then
			logger -p local0.info \
				-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
				"INFO : domain not powered ON"
			logger -p local0.info \
				-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
				"INFO : BRINGUP COMMAND FINISHED"
		fi
		echo "Domain not fully powered on, please run \`power -on\` and then bringup"
		cleanup
		exit 1
	fi
	# Status 1 never gets here (handled above), so this is "greater
	# than 1".  The variable must be expanded: the bare word "status"
	# is not a number and makes test fail every time.
	if test "$status" -gt 1
	then
		echo "Warning: check for domain power status FAILED"
	fi
fi

#
# Check that domain is DOWN
# NOTE: -F means it comes from automatic recovery scripts
#
# Skipped when -f or -F was given.  "check_host -q" status:
#	0	host is up: the operator has to confirm before going on
#	1	nothing to do here
#	other	treated as a check_host failure, bringup gives up
#
if [ "$force" -eq 0 ] && [ "$FORCE" -eq 0 ]
then
	check_host -q
	val=$?
	if [ "$val" -eq 0 ]
	then
		# Host is UP
		echo "WARNING: Host is active, bringup may corrupt filesystems"
		echo " "
		echo "Do you really wish to continue (y/n)? \c"
		getSelection
		ret=$?
		if [ "$ret" -ne 0 ]
		then
			# Operator said no
			echo "Bringup exiting"
			cleanup
			exit 1
		fi

		# Operator wants to go on; record a hung host in the log
		echo "Is this bringup executed because of a Hung Host (y/n)? \c"
		getSelection
		ret=$?
		if [ "$ret" -eq 0 ]
		then
			logger -p local0.info \
			-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME}): " "DOWN System Hung"
		fi
	elif [ "$val" -lt 0 ] || [ "$val" -gt 1 ]
	then
		echo "WARNING: check_host failed for $SUNW_HOSTNAME"
		echo "Bringup failed, use bringup -f if you wish to continue"
		cleanup
		exit 1
	fi
fi

#
# Determine if system needs centerplane configuration, i.e. whether
# "-C" has to be added to the hpost arguments (POSTARGS).  Criteria:
#	- Only one domain exists.
#	- Other domains are not being brought UP
# Both are answered by Any_other_domain_UP, which returns 0 when no
# other domain is up.  The four sections below cover the combinations
# of -f (force), -F (FORCE) and -C (cplane_config).
#
# Case 1: neither -f nor -F.  The operator is asked to confirm before
# the centerplane is configured.
#
if [ "$force" -eq 0 ] && [ "$FORCE" -eq 0 ]
then
	Any_other_domain_UP
	ret=$?
	if [ "$ret" -eq 0 ] || [ "$cplane_config" -eq 1 ]
	then
		#
		# It is safe to run -C because no domain is UP or
		# user has run bringup -C
		#
		echo "This bringup will configure the Centerplane. Please confirm (y/n)? \c"
		getSelection
		ret=$?
		if [ "$ret" -eq 0 ]
		then
			POSTARGS="${POSTARGS} -C"
		else
			# Release lock
			# Operator declined: bringup carries on without -C,
			# so the process holding the lock is terminated here.
			kill -TERM $lock_pid > /dev/null 2>&1
			lock_pid=0
		fi
	fi
fi

# Check case when -f and -C are given so as to update POSTARGS
# (no confirmation and no check of the other domains in this case)
if [ "$cplane_config" -eq 1 ] && [ "$force" -eq 1 ]
then
	POSTARGS="${POSTARGS} -C"
fi

# Check case when only -f is given
# -C is added without asking when no other domain is up.
if [ "$force" -eq 1 ] && [ "$cplane_config" -eq 0 ]
then
	Any_other_domain_UP
	ret=$?
	if [ "$ret" -eq 0 ]
	then
		# It is safe to run -C because no domain is UP
		POSTARGS="${POSTARGS} -C"
		cplane_config=1
	fi
fi

# Check case when -F is given. Normally invoked from automatic reboot scripts.
# NOTE: this section is independent of the -f sections above.  When both
# -f and -F are given, Any_other_domain_UP may run a second time and
# " -C" may be appended to POSTARGS a second time.
if [ "$FORCE" -eq 1 ]
then
	Any_other_domain_UP
	ret=$?
	if [ "$ret" -eq 0 ]
	then
		#
		# It is safe to run -C because no domain is UP
		# (only the result of Any_other_domain_UP is tested here)
		#
		POSTARGS="${POSTARGS} -C"
		cplane_config=1
	fi
fi

#
# Execute sys_reset if we configure the centerplane. Otherwise, release lock file
#
# "Configure the centerplane" means that POSTARGS contains -C.  The lock
# stays held in that case; without -C the lock holder is terminated now.
#
if echo $POSTARGS | grep '\-C' > /dev/null 2>&1
then
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : configuring the centerplane, POST args ${POSTARGS}"

	# sys_reset is only run when SUNW_INTERNAL is set to TRUE
	if [ "${SUNW_INTERNAL}" = "TRUE" ]
	then
		echo "Starting: sys_reset"
		sys_reset -A
	fi
	ans=`scotty -nc "stopDR; resetConfMemAddrMap"`
elif [ "$lock_pid" -ne 0 ]
then
	# Release lock
	kill -TERM $lock_pid > /dev/null 2>&1
	lock_pid=0
fi

#
# kill obp_helper and netcon_server running on this domain
#
# Each daemon's pid is read from its pid file under ${SSPVAR}/pids; the
# process is only killed when ps still lists that pid for that daemon.
#
if test -f "${SSPVAR}/pids/obp_helper-${SUNW_HOSTNAME}.pid"
then
	obp_helper_pid=`cat ${SSPVAR}/pids/obp_helper-${SUNW_HOSTNAME}.pid`
	if test -n "$obp_helper_pid" &&
		ps -ef | grep obp_helper | grep -v grep | grep " ${obp_helper_pid} " > /dev/null 2>&1
	then
		kill -KILL $obp_helper_pid > /dev/null 2>&1
	fi
fi

if test -f "${SSPVAR}/pids/netcon_server-${SUNW_HOSTNAME}.pid"
then
	netcon_pid=`cat ${SSPVAR}/pids/netcon_server-${SUNW_HOSTNAME}.pid`
	if test -n "$netcon_pid" &&
		ps -ef | grep netcon_server | grep -v grep | grep " ${netcon_pid} " > /dev/null 2>&1
	then
		kill -KILL $netcon_pid > /dev/null 2>&1
	fi
fi

#
# check for arbstop dumps that might be running
#
ans=`scotty -nc "eddpostdump_check"`
if test "$ans" -ne 0
then
	# An error occurred during the arbstop dump check.
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"WARNING : Could not assert the state of eddpostdump lock file"
fi

#
# Start hpost.
#
test "X${CH_ARGS}" = "X" ||
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : Starting: hpost ${POSTARGS}"
echo "Starting: hpost ${POSTARGS}"
hpost ${POSTARGS}

#
# Save return value from hpost and validate it
# (must be taken right after hpost, before any other command runs).
# A status from 0 to 63 is used as the boot processor number further
# down; anything else is reported as an hpost failure.
#
bootproc=$?
if [ "$bootproc" -ge 0 ] && [ "$bootproc" -le 63 ]
then
	:
else
	if test "X${CH_ARGS}" != "X"
	then
		logger -p local0.info \
			-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
			"INFO : hpost failed, exit status is $bootproc"
		logger -p local0.info \
			-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
			"INFO : BRINGUP COMMAND FINISHED"
	fi
	echo "hpost FAILED, exit status= $bootproc. Cannot proceed."
	cleanup
	exit 1
fi

#
# Release bringup.lock file if appropriate
# (terminate the process that holds the lock for this bringup)
#
[ "$lock_pid" -gt 0 ] && kill -TERM $lock_pid > /dev/null 2>&1

#
# Save bootproc to file
#
# The platform name is the first colon-separated field of cb_config; the
# file lives at ${SSPVAR}/etc/<platform>/<domain>/bootproc.
#
platf=`cat ${SSPVAR}/.ssp_private/cb_config | awk -F: '{print $1}'`
bootproc_file=${SSPVAR}/etc/${platf}/${SUNW_HOSTNAME}/bootproc

touch $bootproc_file
echo "Boot proc $bootproc is written to ${bootproc_file}"
echo ${bootproc} > $bootproc_file
case $? in
1)
	echo "Could not create file: ${bootproc_file}"
	exit 1
	;;
*)
	# Read the value back: an empty file means the write was lost
	bootproc_test=`cat $bootproc_file`
	if [ -z "$bootproc_test" ]
	then
		echo "${bootproc_file} empty"
		echo "file system might be full. bringup exiting!"
		exit 1
	fi
	;;
esac

#
# Update Starfire MIB. Note: done only to the domainTable's BootProc
# and InterruptVector objects.
#
echo "Updating the bootproc and IntVector MIB"
updateDTable=`scotty -nc "updateDomainTable $SUNW_HOSTNAME $bootproc"`

#
# Start obp_helper in the background, detached from our standard input.
#
test "X${CH_ARGS}" = "X" ||
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : Starting: obp_helper -m ${bootproc} ${BOOTARGS}"
echo "Starting: obp_helper -m ${bootproc} ${BOOTARGS}"
obp_helper -m ${bootproc} ${BOOTARGS} </dev/null 2>&1 &

#
# Start netcon_server in the background.
#
test "X${CH_ARGS}" = "X" ||
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : Starting: netcon_server -p ${bootproc}"
echo "Starting: netcon_server -p ${bootproc}"
netcon_server -p ${bootproc} &

#
# Before we exit, let's sleep to give time for dhlp to put its
# signature on the bootproc.
#
# After the initial pause, check_host is tried up to five times, three
# seconds apart.  flag stays 1 when none of the attempts succeeded; that
# only produces a warning in the log, bringup still exits with status 0.
#
sleep 5
flag=1
for i in 1 2 3 4 5
do
	if check_host -q -p $bootproc
	then
		flag=0
		break
	fi
	sleep 3
done

if [ "$flag" -eq 1 ]
then
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"WARNING : could not assert signature and state on bootproc"
fi

test "X${CH_ARGS}" = "X" ||
	logger -p local0.info \
		-t "bringup-(SUNW_HOSTNAME:${SUNW_HOSTNAME})" \
		"INFO : BRINGUP COMMAND FINISHED"
exit 0
