#! /usr/bin/sh
#
# ident	"%Z%%M%	%I%	%E% SMI"
#
# Copyright (c) 1997 by Sun Microsystems, Inc.
# All rights reserved.
#
# hasap_start_net
#
# This program starts SAP R3 CI and AS.
#
# Start_method relies on the stop_method to do the right thing,
# also assumes Oracle/Informix is already started.
#

# Script-wide settings.  ARGV0/PATH0 are the base name and directory of
# this script; SYSLOG is whatever "haget -f syslog_facility" prints.
# $0 is quoted so that a path containing whitespace is passed to
# basename/dirname as a single argument.
VERSION=1.3
ARGV0=`basename "$0"`
PATH0=`dirname "$0"`
SYSLOG=`haget -f syslog_facility`
SAPCONF=/etc/opt/SUNWscsap/hadsconf
MYNAME=`uname -n`
HA_DEBUG=1


# Directory this script was started from (same value as PATH0).
prog_path=${PATH0}

# Put the ha-service specific clust_progs directory on PATH, since it
# does not co-reside with the framework clust_progs.
# When this script already lives under a path containing "/clust_progs",
# only its own directory is prepended; otherwise the sibling
# ../clust_progs directory is prepended as well.
# NOTE(review): an older comment also mentioned adding fault_progs for
# fault monitor methods; nothing here does that.
case "$prog_path" in
*/clust_progs*)
	PATH=${prog_path}:${PATH}
	;;
*)
	PATH=${prog_path}:${prog_path}/../clust_progs:${PATH}
	;;
esac

# Source the ha-services common utilities.  The file name contains no
# slash, so the shell looks it up through PATH (extended just above).
. ds_utilities.1.3

# Log how this method was invoked: the script path and its first two
# arguments.
# NOTE(review): lognotice is not defined in this file; presumably it
# comes from ds_utilities.1.3 -- confirm.
lognotice `gettext "$0 $1 $2"`

# Load the configuration file named by SAPCONF.
# NOTE(review): source_env_file is not defined in this file either; the
# rest of the script reads _INST_1_PRIV_* variables afterwards, so it
# presumably sets them -- confirm.
source_env_file $SAPCONF


#
# SAP variables
#
# Short names for the _INST_1_PRIV_* settings.
# NOTE(review): those settings are not assigned in this file; they are
# presumably provided by the configuration loaded earlier -- confirm.

# Marker value that the SAPMNT* settings are compared against later on.
NOTAVAIL=$_INST_1_PRIV_NOT_AVAIL

# SAP / database identifiers and admin logins.
SAPSID=$_INST_1_PRIV_YOUR_SAP_SID
DBSID=$_INST_1_PRIV_YOUR_DB_SID
SAPADM=$_INST_1_PRIV_SAP_ADMIN_LOGIN_NAME
DBADM=$_INST_1_PRIV_DB_ADMIN_LOGIN_NAME

# Central instance id and startsap retry policy.
CI_INSTANCE_ID=$_INST_1_PRIV_CI_INSTANCE_ID
CI_STARTSAP_RETRY_CNT=$_INST_1_PRIV_CI_STARTSAP_RETRY_CNT
CI_STARTSAP_RETRY_INTERVAL=$_INST_1_PRIV_CI_STARTSAP_RETRY_INTERVAL

# Logical host names.
CI=$_INST_1_PRIV_CI_LOGICAL_HOSTNAME
DB=$_INST_1_PRIV_DB_LOGICAL_HOSTNAME
NFS=$_INST_1_PRIV_NFS_LOGICAL_HOSTNAME

# Mount point settings and the automatic NFS mount switch.
SAPMNT1=$_INST_1_PRIV_USR_SAP_TRANS_MOUNT_POINT
SAPMNT2=$_INST_1_PRIV_SAP_MOUNT_POINT_B
SAPMNT3=$_INST_1_PRIV_SAP_MOUNT_POINT_C
SAPMNT4=$_INST_1_PRIV_SAP_MOUNT_POINT_D
AUTO_NFSMOUNT=$_INST_1_PRIV_AUTO_NFS_MOUNT


# Directory holding the SAP executables of this system id.
SAPEXE=/usr/sap/$SAPSID/SYS/exe/run

# Non-empty only when the DB admin login name contains "ora".
isDBOracle=`echo $DBADM | grep ora`

# First argument: comma separated list, turned into an sh word list by
# replacing every comma with a space.
MASTERED=`echo $1 | tr ',' ' '`

# When an NFS logical host is configured, ask haget which host masters it.
[ -z "$NFS" ] || nfs_physhost=`haget -f master -h "$NFS"`

#
# Walk the mastered logical hosts; AmICI stays empty unless one of
# them is the CI logical host.
#
AmICI=
for LH in $MASTERED ; do
	case "$LH" in
	"$CI")
		AmICI=$LH
		lognotice `gettext "This is a master of CI."`
		break
		;;
	esac
done
 
#
# NFS mount the file system, if they are specified.
# NFS server can be CI, DB, or 3rd system.
#
#if [ -z "$AmICI" ]; then
case "$AUTO_NFSMOUNT" in
y|Y)
	#
	# Mounting is needed unless all four SAP mount point settings
	# carry the "not available" marker.
	# (A former extra check that compared nfs_physhost with MYNAME,
	# for the n+1 environment, is disabled.)
	#
	needToMount=y
	if [ "$SAPMNT1" = "$NOTAVAIL" ] && [ "$SAPMNT2" = "$NOTAVAIL" ] &&
	   [ "$SAPMNT3" = "$NOTAVAIL" ] && [ "$SAPMNT4" = "$NOTAVAIL" ]; then
		needToMount=n
	fi

	if [ "$needToMount" = "y" ] && [ -n "$NFS" ]; then
		#
		# umount first, in case nfs server is new
		#
		${PATH0}/hasap_nfsumount
		${PATH0}/hasap_nfsmount
	fi
	;;
esac

#fi


#
# If I am not CI, mount nfs is the only thing to do.
#
# Nothing more to do on a host that does not own the CI.
[ -n "$AmICI" ] || exit 0


#
# In SC3.0, there is a data service state; the value "n" means
# this method must not start anything.
#
case "${DS_STATE}" in
n)
	exit 0
	;;
esac


#
# OK. I do own the CI. Now...
#

# ###############################################################
#
# Routine to restart CI and AS.
#
# When we start, the DB could still be in the init state, so we have to
# retry starting R3 again and again. If a Dev/Test instance or an AS
# is on this physical host, stop it first. The same goes for the app server.
# Usually when this routine is called, there is no CI running. 
# We also assume ipcs is clean. 
# 
# We need to stop the Test/Devel and app server here.
# There is a chance CI crashes, or a takeover happens.
# There is no chance for stop_net_method to run, which
# will stop the test/devel and app server. This is a safety net.
# stopsap executables might not be around since the nfs server
# crashed, when CI=NFS. Thus we need this here.
# We stop all test/devel, app server before we start CI.
# After we start CI, we restart all the app servers.
# 
# ###############################################################

restartR3 ()  {
	#
	# Launch hasap_stop_all_instances in the background, then try to
	# start SAP R3 up to CI_STARTSAP_RETRY_CNT times, and finally
	# launch hasap_start_all_instances in the background.
	#
	# Globals read:    PATH0 SAPEXE SAPSID SAPADM CI_INSTANCE_ID
	#                  CI_STARTSAP_RETRY_CNT CI_STARTSAP_RETRY_INTERVAL
	# Globals written: r3Started cur_retry probe_status numipc
	#                  SAPKILLFILE
	# Exit:            calls "exit 1" (does not return) when sapmon is
	#                  not executable or R3 is still down after the
	#                  last retry.
	#

	lognotice `gettext "Restarting SAP R3..."`

	#
	# Safety net: when the CI crashed or a takeover happened, the stop
	# method had no chance to stop the app servers or test/development
	# instances (their stopsap executables might not be around when
	# CI=NFS and the nfs server crashed), so stop them now.
	#
	if [ -x "${PATH0}/hasap_stop_all_instances" ]; then
		lognotice `gettext "Executing hasap_stop_all_instances in background"`
		"${PATH0}/hasap_stop_all_instances" &
	fi

	r3Started=
	cur_retry=1
	while [ "${cur_retry}" -le "${CI_STARTSAP_RETRY_CNT}" ]; do

		lognotice `gettext "Starting SAP R3...(retry# ${cur_retry}/max ${CI_STARTSAP_RETRY_CNT})"`

		#
		# Probe the DBMS with "sapmon db"; while it is not
		# available, use up one retry and wait.
		#
		if [ ! -x "${PATH0}/sapmon" ]; then
			logerr `gettext "${PATH0}/sapmon executable not found!"`
			exit 1
		fi

		"${PATH0}/sapmon" db > /dev/null 2>&1
		probe_status=$?

		if [ ${probe_status} -ne 0 ]; then
			lognotice `gettext "DBMS not yet available, retry in ${CI_STARTSAP_RETRY_INTERVAL} seconds..."`

			cur_retry=`expr $cur_retry + 1`
			sleep ${CI_STARTSAP_RETRY_INTERVAL}
			continue
		fi

		lognotice `gettext "DBMS is running, Proceeding to startsap r3"`

		#
		# Remove remnants of old SAP ipc objects or old processes
		# that may hinder the startup of SAP.
		#
		if [ -x "${SAPEXE}/cleanipc" ]; then
			#
			# awk always prints an integer (0 when cleanipc shows
			# no "Number of IPC-Objects" line), so the numeric test
			# below never sees an empty operand.
			#
			numipc=`"${SAPEXE}/cleanipc" ${CI_INSTANCE_ID} | awk -F: '/Number of IPC-Objects/ { n += $2 } END { print n + 0 }'`

			if [ "${numipc:-0}" -gt 0 ]; then
				lognotice `gettext "Found $numipc leftover IPC objects for SAP instance, removing via cleanipc... "`
				"${SAPEXE}/cleanipc" ${CI_INSTANCE_ID} remove >/dev/console 2>&1
			fi

			SAPKILLFILE=/usr/sap/${SAPSID}/DVEBMGS${CI_INSTANCE_ID}/work/kill.sap
			if [ -f ${SAPKILLFILE} ]; then
				lognotice `gettext "Found leftover kill.sap file, will execute it..."` 
				# eval runs the path as a command line, i.e. the
				# kill.sap file itself is executed.
				eval ${SAPKILLFILE} >/dev/console 2>&1
			fi
		else
			logwarning `gettext "Cannot find ${SAPEXE}/cleanipc"`
		fi

		#
		# Execute startsap r3
		#
		lognotice `gettext "Executing startsap r3 as user \"${SAPADM}\""`
		su - ${SAPADM} -c "startsap r3" >/dev/console 2>&1

		#
		# Verify the start with "sapmon proc".  sapmon is checked
		# again because it may have disappeared in the meantime.
		#
		if [ ! -x "${PATH0}/sapmon" ]; then
			logerr `gettext "${PATH0}/sapmon executable not found!"`
			exit 1
		fi

		"${PATH0}/sapmon" proc > /dev/null 2>&1
		probe_status=$?

		if [ $probe_status -eq 0 ]; then
			lognotice `gettext "SAP R3 started."`
			r3Started=y

			# SAP critical processes are up, leave the retry loop.
			break
		fi

		lognotice `gettext "Some SAP R3 Process failed to start,retry in ${CI_STARTSAP_RETRY_INTERVAL} seconds...(sapmon proc=${probe_status})"`
		cur_retry=`expr $cur_retry + 1`
		sleep ${CI_STARTSAP_RETRY_INTERVAL}

	done

	if [ "$r3Started" != "y" ]; then
		logerr `gettext "SAP R3 failed to start after repeated tries."`
		exit 1
	fi

	#
	# When R3 starts, it is time to start all instances now.
	#
	if [ -x "${PATH0}/hasap_start_all_instances" ]; then
		lognotice `gettext "Executing hasap_start_all_instances in background"`
		"${PATH0}/hasap_start_all_instances" &
	fi

}

# This host currently masters the logical host that hasap uses.
# Check if the process is already running, if so exit. (We must have
# started it on some earlier cluster reconfiguration when this
# physical host first took over mastery of the logical host.)

# All of the checking is done in the startup script, so let it do the work.
# We only check here if the process(es) is (are) already running, because
# that takes a very long time in sap start.
	
	
#
# Check if we need to restart CI & AS.
# If one of the procs dies, restart CI & AS as well.
#

NeedToStart=

# Without an executable sapmon the SAP processes cannot be probed.
if [ ! -x ${PATH0}/sapmon ]; then
	logerr `gettext "${PATH0}/sapmon executable not found!"`
	exit 1
fi

# "sapmon proc" exits 0 when it finds the critical SAP processes running.
${PATH0}/sapmon proc > /dev/null 2>&1
probe_status=$?

case $probe_status in
0)
	lognotice `gettext "Found critical SAP processes running, no need to restart."`
	NeedToStart=n
	;;
*)
	lognotice `gettext "Cannot find some critical SAP process (probe_status=${probe_status}), need to restart."`
	NeedToStart=y
	;;
esac


# ###############################################################
# 
# Restart CI and AS here.
#
# ###############################################################

if [ "$NeedToStart" = "y" ]; then
	case "$DS_STATE" in
	n)
		#
		# don't start
		#
		;;
	*)
		#
		# DS_STATE is "y", or it is something else / unset
		# (older versions of HASAP know nothing about DS_STATE):
		# restart in the background in either case.
		#
		restartR3 &
		;;
	esac
fi

exit 0
