#! /bin/ksh
#
#	Copyright 09/30/99 Sun Microsystems, Inc.  All Rights Reserved.
#
# @(#)haoracle_fmon_start.sho	1.48	99/09/30	SMI
# haoracle_fmon_start - start an Oracle fault monitor (haoracle_fmon) for the
#			instance named by the first argument
#
# Environment:
# HA_ENV - file name of HA environment file
# HA_METASETSERVE - list of the logical hosts to run on the local host
# HA_SIBLING_METASETSERVE - list of the logical hosts not on local host
#

# make sure we reread HA_ENV (might have changed)
# setting HA_CLUSTER to "" forces a re-read in oracle_boiler
HA_CLUSTER=""
# Copyright 03/30/99 Sun Microsystems, Inc.  All Rights Reserved.
# @(#)oracle_boiler	1.31 99/03/30 SMI

# ---- start of the common ha-dbms boilerplate ----

# Script name without its directory part; used to prefix messages.
argv0=$(basename $0)

# Install parameters of the SUNWscor package.  pkgparam errors are discarded;
# an empty BASEDIR stays empty and an empty PRODUCTDIR becomes "SUNWcluster".
_basedir=$(pkgparam SUNWscor BASEDIR 2>/dev/null)
_basedir=${_basedir:-""}
_productdir=$(pkgparam SUNWscor PRODUCTDIR 2>/dev/null)
_productdir=${_productdir:-"SUNWcluster"}

LOGGER=/usr/bin/logger

# Put the product's bin and ha/oracle directories in front of the search path
# and publish the locations of the HA configuration and state directories.
PATH=${_basedir}/${_productdir}/bin:${_basedir}/${_productdir}/ha/oracle:${PATH}
HA_FILES=/etc/${_basedir}/SUNWscor
HA_VAR=/var/${_basedir}/SUNWscor
export PATH HA_FILES HA_VAR

# Load the HA utilities library; give up with a syslog message if that fails.
. dbms_utilities || {
	$LOGGER -p local7.err -t "$HA_SLOGTAG" "$argv0: Cannot find HA utilities library"
	exit 1
}

HA_CLUSTER=$CLUSTNAME
export HA_CLUSTER

# Default files and locations.
# Keep these below the HA_FILES assignment - HA_DB_SUPPORT is built from it.
HA_DATABASES=haoracle_databases
HA_DB_SUPPORT=${HA_FILES}/haoracle_support
export HA_DB_SUPPORT
ds=oracle

PREFIX="SUNWcluster.ha.${ds}"
HA_FM_NAME="fault_mon"

# The haoracle_support file must be readable, otherwise nothing can be started.
if [ ! -r $HA_DB_SUPPORT ] ; then
	logerr "4000" "file ${HA_DB_SUPPORT} does not exist or is not readable!"
	exit 2
fi


# talk_start cmd [arg ...] - log the command line, run it in the foreground,
# and report how it went.
#   Arguments: the command to run followed by its arguments, one word each
#   Returns:   0 if the command exited with status 0;
#              1 otherwise (a "... failed" line is logged first)
# The command is executed as "$@" so every argument reaches it exactly as the
# caller passed it; running it as unquoted $* would split arguments containing
# blanks and expand glob characters a second time.
talk_start() {
  	logprint "$$: $*"
  	if "$@" ; then
    		return 0
  	else
    		logprint "$$: $* ... failed"
    		return 1
  	fi
}


# talk_start_bg cmd [arg ...] - log the command line, then start the command
# in the background without waiting for it.
#   Arguments: the command to run followed by its arguments, one word each
# The command is executed as "$@" so arguments containing blanks or glob
# characters are handed over unchanged.
talk_start_bg() {
  	logprint "$$: $*"
  	"$@" &
  	}


# get_pid [ -u userid ] pattern - find the process id of a running program
# if called with "-u userid", only process for that user will be considered
#
# Result: the global PID is set to the second field of every matching ps
# line (empty if nothing matched).  The global ps_args is overwritten too.
# All remaining arguments, joined by blanks, form the search pattern.
get_pid() {
	# choose the ps listing: one user's processes, or all processes
	if [ "$1" = "-u" ] ; then
		ps_args="-f -u $2"
		shift
		shift
	else
		ps_args="-ef"
	fi
	# grep pre-selects lines containing the pattern as a whole word; nawk
	# then matches a shortened copy of the pattern and drops the line that
	# belongs to this very nawk command.
	# NOTE(review): awk string positions start at 1, so substr(pat, 0, 79)
	# presumably yields fewer than 79 characters - confirm the intended
	# length.
	PID=`/usr/bin/ps $ps_args | grep "\<$*\>" | nawk -v pat="$*" '
 BEGIN { 
   search_pat = substr(pat, 0, 79)
   }
 { if ( match($0, search_pat) )
     if ( ! match($0, "nawk -v pat=") )
       print $2
  }
'`
}


# kill_proc pattern - look up the processes matching the pattern (via get_pid)
# and send each of them the default kill signal.  A message is logged when
# nothing matches.
kill_proc() {
	logprint "Killing $*..."
	get_pid $*

	# nothing found: just report it
	if [ -z "$PID" ] ; then
		logprint "process \"$*\" could not be located..."
		return
	fi

	# get_pid may have found several processes; handle them one by one
	for p in ${PID} ; do
		logprint "Killing process id $p"
		kill $p
	done
	echo
}

# read_ha_databases instance - fetch the configuration record of an instance
#	and split it into shell variables.
#	The record is tab separated; fields 1 and 3-10 are stored in
#	on_off_mode, logical_host, poll_cycle, connect_cycle, timeout,
#	restart_delay, db_login, init_ora and listener_name.
#	Returns 1 (after logging an error) if no record exists, else 0.
read_ha_databases() {
	conf_line=$(get_instance "${ds}" "$1")
	set_inst_name $1

	# no record for this instance: complain and stop
	if [ -z "$conf_line" ] ; then
		logerr "4010" "$1 missing from HA ORACLE CCD!"
		return 1
	fi

	# on/off switch and owning logical host
	on_off_mode=$(echo "$conf_line" | cut -s -f 1)
	logical_host=$(echo "$conf_line" | cut -s -f 3)

	# fault monitor timing parameters
	poll_cycle=$(echo "$conf_line" | cut -s -f 4)
	connect_cycle=$(echo "$conf_line" | cut -s -f 5)
	timeout=$(echo "$conf_line" | cut -s -f 6)
	restart_delay=$(echo "$conf_line" | cut -s -f 7)

	# database access details
	db_login=$(echo "$conf_line" | cut -s -f 8)
	init_ora=$(echo "$conf_line" | cut -s -f 9)
	listener_name=$(echo "$conf_line" | cut -s -f 10)

	set_logical_host $logical_host
	return 0
}

# find_oracle instance - finds and sets various Oracle related variables:
#	ORACLE_HOME SQLDBA PFILE LD_LIBRARY_PATH
# in case of a problem, it returns with an empty ORACLE_HOME variable
#
# Steps: read the instance's HA configuration, check that this node is one of
# the logical host's physical hosts, look the instance up in oratab, work out
# the Oracle version, pick the admin tool (svrmgrl or sqldba) and finally
# export the variables listed above.
# Returns 1 if the configuration cannot be read or this node is not a member;
# several version problems end the whole script with "exit 1".
# NOTE(review): when orainst/RELVER is absent, ORACLE_HOME is exported before
# the later checks, so on that path it can stay non-empty even if the
# "not executable" error (4060) is logged - confirm this is intended.

find_oracle() {

	typeset my_node
	typeset physical_hosts

  	ORATAB=/var/opt/oracle/oratab

  	ORACLE_HOME=""
	SQLDBA=""
	# load logical_host, init_ora etc. for this instance
        read_ha_databases $1
        if [ $? -ne 0 ]; then
                return 1
        fi
	# only continue if this node is one of the logical host's physical hosts
        my_node=`uname -n`
        physical_hosts=$(haget -f physical_hosts -h $logical_host | tr '\012' ' ')
        is_member "$my_node" "$physical_hosts"
        if [ $? -ne 0 ] ; then
                return 1
        fi


	# oratab line for the instance; its second ':' separated field is
	# taken as the Oracle home directory
	if oratab_line=`grep "^[	 ]*$1:" $ORATAB` ; then
		oracle_home=`echo $oratab_line | awk -F: '{print $2}' -`
		# nothing below runs (and nothing is logged) unless the home
		# directory exists and disk_mounted prints something for the
		# instance
		if [ -d $oracle_home ] && [ -n "`disk_mounted $1`" ] ; then
			if [ ! -f $oracle_home/orainst/RELVER ] ; then
				# no RELVER file: ask the admin tool itself for
				# its version by parsing its banner output
				export ORACLE_HOME=$oracle_home
				if [ -f $oracle_home/bin/svrmgrl ] ; then

	ora_version=`$oracle_home/bin/svrmgrl command=exit |awk '/PL\/SQL (Release|Version)/ {substr($3,1,3) ; print substr($3,1,3)} '`

				elif [ -f $oracle_home/bin/sqldba ] ; then

	ora_version=`$ORACLE_HOME/bin/sqldba command=exit | awk '/SQL\*DBA: (Release|Version)/ {split($3, V, ".") ; print V[1]}'`

				else logerr "4031" \
					    "Check Oracle installation"

				fi
				# without a RELVER file a major version below 8
				# is treated as an error
                        ora_majversion=`echo $ora_version | cut -d. -f1`
				if [ $ora_majversion -lt 8 ] ; then
				logerr "4032" \
					"Release Version missing. Check Oracle installation"
					exit 1;
				fi
			else
			# RELVER exists: take the text after '=' on its
			# RELEASE_VERSION line
			ora_version=`grep RELEASE_VERSION $oracle_home/orainst/RELVER | cut -d= -f2` 

			fi

			# no version could be determined by either method
			if [ "$ora_version" = "" ] ; then
				logerr "4030" \
	"RELEASE_VERSION missing from RELVER file"
				exit 1
			fi

			# major version 8 always uses svrmgrl; anything else goes
			# through the minor/release checks below
                        ora_majversion=`echo $ora_version | cut -d. -f1`
                        if [ $ora_majversion -eq 8 ];then
                                SQLDBA="${oracle_home}/bin/svrmgrl"
                        else
				ora_subversion=`echo $ora_version | cut -d. -f2`
				ora_rel=`echo $ora_version | cut -d. -f3`
				# a missing third version component counts as 0
				if [ "$ora_rel" = "" ] ; then
					ora_rel=0
				fi

				# check if the oracle version is supported
				# The versions we will support are:  7.3.[>=2] ,
				# 7.2.[>=3] , and 7.1.[>=6].
				if [ $ora_subversion -eq 3 -a $ora_rel -lt 2 -o \
			     		$ora_subversion -eq 2 -a $ora_rel -lt 3 -o \
			     		$ora_subversion -eq 1 -a $ora_rel -lt 6 ] ; then
					logerr "4040" \
						"Oracle $ora_version not supported!"
					exit 1;
				fi
	
				# release 7.3 use svrmgrl to connect to the instance
				if [ $ora_subversion -ge 3 ] ; then
					SQLDBA="${oracle_home}/bin/svrmgrl"
				else
					SQLDBA="${oracle_home}/bin/sqldba"
				fi
                        fi

			# success only if the chosen tool is executable; the
			# configuration is read again before PFILE is taken
			# from init_ora
			if [ -x ${SQLDBA} ] ; then
				read_ha_databases $1
				ORACLE_HOME="$oracle_home"
				PFILE="$init_ora"
			else
				logerr "4060" \
				"${SQLDBA} does not exist or is not executable!"
			fi
		fi
	else
		logerr "4070" "Database '$1' not found in ${ORATAB}"
	fi
	# put Oracle's lib directory in front of any existing library path
	LD_LIBRARY_PATH=${ORACLE_HOME}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
    	export ORACLE_HOME SQLDBA PFILE LD_LIBRARY_PATH
}

# make_rpc_call host instance request - log the request, then pass the three
# arguments on to ha_dbms_call; the return status is that of ha_dbms_call.
make_rpc_call() {
	logprint "Calling ${3} for instance ${2} on host ${1}..."
	ha_dbms_call $1 $2 $3
	return $?
}


# get_remote_host arg1 arg2 - pick a private link of $HA_REMOTEHOST that
# answers net_pinghost.
#   Both arguments are handed to get_diskgroups first.
#   On success RSHHOST is set to the first answering link, exported, and 0 is
#   returned; otherwise RSHHOST is left empty and 1 is returned.
get_remote_host() {
	get_diskgroups $1 $2
	RSHHOST=""
	for X in $(haget  -f private_links -h $HA_REMOTEHOST) ; do
		# take the first link that responds
		if net_pinghost $X > /dev/null 2>&1 ; then
			RSHHOST=$X
			export RSHHOST
			return 0
		fi
	done
	return 1
}


#end of common ha-dbms boiler plate
#include_boiler

# Read the ha_env file from the HA state directory into this shell.
# NOTE(review): presumably this is the "HA environment file" mentioned in the
# file header; its contents are not visible here - confirm which variables it
# provides.
. $HA_VAR/ha_env

		
# process_parm_file file - print the value of background_dump_dest found in
# the Oracle parameter file "file".
#   If the file has no background_dump_dest line, every file named on an
#   "ifile = ..." line is searched the same way (recursively).
#   Output:  the first blank-delimited word after the first '=' of the
#            matching line; an empty line if the file cannot be read
#            (error ${prog}.4047 is logged in that case).
#   Everything after the first '=' belongs to the value, so a directory name
#   that itself contains '=' is returned intact.  File names are quoted so a
#   path containing blanks can be passed in.
process_parm_file() {
	# keep the working variables local: the function calls itself
	typeset line ifile_list ifile dump_value

	if [ ! -r "$1" ] ; then
		logerr "${prog}.4047"\
		 "Oracle parameter file $1 does not exist or is not readable!"
		echo ""
		return
	fi

	# the following [ ]'s contain one space and one tab
	line=`grep -i '^[ 	]*background_dump_dest[ 	]*=' "$1"`
	if [ "$line" != "" ] ; then
		# found: strip "name =" and print the first word of the rest
		dump_value=${line#*=}
		echo "$dump_value" | awk '{ print $1; exit }'
	else
		# not in this file: check the include files it names; the
		# list is deliberately unquoted so blanks around '=' vanish
		ifile_list=`grep -i '^[ 	]*ifile[ 	]*=' "$1" | sed 's/^[^=]*=//'`
		for ifile in $ifile_list ; do
			process_parm_file "$ifile"
		done
	fi
}


# ############# Main ########################## Main ##########################
# Exactly one argument is expected: the name of the Oracle instance.
if [ $# -ne 1 ] ; then
	echo "usage: $argv0 instance"
	exit 2
fi

# Load the instance's HA configuration and publish the database login.
ORACLE_SID=$1
read_ha_databases $1
DB_LOGIN=$db_login
export DB_LOGIN
LOCALHOST=$(uname -n)

# Probe start delays.  The remote delay is derived from the poll cycle:
# BugID# 4269425 - increase delay to avoid fault monitor race condition
LOCAL_PROBE_DELAY=15
REMOTE_PROBE_DELAY=$(($poll_cycle + 45))

# A delay inherited from the environment is kept only when it is 0; an unset,
# empty or other non-zero value is replaced by the local default.
HA_FM_DBMSPROBE_DELAY=${HA_FM_DBMSPROBE_DELAY:-${LOCAL_PROBE_DELAY}}
if [ ${HA_FM_DBMSPROBE_DELAY} -ne 0 ]; then
	HA_FM_DBMSPROBE_DELAY=${LOCAL_PROBE_DELAY}
fi

prog="${HA_FM_NAME}"
ORA_NLS32=${_basedir}/${_productdir}/ha/oracle/ORA_NLS32
export ORA_NLS32

# The instance's logical host has to appear in the cluster configuration.
find_logical_host=$(haget -f all_logical_hosts | grep $logical_host)
if [ -z "$find_logical_host" ] ; then
	logerr "${prog}.4000" "logical host $logical_host is not in the cluster configuration"
	logerr "${prog}.4010" "Monitors for oracle database \"$ORACLE_SID\" NOT started"
	exit 1
fi

# remote_args stays empty while the logical host is mastered by this node.
remote_args=""

# Logical hosts mastered here, newline-separated output folded into one line.
# (An older variant derived this from HA_METASETSERVE instead.)
MASTERED_LOGICAL_HOSTS=$(haget -f mastered | tr '\012' ' ')

is_member $logical_host "$MASTERED_LOGICAL_HOSTS" || {
	# Not mastered here: this will be a remote monitor with the longer
	# start delay.
	remote_args="-r $logical_host"
	HA_FM_DBMSPROBE_DELAY=${REMOTE_PROBE_DELAY}

	# Don't start probe if diskset is in maintenance mode.
	# If this instance's logical host is in maint mode, exit now.
	MAINT=$(haget -f is_maint -h ${logical_host})
	if [ "$MAINT" = "1" ]; then
		logdeb "${prog}.2900" "Logical host in maintenance mode, don't run fault monitor"
		exit 0
	fi

	# Let need_to_run_probe decide whether this node should probe at all.
	need_to_run_probe ${logical_host} ${LOCALHOST}
	rc=$?
	if [ $rc -ne 0 ]; then
		logdeb "${prog}.2901" "No need to run probe (condition $rc)"
		exit 0
	fi
}

# The monitor is only started while the instance's current mode is "on".
current_mode=$(get_instance_dynamic "${ds}" "$ORACLE_SID" | awk '{print $1}')
case "$current_mode" in
on)
	;;
*)
	logerr "${prog}.4062" "on/off mode for \"$ORACLE_SID\" is not on. Fault monitor not started"
	exit 0
	;;
esac

# HA_REMOTEHOST for this instance: every physical host of the logical host
# except the local node, each one prefixed by a blank.
my_node=$(uname -n)
res=$(haget -f physical_hosts -h $logical_host | tr '\012' ' ')
HA_REMOTEHOST=""
for next in ${res}; do
	[ "$next" = "$my_node" ] && continue
	HA_REMOTEHOST="${HA_REMOTEHOST} ${next}"
done

HA_LOGICALHOST=$logical_host
export HA_REMOTEHOST HA_LOGICALHOST HA_FM_DBMSPROBE_DELAY

# binaries_on_logical is set (to the Oracle home) when this is a remote
# monitor and the Oracle home from oratab is not usable on this node.
binaries_on_logical=

if [ "$remote_args" != "" ] ; then
        ORATAB=/var/opt/oracle/oratab
	# the second ':' separated field of the oratab line is the Oracle home
        if oratab_line=`grep "^[	 ]*$1:" $ORATAB` ; then
                oracle_home=`echo $oratab_line | awk -F: '{print $2}' -`
	fi
	# the home directory is missing, or disk_mounted printed nothing
	if [ ! -d $oracle_home ] || [ -z "`disk_mounted $1`" ] ; then
		binaries_on_logical=$oracle_home
	fi
fi

# When a remote fault monitor is started and the Oracle binaries are
# installed on the logical host, the remote server has no access to the
# version and error log, so they are placed in $HA_VAR.
# The remote fmon is started here and the script exits from here.
if [ "$remote_args" != "" -a "$binaries_on_logical" != "" ] ; then
	# no alert log is passed to a remote monitor; a fixed local directory
	# stands in for the Oracle home
        alert_file="none"
        oracle_home="/var/opt/oracle"
        ORACLE_HOME="/var/opt/oracle"

	# Only the first non-comment line of the support file is used: the
	# loop body always ends with an exit.  The exits inside the loop are
	# expected to end only the piped loop, which is why $? is tested
	# right after it.
        grep -v "^#" ${HA_DB_SUPPORT} | \
        while read support_line ; do
			# the executable name is fixed, so the emptiness test
			# below cannot fire; the action file is the third
			# tab-separated field of the support line
                        ha_executable="haoracle_fmon_7.3"
                        action_file=`echo "$support_line" | cut -f 3`
                        if [ "$ha_executable" = "" ] ; then
                           logerr "${prog}.4020" \
	"instance ${ORACLE_SID}: executable file name missing in line '$support_line' in file $HA_DB_SUPPORT"
                                exit 1
                        fi
                        if [ "$action_file" = "" ] ; then
                           logerr "${prog}.4030" \
	"instance ${ORACLE_SID}: action file name missing in line '$support_line' in file $HA_DB_SUPPORT"
                                exit 1
                        fi
			# talk_start's own return status is not examined;
			# status 3 just marks "a start was attempted"
                        talk_start ${ha_executable} $remote_args $ORACLE_SID $poll_cycle $connect_cycle $timeout $restart_delay $HA_FILES/$action_file $alert_file
                        exit 3
	done
	# a start was attempted: the script is done
	[ $? = "3" ] && exit 0
fi
#---------- end of remote fault monitor for binaries on logical host-----------

# Locate the Oracle installation; the owner of the Oracle home directory
# (third column of "ls -ld") is the user the listener is started as.
# NOTE(review): find_oracle can leave ORACLE_HOME empty; ls -ld would then
# presumably report on the current directory - confirm that case is harmless.
find_oracle $ORACLE_SID
oracle_owner=`ls -ld ${ORACLE_HOME} | nawk '{print $3}'`

# Start the listener process, for the local host only, if it is not already
# running.
# If the oracle version is < 7.3 then both the v1 listener (using "orasrv")
# and the v2 listener (using "lsnrctl") need to be started;
# otherwise, start up the v2 listener using "lsnrctl".
# NOTE(review): only the lsnrctl start is present below.

if [ "$remote_args" = "" ] ; then
# The check and the start run in a background subshell, so the script does
# not wait for the listener; the LISTENER default assigned to listener_name
# here is not seen outside the parentheses.
(
	export LISTENER_NAME=${listener_name:="LISTENER"}
    # is a tnslsnr process with this listener name already running?
    /usr/bin/ps -e -u $oracle_owner -o args | grep -w "tnslsnr $LISTENER_NAME " | grep -v "grep"  > /dev/null
    if [ $? -ne 0 ]; then
        lognotice "${prog}.2000" "starting up SQL*Net V2 listener"
        # the here-document is expanded by this shell before su runs it
        su  $oracle_owner -c sh << EOF > /dev/console 2>&1
        	LD_LIBRARY_PATH=$LD_LIBRARY_PATH
        	export LD_LIBRARY_PATH
       		$ORACLE_HOME/bin/lsnrctl start $LISTENER_NAME &
EOF
    fi
)&
fi
#------ end of starting up listener ---------

# Start the fault monitor proper, provided an Oracle home is set.
if [ "$ORACLE_HOME" != "" ] ; then
	export ORACLE_LISTENER="$listener_name"
	# Only the first non-comment line of the support file is used: the
	# loop body always ends with an exit.
      	grep -v "^#" ${HA_DB_SUPPORT} | \
      	while read support_line ; do
			# the executable name is fixed, so the emptiness test
			# below cannot fire; the action file is the third
			# tab-separated field of the support line
	  		ha_executable="haoracle_fmon_7.3"
	  		action_file=`echo "$support_line" | cut -f 3`
	  		if [ "$ha_executable" = "" ] ; then
				logerr "${prog}.4020" \
"instance ${ORACLE_SID}: executable file name missing in line '$support_line' in file $HA_DB_SUPPORT"
				exit 1
			fi
	  		if [ "$action_file" = "" ] ; then
				logerr "${prog}.4030" \
"instance ${ORACLE_SID}: action file name missing in line '$support_line' in file $HA_DB_SUPPORT"
				exit 1
			fi
			# a local monitor watches alert_<SID>.log in the
			# background_dump_dest directory of the parameter file;
			# a remote monitor gets "none"
      			if [ "$remote_args" = "" ] ; then
				alert_file=`process_parm_file $PFILE`
            			if [ "$alert_file" = "" ] ; then
              				logerr "${prog}.4040" \
"Could not locate background_dump_dest value in parameter file"
					exit 1
				else
					alert_file=${alert_file}/alert_${ORACLE_SID}.log
            			fi
          		else
            			alert_file="none"
          		fi
			# talk_start's own return status is not examined;
			# status 3 just marks "a start was attempted"
	  		talk_start ${ha_executable} $remote_args $ORACLE_SID $poll_cycle $connect_cycle $timeout $restart_delay $HA_FILES/$action_file $alert_file 
          		exit 3
      	done
	# status of the piped loop: 1 = an error was already logged,
	# 3 = a start was attempted, anything else = the loop body never ran
	# to its end (for example no usable line in the support file)
        case $? in
                1) ;;
                3) exit 0 ;;
                *) logerr "${prog}.4050" "Oracle ${ora_version} not supported!" ;;
        esac

fi

# If we reach here, then a fault monitor did not get started;
# the reason has already been logged by the various logerr calls above.
# Note: The "exit" statements in the while statement above do not
#       cause an exit of the entire script, but just the subshell that
#	executes the while command as part of a pipe!
logerr "${prog}.4060" "Monitors for Oracle database \"${ORACLE_SID}\" NOT started"
exit 1
