#!/bin/sh
#
#+-#+-#+-#-
# $GENXXX_ROOT/util/startup_template
#
# DO NOT EDIT THIS FILE - this file is used as a template
# Don't change the markers #+-#+-#+-# and "#-#-#-#" , they will be removed
#-#-#-#-#-#
#
# QSYST_NAME startup script
#
# (c) 2000, 2001 Sun Microsystems, Inc.
#
# This script can be called with *one* of the following arguments:
#
#       start       start qmaster, scheduler and execution daemon 
#                   The execution daemon on the current qmaster host is only
#                   started if there is an execution host configuration for
#                   this host. This is necessary to avoid starting an
#                   execution daemon on a qmaster host during the boot of a
#                   machine in case there should be no execution host for
#                   this machine
#       stop        Terminates scheduler and qmaster if we are on the master
#                   machine. Terminates commd. Terminates the execution daemon
#                   and the shepherds. This only works if the execution daemon
#                   spool directory is in the default location
#       -qmaster    only starts qmaster and scheduler
#       -execd      only start the execution daemon
#       -shadowd    only start the shadow daemon (if this host is listed in
#                   the "shadow_masters" file)
#                   --> "-execd" and "-qmaster" may not be used together
#       -migrate    shuts down qmaster and scheduler if they are running
#                   on another host and start the daemons on this host
#
# If the file "primary_qmaster" in the QSYST_ROOT/QSYST_CELL/common
# exists and it contains the hostname of the current machine and qmaster
# is running on another host it will be shut down and started on this host
#       
# Unix commands which may be used in this script:
#    cat cut tr ls grep awk sed sleep
#    clear basename

# Restrict the command search path to the system directories, so the Unix
# commands used below are looked up only there.
PATH=/bin:/usr/bin:/sbin:/usr/sbin 

# Remove all locale settings inherited from the caller.
# NOTE(review): presumably so that the output of the tools parsed below does
# not vary with the caller's locale - confirm.
unset LANG LC_CTYPE LC_COLLATE LC_ALL LC_MESSAGES 
unset LC_NUMERIC LC_MONETARY LC_TIME

# GENROOT, GENCELL and GENCOMMD_PORT are not usable values/commands as
# written.
# NOTE(review): they look like placeholders that are substituted when the
# real startup script is generated from this template - verify against the
# installer before changing them.
QSYST_ROOT=GENROOT; export QSYST_ROOT
QSYST_CELL=GENCELL; export QSYST_CELL
GENCOMMD_PORT; export COMMD_PORT

#---------------------------------------------------------------------------
# Shutdown
#    Send SIGTERM to the process whose pid is stored in a pid file.
#
#    $1 - expected process name, handed to "checkprog" for verification
#    $2 - path of the file containing the pid
#
#    Uses the global $utilbin_dir to locate the "checkprog" utility. The
#    signal is only sent if checkprog exits with status 0 for the given pid
#    and name. Nothing happens if the pid file is missing or empty.
#
Shutdown()
{
   name=$1
   pidfile=$2

   if [ -f "$pidfile" ]; then
      pid=`cat "$pidfile"`

      # An empty pid file would shift the arguments handed to checkprog and
      # leave kill without a target, so skip it.
      if [ -n "$pid" ]; then
         # $pid is deliberately left unquoted: word splitting strips any
         # whitespace surrounding the number in the pid file.
         if "$utilbin_dir/checkprog" $pid "$name" > /dev/null; then
            kill -TERM $pid
         fi
      fi
   fi
}

#---------------------------------------------------------------------------
# QmasterSpoolDir
#    echo the qmaster spool directory, i.e. the second field of every line
#    containing "qmaster_spool_dir" in the cell's "configuration" file
#
QmasterSpoolDir()
{
   qma_config=$QSYST_ROOT/$QSYST_CELL/common/configuration

   qma_spool_dir=`grep qmaster_spool_dir $qma_config | awk '{ print $2 }'`
   echo $qma_spool_dir
}


#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    Compare the hostname given in $1 with the content of the "act_qmaster"
#    file of the cell.
#    echo "true" if both are equal, else echo "false"
#
CheckIfQmasterHost()
{
   host=$1
   act_qmaster_host=`cat $QSYST_ROOT/$QSYST_CELL/common/act_qmaster`

   answer=false
   if [ "$host" = "$act_qmaster_host" ]; then
      answer=true
   fi

   echo $answer
}

#---------------------------------------------------------------------------
# CheckIfExecHost
#    echo "true" if there is an exec host file for host $1 in the qmaster
#    spool directory, else echo "false".
#
#    $1 - host name to look for
#    $2 - "false" if this machine is not the qmaster host; "false" is then
#         echoed without looking at the spool directory
#
#    The purpose of the check is to avoid starting an execution daemon
#    if this is a qmaster machine that should not run an execution daemon.
#
#    Only on a qmaster machine can we be sure that we have access to the
#    qmaster spool directory.
#
CheckIfExecHost()
{
   host=$1
   is_qmaster_host=$2

   if [ "$is_qmaster_host" = false ]; then
      echo false
      return
   fi

   qma_spool_dir=`QmasterSpoolDir`

   # Every file in exec_hosts whose name starts with the host name counts.
   # If nothing matches, the pattern stays unexpanded and fails the -f test.
   for exec_host_file in "$qma_spool_dir"/exec_hosts/"$host"*; do
      if [ -f "$exec_host_file" ]; then
         echo true
         return
      fi
   done

   echo false
}

#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Compare the hostname given in $1 with the content of the
#    "primary_qmaster" file of the cell.
#    echo "true" if the file exists and contains exactly our hostname,
#    else echo "false"
#
CheckIfPrimaryQmasterHost()
{
   host=$1
   fname=$QSYST_ROOT/$QSYST_CELL/common/primary_qmaster

   is_primary=false
   if [ -f $fname ] && [ "$host" = "`cat $fname`" ]; then
      is_primary=true
   fi

   echo $is_primary
}


#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Check if our hostname given in $1 is contained in the
#    "shadow_masters" file of the cell.
#    echo "true" if grep finds the hostname, else echo "false"
#
#    The hostname is handed to "grep -i" as the pattern, so the match is
#    case insensitive and succeeds on any line that contains the name.
#    An empty hostname, a missing file or a grep failure yields "false".
#
CheckIfShadowMasterHost()
{
   host=$1

   fname=$QSYST_ROOT/$QSYST_CELL/common/shadow_masters

   # An empty pattern would match every line of the file, so reject it.
   if [ -n "$host" ] && [ -f "$fname" ]; then
      # Redirect stdout first and stderr second: the result of this function
      # is captured by the caller, so grep diagnostics must not end up in it.
      if grep -i "$host" "$fname" > /dev/null 2>&1; then
         echo true
      else
         echo false
      fi
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# GetPathToBinaries
#    echo the name of the bin_dir on this system
#
#    The directory is taken from the "binary_path" entry of the cell's
#    "configuration" file. It is accepted if it contains the qstat binary,
#    either directly or in the subdirectory named by $QSYST_ROOT/util/arch.
#    echo "none" if we can't determine the binary path, i.e. if the
#    configuration file or the binary_path entry is missing or qstat cannot
#    be found in either place.
#
GetPathToBinaries()
{
   cfgname=$QSYST_ROOT/$QSYST_CELL/common/configuration

   base=none

   if [ -f "$cfgname" ]; then
      cfg_path=`grep binary_path "$cfgname" | awk '{ print $2 }'`

      # Only a verified directory replaces "none", so the caller's check
      # for "none" catches an unusable configuration.
      if [ -n "$cfg_path" ]; then
         if [ -f "$cfg_path/qstat" ]; then
            base=$cfg_path
         elif [ -f "$QSYST_ROOT/util/arch" ]; then
            arch=`"$QSYST_ROOT/util/arch"`
            if [ -f "$cfg_path/$arch/qstat" ]; then
               base=$cfg_path/$arch
            fi
         fi
      fi
   fi

   echo "$base"
}


#---------------------------------------------------------------------------
# GetAdminUser
#    echo the name of the admin user on this system
#
#    The name is taken from the "admin_user" entry of the cell's
#    "configuration" file.
#    echo "root" if admin user retrieval fails, i.e. if the configuration
#    file or the entry is missing, or if the entry is "none" (in any case).
#
GetAdminUser()
{
   cfgname=$QSYST_ROOT/$QSYST_CELL/common/configuration
   user=none

   if [ -f "$cfgname" ]; then
      user=`grep admin_user "$cfgname" | awk '{ print $2 }'`
   fi

   # A missing entry leaves $user empty; treat that like "none".
   case `echo "$user" | tr "A-Z" "a-z"` in
      ""|none)
         user=root
         ;;
   esac

   echo "$user"
}

#---------------------------------------------------------------------------
# GetPathToUtilbin
#    echo the path to the binaries in utilbin
#    The directory is $QSYST_ROOT/utilbin/<arch>, where <arch> is the output
#    of the $QSYST_ROOT/util/arch script.
#    The check is fulfilled if we can access the "gethostname" binary there
#    echo "none" if we can't determine the binary path
#
GetPathToUtilbin()
{
   result=none

   if [ -f $QSYST_ROOT/util/arch ]; then
      arch=`$QSYST_ROOT/util/arch`
      candidate=$QSYST_ROOT/utilbin/$arch

      if [ -f $candidate/gethostname ]; then
         result=$candidate
      fi
   fi

   echo $result
}


#---------------------------------------------------------------------------
# usage
#    Print the list of valid parameters to stdout and exit with status 1
#
usage()
{
   cat <<'EOF'
QSYST_NAME start/stop script. Valid parameters are:

   (no parameters): start qmaster and execution daemon if applicable
   "start"        dto.
   "stop"         shut down shepherd processes and execution daemon
   "-execd"       only start execution daemon
   "-qmaster"     only start qmaster and scheduler (if applicable)
   "-shadowd"     only start shadowd (if applicable)
   "-migrate"     shutdown qmaster/scheduler if it's running on another
                    host and restart it on this host
                    Migration only works if this host is an admin host

EOF
   exit 1
}


#---------------------------------------------------------------------------
# MAIN Procedure
#      
  
# At most one argument is accepted; "-h" and "help" just print the usage.
if [ "$#" -gt 1 ] || [ "$1" = "-h" ] || [ "$1" = "help" ]; then
   usage
fi

# Defaults (no argument or "start"): start every daemon that applies to
# this host.
startup=true
qmaster=true
execd=true
shadowd=true
force_execd=false
migrate_qmaster=false

# Each recognized argument narrows the set of daemons to handle; anything
# else ends in usage.
for i in $*; do
   case "$i" in
      start)
         :
         ;;
      stop)
         startup=false
         ;;
      -execd)
         execd=true
         force_execd=true
         qmaster=false
         shadowd=false
         ;;
      -qmaster)
         execd=false
         qmaster=true
         shadowd=false
         ;;
      -shadowd)
         execd=false
         qmaster=false
         shadowd=true
         ;;
      -migrate)
         execd=false
         migrate_qmaster=true
         qmaster=true
         shadowd=false
         ;;
      *)
         usage
         ;;
   esac
done

# Locate the directory with the daemons and client commands; give up if it
# cannot be determined.
bin_dir=`GetPathToBinaries`
case "$bin_dir" in
   none)
      echo "can't determine path to QSYST_NAME binaries"
      exit 1
      ;;
esac

# Same for the directory with the utility binaries.
utilbin_dir=`GetPathToUtilbin`
case "$utilbin_dir" in
   none)
      echo "can't determine path to QSYST_NAME utility binaries"
      exit 1
      ;;
esac

# Name of this host as reported by the gethostname utility
HOST=`$utilbin_dir/gethostname -name`

# Start the requested daemons, or shut them down if "stop" was given.
# Every test operand is quoted so that an empty or unexpected answer from one
# of the helper functions evaluates to "not true" instead of breaking the
# test expression.
if [ "$startup" = true ]; then

   # qmaster_host=true if qmaster was running on this host the last time
   # exec_host=true    if qmaster was running on this host the last time and
   #                   this host is an execution host
   # primary_qmaster_host=true if this host is named in "primary_qmaster"
   # shadow_host=true  if this host is contained in "shadow_masters"

   # execution daemon is started on this host if either
   #        - this script is started with the parameter "-execd"
   #        - qmaster *is not* running on this host
   #        - qmaster *is* running on this host *and* there is an exec host config
   #

   qmaster_host=`CheckIfQmasterHost "$HOST"`
   exec_host=`CheckIfExecHost "$HOST" "$qmaster_host"`
   primary_qmaster_host=`CheckIfPrimaryQmasterHost "$HOST"`
   shadow_host=`CheckIfShadowMasterHost "$HOST"`

   # Migration: qmaster is wanted here but was last running elsewhere, and
   # this host is either the primary qmaster host or "-migrate" was given.
   if [ "$qmaster" = true ] && [ "$qmaster_host" = false ] &&
      { [ "$primary_qmaster_host" = true ] || [ "$migrate_qmaster" = true ]; }; then
      actual_qmaster_host=`cat "$QSYST_ROOT/$QSYST_CELL/common/act_qmaster"`
      echo "   shutting down qmaster and scheduler on host \"$actual_qmaster_host\" ..."
      qconf_output=`"$bin_dir/qconf" -ks 2>&1 | grep "denied"`
      if [ "$qconf_output" != "" ]; then
         echo "   denied: host \"$HOST\" is no admin host."
         exit 1
      fi
      # Discard all output of the shutdown request (stdout first, then
      # stderr); success is judged by the query below.
      "$bin_dir/qconf" -km > /dev/null 2>&1
      qmaster_sconf_info=`"$bin_dir/qconf" -sconf 2> /dev/null | grep "qmaster_spool_dir"`
      if [ "$qmaster_sconf_info" != "" ]; then
      #  qmaster is still running
         echo "   qmaster and scheduler still alive. Cannot migrate qmaster."
         exit 1 
      fi
      qmaster_host=true
   fi

   if [ "$qmaster" = true ] && [ "$qmaster_host" = true ]; then
      echo "   starting QSYST_MASTER_NAME"
      "$bin_dir/QSYST_MASTER_NAME"

      # NOTE(review): presumably gives qmaster time to come up before the
      # scheduler is started - confirm.
      sleep 3
      echo "   starting QSYST_SCHEDD_NAME"
      "$bin_dir/QSYST_SCHEDD_NAME"
   fi

   if [ "$force_execd" = true ] ||
      { [ "$execd" = true ] && [ "$qmaster_host" = false ]; } ||
      { [ "$execd" = true ] && [ "$qmaster_host" = true ] &&
        [ "$exec_host" = true ]; }; then
      echo "   starting QSYST_EXECD_NAME"
      "$bin_dir/QSYST_EXECD_NAME"
   fi

   if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
      echo "   starting QSYST_SHADOWD_NAME"
      "$bin_dir/QSYST_SHADOWD_NAME"
   fi
else
   qmaster_host=`CheckIfQmasterHost "$HOST"`
   if [ "$qmaster_host" = true ]; then
      qmaster_spool_dir=`QmasterSpoolDir`

      # Send SIGTERM to scheduler
      echo "   Shutting down QSYST_NAME scheduler"
      Shutdown QSYST_SCHEDD_NAME "$qmaster_spool_dir/schedd/schedd.pid"

      # Send SIGTERM to qmaster
      echo "   Shutting down QSYST_NAME qmaster"
      Shutdown QSYST_MASTER_NAME "$qmaster_spool_dir/qmaster.pid"
   fi

   # Shutdown execution daemon
   # The spool directory is assumed to be in the default location, named
   # after the host name cut at the first dot.
   UQHOST=`"$utilbin_dir/gethostname" -name | cut -f1 -d.`
   execd_spool_dir=$QSYST_ROOT/$QSYST_CELL/spool/$UQHOST

   # Send SIGTERM to execd
   echo "   Shutting down QSYST_NAME execution daemon"
   Shutdown QSYST_EXECD_NAME "$execd_spool_dir/execd.pid"

   # Send SIGTERM to all shepherds
   # A host that never ran an execution daemon has no active_jobs directory;
   # skip the loop then instead of letting ls report an error.
   if [ -d "$execd_spool_dir/active_jobs" ]; then
      for jobid in `ls "$execd_spool_dir/active_jobs"`; do
         echo "   Shutting down QSYST_NAME shepherd of job $jobid"
         Shutdown QSYST_SHEPHERD_NAME "$execd_spool_dir/active_jobs/$jobid/pid"
      done
   fi

   sleep 2

   # Shutdown communication daemon
   echo "   Shutting down QSYST_NAME communication daemon"
   "$bin_dir/QSYST_COMMDCNTL_NAME" -k
fi
