#! /usr/bin/env bash
#
# skeleton	example file to build /etc/init.d/ scripts.
#		This file should be used to construct scripts for /etc/init.d.
#
#		Written by Miquel van Smoorenburg <miquels@cistron.nl>.
#		Modified for Debian
#		by Ian Murdock <imurdock@gnu.ai.mit.edu>.
#               Further changes by Javier Fernandez-Sanguino <jfs@debian.org>
#
# Version:	@(#)skeleton  1.9  26-Feb-2001  miquels@cistron.nl
#

# chkconfig: 345 10 90
# description: Nsx local controller agent

# Set NSX_AGENT_EXEC_PREFIX to prepend commands to the CONTROLLERD
# command line.  Use the same value for NSX_AGENT_EXEC_PREFIX for
# start and stop because this script checks the value of the
# executable in the script against the executable in the running
# process.

### BEGIN INIT INFO
# Provides:          nsx-agent
# Required-Start:    $network $named $local_fs openvswitch
# Required-Stop:     $network $local_fs
# Default-Start:
# Default-Stop:      0 1 6
# Short-Description: NSX agent daemon
### END INIT INFO

# Load the LSB shell functions when the distribution provides them.
if [ -f /lib/lsb/init-functions ]; then
    . /lib/lsb/init-functions
fi

# NOTE(review): the leading ':' leaves an empty entry at the front of the
# library search path (and a second one at the end when LD_LIBRARY_PATH was
# previously unset). Confirm that this is intended before tightening it.
export LD_LIBRARY_PATH=:/usr/lib/:$LD_LIBRARY_PATH

# Fixed command search path used for the rest of the script.
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
PYTHON=/usr/bin/python

# Installation layout of the agent.
CLOUDNET_PATH=/opt/vmware/nsx-agent/bin
CONTROLLERD="$CLOUDNET_PATH/nsx-agent-daemon"
CLOUDNET_DATA_DIR=/var/vmware/nsx-agent/data
NSXAGENT_SCRIPT="$CLOUDNET_PATH/nsx-agent.sh"
NSXAGENT_MANAGER="unix:/var/run/vmware/nsx-agent/nsxagent_ovsdb.sock"
JAVA_HEAP_MB=2048
NAME=nsx-agent
MODE=chassis
DESC=nsx-agent

# Resolve taskset with the shell builtin instead of the external `which`;
# TASKSET is left empty when the tool is not installed.
TASKSET=$(command -v taskset) || TASKSET=

# Pidfile location; the same path is handed to the watchdog via -p below.
PIDDIR=/var/run/vmware/nsx-agent
PIDFILE=$PIDDIR/$NAME.pid

# Watchdog invocation: tag plus the quick-restart and total-restart values
# passed through -q and -t.
NSX_AGENT_TAG=NSX-NsxAgent
NSX_AGENT_MAX_QUICK_RESTARTS="10"
NSX_AGENT_MAX_RESTARTS="1000"
WATCHDOG_ARGS="-d -s ${NSX_AGENT_TAG} -q ${NSX_AGENT_MAX_QUICK_RESTARTS} -t ${NSX_AGENT_MAX_RESTARTS} -p ${PIDFILE}"
WATCHDOG=/opt/vmware/nsx-agent/bin/watchdog.sh

# Log-rotation policy used when the freespace.py cron job is installed:
#   clean_frequency   -- minutes between freespace.py runs
#   space_usage_low   -- percent of space usage that is considered ok
#   space_usage_high  -- percent of space usage at which log cleaning starts
#   glog_dir          -- log directory of nsx-agent
clean_frequency=5
space_usage_low=75
space_usage_high=90
glog_dir=/var/log/vmware/nsx-agent/

# Pull in the distribution's init helper functions when they exist.
if [ -f /etc/init.d/functions ]; then
    . /etc/init.d/functions
fi

# Nothing to manage when the agent is not installed: leave quietly with
# success. The paths are quoted so that an empty or space-containing value
# fails the check instead of silently passing it.
[ -x "$CONTROLLERD" ] || exit 0
[ -x "$NSXAGENT_SCRIPT" ] || exit 0


# Paths derived from the agent's data directory.
CHASSIS_INFO_FILE="${CLOUDNET_DATA_DIR}/controllerd.info"
CLUSTER_UUID_FILE="${CLOUDNET_DATA_DIR}/cluster-uuid.txt"
CLUSTER_STATUS_FILE="${CLOUDNET_DATA_DIR}/cluster-status.txt"

# Time to wait for the server to die, in seconds. If this value is set too
# low, some servers may not get the chance to die gracefully and 'restart'
# will not work.
DODTIME=1

# When the controller is started automatically on boot the $USER
# environment variable is not set, which makes the log filename contain
# "invalid-user". Fall back to root explicitly in that case.
[ -n "$USER" ] || export USER=root

# <instrumentation> #

# From here on, stop the script on any unhandled command failure.
set -o errexit

log() {
    # Send one message to syslog under the "NSX" tag.
    # Arguments:
    #   $1 - message text
    # The message is passed as a single quoted argument so that it is not
    # word-split or glob-expanded, and "--" ends option parsing so that a
    # message starting with "-" is not mistaken for a logger option.
    logger -t NSX -- "$1"
}

# Source the defaults file once, at the beginning, to make sure the core and
# fd limits get pulled in. A missing file is not an error.
[ ! -f /etc/default/nsx-agent ] || . /etc/default/nsx-agent

running_pid()
{
    # Check whether the command line of a given pid matches a given name.
    # Arguments:
    #   $1 - pid to inspect
    #   $2 - expected command name
    #   $3 - 1-based position of the command inside /proc/<pid>/cmdline
    #        (defaults to 1, the executable itself)
    # Returns:
    #   0 when the entry at that position equals the expected name,
    #   1 otherwise (including an empty pid or a pid with no /proc entry).
    # All working variables are local so that the caller's global $pid is
    # never overwritten from here.
    local pid=$1
    local name=$2
    local daemon_pos=${3:-1}
    local cmd

    [ -n "$pid" ] || return 1
    [ -d "/proc/$pid" ] || return 1

    # cmdline is NUL-separated: turn it into one entry per line, pick the
    # requested entry and drop anything after the first ':'.
    cmd=$(LC_CTYPE=C tr "\000" "\n" < "/proc/$pid/cmdline" \
        | head -n "$daemon_pos" | tail -n 1 | cut -d : -f 1)

    # Is this the expected child?
    [ "$cmd" = "$name" ] || return 1
    return 0
}

running()
{
    # Check if the daemon is running by looking at /proc
    # (works for all users).
    # Globals:
    #   PIDFILE, CONTROLLERD, NSX_AGENT_EXEC_PREFIX (read)
    #   pid (written) - pid that was examined; callers such as force_stop
    #                   read it afterwards
    # Returns:
    #   0 when the process behind the pid runs the expected executable,
    #   1 otherwise.
    local proc_name

    pid=
    if [ -f "$PIDFILE" ]; then
        # Take the first whitespace-delimited word of the pidfile. read
        # reports failure for a file without a trailing newline even though
        # it assigned the value, hence the "|| :".
        read -r pid _ < "$PIDFILE" || :
    else
        # No pidfile, probably no daemon present: fall back to looking for a
        # $CONTROLLERD process.
        pid=$(pidof -s "${CONTROLLERD}") || :
    fi
    [ -n "$pid" ] || return 1

    # With an exec prefix the process appears under the prefix's first word
    # rather than under the daemon binary.
    if [ -z "$NSX_AGENT_EXEC_PREFIX" ]; then
        proc_name=$CONTROLLERD
    else
        proc_name=$(printf '%s\n' "$NSX_AGENT_EXEC_PREFIX" | awk '{print $1}')
    fi

    running_pid "$pid" "$proc_name" 1 || return 1
    return 0
}

shutdown_intf()
{
  # Bring down every bridge interface whose name starts with "breth" or
  # "brbond", as listed by `ifconfig -a`.
  # Outputs: a progress line on stdout with one dot per interface.
  # Each teardown command is best effort ("|| :"), so that an interface
  # that is already down or was never configured does not abort the script
  # under errexit.
  local br

  # down all the bridges (even those that don't look up), sort -r to get VLANs first
  echo -n "Bringing down network interfaces"
  for br in $(ifconfig -a | awk '/^br(eth|bond)/ { print $1 }' | sort -r); do
    echo -n "."
    /sbin/ifdown "$br" >/dev/null 2>&1 || :
    # make sure interface is down even if it wasn't configured by ifup
    /sbin/ifconfig "$br" down >/dev/null 2>&1 || :
    # a name containing a '.' is additionally removed with vconfig
    if [ "$br" != "${br%.*}" ]; then
      /sbin/vconfig rem "$br" >/dev/null 2>&1 || :
    fi
  done
  echo ""
}

clear_state()
{
    # Remove every regular file below the data directory; directories are
    # left in place.
    # Globals: CLOUDNET_DATA_DIR (read)
    # Outputs: "OK" on stdout, plus a "state cleared" log entry.
    # Returns: 1, without deleting anything, when CLOUDNET_DATA_DIR is empty.
    #          Otherwise find would fall back to the current directory.
    if [ -z "$CLOUDNET_DATA_DIR" ]; then
        echo "CLOUDNET_DATA_DIR is not set; refusing to clear state" >&2
        return 1
    fi

    # clear out all the files in the data directory
    find "$CLOUDNET_DATA_DIR" -type f -exec rm -f -- {} \;

    log "state cleared"
    echo "OK"
}

clear_everything()
{
    # Wipe the agent's stored state and delete the extra defaults file,
    # then return 0.
    clear_state
    rm -f /etc/default/nsx-agent-extra

    # XXX(ashieh) This needs to be revisited. Seems to me that we need
    # to be a lot more careful about nuking OVS state on a hypervisor.
    #
    # Because of the early return below, none of the remaining statements
    # in this function are ever executed.
    return 0

    # Would force a subsequent start to reload info from the defaults file,
    # rather than relying on cached local variables.
    unset ONIX_RPC_IF

    shutdown_intf

    /opt/nvp/bin/setup-controller-node

    log "everything cleared"
}

force_stop() {
    # Forcefully kill the process: stop the watchdog, send SIGTERM and, if
    # the daemon is still alive after DODTIME seconds, send SIGKILL.
    # Globals:
    #   PIDFILE, WATCHDOG, NSX_AGENT_TAG, DODTIME, NAME (read)
    #   pid (read) - filled in by running()
    # Returns: 0 when there is no pidfile or the daemon is gone afterwards.
    # Exits:   1 when the daemon survives SIGKILL.
    [ -f "$PIDFILE" ] || return 0
    if running ; then
        # This only stops the watchdog process. The watchdog call and the
        # kills are best effort so that a failure there cannot abort the
        # escalation under errexit; running() decides what happens next.
        ${WATCHDOG} -k "${NSX_AGENT_TAG}" || :
        kill -15 "$pid" || :
        # Is it really dead?
        [ -z "$DODTIME" ] || sleep "$DODTIME"s
        if running ; then
            kill -9 "$pid" || :
            [ -z "$DODTIME" ] || sleep "$DODTIME"s
            if running ; then
                echo "Cannot kill $NAME (pid=$pid)!"
                exit 1
            fi
        fi
    fi
    rm -f "$PIDFILE"
    return 0
}

do_start()
{
    # Start the agent through the watchdog, install the log-cleaning cron
    # job and report the outcome.
    # Globals read:
    #   ONIX_CORE_LIMIT, ONIX_FD_LIMIT, NAME, CLUSTER_STATUS_FILE, PIDDIR,
    #   PIDFILE, CLOUDNET_PATH, WATCHDOG, WATCHDOG_ARGS, NSXAGENT_SCRIPT,
    #   TASKSET, clean_frequency, glog_dir, space_usage_low,
    #   space_usage_high, pid (filled in by running())
    # Result:
    #   returns 0 if daemon has been started
    #   exits 0   if daemon was already running
    #   exits 2   if daemon could not be started
    local i cron_entry

    if test -n "$ONIX_CORE_LIMIT"; then
        echo "Setting core limit to $ONIX_CORE_LIMIT"
        ulimit -c "$ONIX_CORE_LIMIT" || true
    fi

    if test -n "$ONIX_FD_LIMIT"; then
        echo "Setting file descriptor limit to $ONIX_FD_LIMIT"
        ulimit -n "$ONIX_FD_LIMIT" || true
    fi

    if running; then
        echo "$NAME is already running"
        exit 0
    fi

    rm -f "$CLUSTER_STATUS_FILE"

    mkdir -p "$PIDDIR"

    # Best effort: restorecon may be missing or fail.
    /sbin/restorecon -v "$PIDDIR" &> /dev/null || :

    cd "$CLOUDNET_PATH"

    # The watchdog's own exit status is ignored; success is judged by the
    # running() check further down. errexit is switched back on
    # unconditionally afterwards, whatever state the caller arrived in.
    # WATCHDOG_ARGS is deliberately unquoted: it holds several arguments.
    set +e
    ${WATCHDOG} ${WATCHDOG_ARGS} "${NSXAGENT_SCRIPT}" >/dev/null 2>&1
    set -e

    # Poll once per second (31 polls at most) for the pidfile to appear.
    for ((i = 0; i <= 30; i++)); do
        sleep 1
        if [ -f "$PIDFILE" ]; then
            break
        fi
    done

    # Install a cron job which invokes freespace.py every 'clean_frequency'
    # minutes. Existing freespace entries are dropped and the new one is
    # appended in a single crontab rewrite. grep exits non-zero when it
    # prints nothing, hence the "|| :" that keeps errexit from skipping the
    # new entry.
    cron_entry="*/$clean_frequency * * * * /opt/vmware/nsx-agent/bin/freespace.py -f $glog_dir:$space_usage_low:$space_usage_high:0:0 -d $glog_dir:0:0 -p onix:1 -p binlog:1"
    {
        crontab -l 2>/dev/null | grep -v freespace || :
        echo "$cron_entry"
    } | crontab -

    if running; then
        echo -e " $NAME [\e[32mOK\e[37m\e[0m]"
        # Pin the daemon to CPU 0 when taskset is available. A missing
        # taskset or a failed call must not turn a successful start into a
        # failure, so it is only logged.
        if [ -n "$TASKSET" ]; then
            "$TASKSET" -pc 0 "$pid" >/dev/null \
                || log "failed to pin $NAME to CPU 0"
        fi
        log "started successfully"

    else
        echo -e " [\e[31mERROR\e[37m\e[0m]"
        log "failed on startup start"
        exit 2
    fi

    return 0
}

do_stop()
{
    # Stop the watchdog and the agent, then remove the log-cleaning cron job.
    # Globals read:
    #   NAME, WATCHDOG, NSX_AGENT_TAG, PIDFILE, DODTIME, CLUSTER_STATUS_FILE
    # Return
    #   0 if daemon has been or was already stopped
    #   2 if daemon could not be stopped
    #   other if a failure occurred
    # NOTE: once the daemon is found running, errexit is switched off and
    # stays off when this function returns.
    local ret waited

    echo -n "Stopping $NAME: "
    if ! running; then
      echo "$NAME was already stopped"
      return 0
    fi
    set +e
    # This only stops the watchdog process.
    ${WATCHDOG} -k "${NSX_AGENT_TAG}"
    pkill --pidfile "$PIDFILE" "$NAME"
    ret=$?

    # pkill only delivers the signal. Give the daemon up to DODTIME seconds
    # to actually exit, so that a start issued right after this function
    # does not find the dying process and report "already running".
    if [ "$ret" -eq 0 ]; then
        waited=0
        while [ "$waited" -lt "${DODTIME:-0}" ] 2>/dev/null && running; do
            sleep 1
            waited=$((waited + 1))
        done
    fi

    # remove any freespace.py cron jobs
    crontab -l 2>/dev/null | grep -v freespace | crontab -

    case $ret in
      0) echo -e "[\e[32mDone\e[37m\e[0m]"
         rm -f "$CLUSTER_STATUS_FILE"
         log "stopped"
         ;;
      2) echo -e " [\e[31mERROR\e[37m\e[0m] Failed to stop"
         log "failed"
         ;;
      *) echo -e " [\e[31mERROR\e[37m\e[0m] Error code: $ret"
         log "error: $ret"
         ;;
    esac

    return $ret
}

# Dispatch on the action given as the first argument.
case "$1" in
  start)
      echo -n "Starting $DESC: "
      do_start
      ;;
  stop)
      do_stop
      ;;
  restart|force-reload)
      log "restarting"
      do_stop
      do_start
      ;;
  clear-state)
      # Stop the agent around the wipe if it is currently running.
      echo -n "Clearing $DESC's state: "
      if running ; then
          do_stop
          clear_state
          do_start
      else
          clear_state
      fi
      ;;
  clear-everything)
      echo -n "Clearing everything for $DESC: "
      if running ; then
          do_stop
          clear_everything
          do_start
      else
          clear_everything
      fi
      ;;
  status)
      # The exit status follows the actual result of running():
      # 0 when the daemon is running, 1 when it is not.
      echo -n "$DESC is "
      if running ; then
          echo "running"
          # running() leaves the examined pid in $pid, which also covers the
          # case where the daemon was found without a pidfile.
          uptime=$(ps -o etime "$pid" | grep -v ELAPSED | sed 's/\s*//g' | sed "s/\(.*\)-\(.*\):\(.*\):\(.*\)/\1d\ \2h/; s/\(.*\):\(.*\):\(.*\)/\1h\ \2m/; s/\(.*\):\(.*\)/\1m \2s/")
          echo "Uptime: $uptime"
          exit 0
      else
          echo "not running."
          exit 1
      fi
      ;;
  *)
      N=/etc/init.d/$NAME
      # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2
      echo "Usage: $N {start|stop|restart|force-reload|status|clear-state|clear-everything}" >&2
      exit 1
      ;;
esac

exit 0
