#!/bin/bash
#
# This script is used to collect RAM based data after a push button event.  It
# is assumed that the SAOS server is non-responsive.
#

# globals
verbose=0     # enabled by --verbose or --dry_run
dry_run=0     # enabled by --dry_run

# Script name without its leading directories.  Parameter expansion is used
# instead of an unquoted `basename $0` so that a path containing whitespace is
# not word-split.
this_script=${0##*/}
hostname=$(hostname)

# Command prefix used for error reporting.  Callers expand it unquoted:
#     $SYSLOG_ERR "message"
# so it must remain a plain string (not a function or an array).
SYSLOG_ERR="logger -s -p USER.ERR  -t $this_script"

source /ciena/scripts/paramApi.sh
source /ciena/scripts/core_paths.sh

# -----------------------------------------------------------------------------
# Print the command line help text on stdout and terminate the script.
#
# Globals read: this_script (name shown on the "Usage:" line)
# Exit status:  always 1
print_usage_and_exit() {
    printf 'Usage: %s [option] <output-file>\n\n' "$this_script"

    # description paragraph, one tab-indented line per argument
    printf '\t%s\n' \
        "This script will collect ram based system information and will" \
        "trigger a SAOS server core dump if the server is running."

    printf '\nOptions:\n'
    printf '\t%s\n' \
        "--verbose  displays debug information while running" \
        "--dry_run  shows commands that will be run (but does nothing)" \
        "--help     shows this message"
    printf '\n'

    exit 1
}

# -----------------------------------------------------------------------------
# Run a command and append a transcript of it to the dump file.
#
# Arguments: the command and its arguments, exactly as they should be run.
# Globals read:
#   verbose    when 1, the command line is echoed to stdout first
#   dry_run    when 1, nothing is executed and nothing is written
#   hostname   used in the fake shell prompt written to the dump file
#   dump_file  file that receives the prompt line, the command's stdout and
#              the "----end-command----" marker
# Returns: 0 in dry-run mode, otherwise the exit status of the command (or a
#          non-zero status if the dump file could not be opened).
command_wrapper() {
    # flattened copy, used only for display/logging
    local command_line="$*"
    local status=0

    if [ "$verbose" -eq "1" ] ; then
        echo "$command_line"
    fi

    if [ "$dry_run" -eq "1" ] ; then
        return 0
    fi

    {
        echo "root@$hostname# $command_line"
        # "$@" keeps every argument as the caller passed it (no re-splitting
        # and no glob expansion of the flattened string).
        "$@"
        # capture the status now; the marker echo below would overwrite $?
        status=$?
        echo "----end-command----"
    } >> "$dump_file" || status=$?

    return "$status"
}

# -----------------------------------------------------------------------------
# Start the dump with a title line, the current date and an empty line.
# Each item goes through command_wrapper, so verbose/dry-run handling and the
# destination file are whatever command_wrapper applies.
write_dump_header() {
    local title="push button dump"

    command_wrapper echo "$title"
    command_wrapper date
    command_wrapper echo
}

# -----------------------------------------------------------------------------
# Force the running SAOS server to dump core and wait for the dump to finish.
#
# Does nothing when the pid file is missing, does not hold a plain numeric
# pid, or names a process that is no longer present under /proc.
#
# Globals read (none of them is assigned in this file; presumably they come
# from the sourced helper scripts -- verify there):
#   CORE_FINAL_PATH  directory searched for core-* files
#   core_lock_file   file whose presence is taken to mean "core being written"
#   dry_run          when 1, stop after showing the kill command
#
# NOTE(review): the wait on $core_lock_file has no timeout; a stale lock file
# keeps this function polling forever.
create_saos_core_dump() {

    local SAOS_PIDFILE="/var/run/leos.pid"
    local saos_pid core_count oldest_core newest_core

    if [ ! -f "$SAOS_PIDFILE" ] ; then
        return
    fi

    saos_pid=$(cat "$SAOS_PIDFILE")

    # An empty pid would make the /proc test below look at "/proc/" (which
    # always exists) and then run kill without a target, so reject anything
    # that is not a plain number.
    case "$saos_pid" in
        ''|*[!0-9]*) return 0 ;;
    esac

    # If SAOS doesn't appear to be running, then there is nothing to do here
    if [ ! -d "/proc/$saos_pid" ] ; then
        return
    fi

    # If there are too many core files, remove the oldest core file
    core_count=$(ls -1 "$CORE_FINAL_PATH"/core-* 2>/dev/null | wc -l)
    if [ "$core_count" -gt "1" ]; then
        command_wrapper ls -1cr "$CORE_FINAL_PATH"/core-*
        # -c sorts by status-change time, -r puts the oldest entry first
        oldest_core=$(ls -1cr "$CORE_FINAL_PATH"/core-* | head -n1)
        if [ -n "$oldest_core" ] ; then
            command_wrapper rm "$oldest_core"
        fi
    fi

    # kill the SAOS server to trigger a core dump
    command_wrapper kill -TRAP "$saos_pid"

    # In a dry run no signal was sent, so waiting for a core file would only
    # delay the script and end in a misleading failure message.
    if [ "${dry_run:-0}" -eq "1" ] ; then
        return 0
    fi

    # wait for the core dump to get going
    sleep 5

    # check to see if it looks like a core file is being written
    if [ -f "$core_lock_file" ] ; then

        newest_core=$(ls -1c "$CORE_FINAL_PATH"/core-* | head -n1)
        command_wrapper echo "$newest_core is being written"

        # poll to wait until the core dump completes
        while [ -f "$core_lock_file" ] ; do
            sleep 1
        done

        command_wrapper ls -lc "$CORE_FINAL_PATH"/core-*

    else
        echo "Unable to trigger core dump for pid $saos_pid"
    fi
}

# --- main --------------------------------------------------------------------

# handle options
#
# Both spellings of the dry run flag are accepted.  Any other argument that
# starts with '-' is rejected instead of being silently taken as the output
# file name: a mistyped option would otherwise start a real run (including the
# server core dump).  Use "--" before an output file whose name starts with '-'.
while :; do
    case "$1" in
        "--help"|"-h"|"-?"|"help") print_usage_and_exit ;;
        "--verbose") verbose=1 ; shift ;;
        "--dry_run"|"--dry-run") verbose=1 ; dry_run=1; shift ;;
        "--") shift ; break ;;
        -*)
            $SYSLOG_ERR "unknown option: $1"
            print_usage_and_exit
            ;;
        *) break ;;
    esac
done

if [ "$(whoami)" != "root" ] ; then
    $SYSLOG_ERR "must be run as root"
    exit 1
fi

dump_file=$1

# quoted so that a missing argument and a path containing spaces are both
# evaluated correctly
if [ -z "$dump_file" ] ; then
    $SYSLOG_ERR "requires a destination file argument"
    exit 1
fi

write_dump_header

# write the reset reason cookie
command_wrapper saparam -f write $ParamType_LastResetReason $ResetReason_ResetButton

# take a snapshot of running processes
command_wrapper top -Hbn1

# take a snapshot of LINX
command_wrapper linxstat -fqHT -l32

# gather ram based logs
command_wrapper evt_show
command_wrapper evt_show details
command_wrapper evt_show ram1
command_wrapper evt_show ram1 details

create_saos_core_dump
