#!/bin/sh

# -----------------------------------------------------------------------------
# Print the command-line synopsis on stdout and terminate the script.
#
# Globals:   this_script (read)     - script name shown in the synopsis
#            CORE_FINAL_PATH (read) - directory named as the destination
# Arguments: none
# Returns:   does not return; exits the shell with status 1
print_usage_and_exit() {
    # printf is used rather than "echo -e": under #!/bin/sh the -e flag is
    # not portable, and some shells print it literally instead of expanding
    # the \t escapes.
    printf '\n'
    printf 'Usage: %s <exec-name> <pid> <timestamp>\n' "$this_script"
    printf '\n'
    printf '\tThis script will accept a core file on stdin and save it in\n'
    printf '\tgzip format to %s.\n' "$CORE_FINAL_PATH"
    printf '\n'
    exit 1
}

# - MAIN ----------------------------------------------------------------------

# pick up some helper functions
source /ciena/scripts/fault.sh
source /ciena/scripts/saos_utils.sh
source /ciena/scripts/core_paths.sh

# Take note of our name as invoked.  $0 is quoted so that an invocation path
# containing whitespace or glob characters still yields a single name.
this_script=$(basename "$0")

# Scratch file used further down to capture gzip's stderr.  $$ (the pid of
# this shell) keeps the name distinct between concurrent invocations.
TMP_STDERR=/tmp/$$.stderr

# Errors should be sent to syslog, as there is no guarantee that stdout or
# stderr will be connected to anything when this script is invoked.
# SYSLOG holds a command prefix and is expanded unquoted where it is used.
SYSLOG="logger -t $this_script"

# Add appfs to our path so that artamir platforms can find gzip
export PATH=$PATH:/mnt/apps/usr/bin

# check for various help incarnations
case "$1" in
    "-h"|"--help"|"-?") print_usage_and_exit ;;
esac

# Verify that the correct number of parameters were provided.  A short
# parameter list is reported but is not fatal: parsing below falls back to
# default values for whatever is missing.
if [ "$#" -lt 3 ] ; then
    echo
    echo "ERROR: $# parameters, expecting at least 3"
    $SYSLOG "$# parameters, expecting at least 3"
fi

# Grab the parameters provided by the kernel.  These are defined by the
# parameters included in /proc/sys/kernel/core_pattern.
#
# Globals:   thread_name, pid, timestamp (written)
# Arguments: [<exec-name>...] [<pid>] [<timestamp>]
# Returns:   0 always
parse_kernel_params() {
    # Default values are set in case something goes wrong and the kernel
    # does not provide parameters.
    thread_name="unknown_bin"
    pid=0
    timestamp="no_time_$$"

    if [ "$#" -eq 0 ] ; then
        return 0
    fi

    thread_name=$1

    # If we receive more than 3 parameters, assume that the thread name
    # has spaces in it, and concatenate the initial parameters until we
    # have only 3.  (Plain concatenation is used instead of "+=", which is
    # not available in every /bin/sh.)
    while [ "$#" -gt 3 ] ; do
        thread_name="${thread_name}_$2"
        shift
    done

    # A name can also arrive as ONE parameter that still contains spaces
    # (core(5) describes kernels that split the pattern into arguments
    # before expanding %e).  Squash those spaces too, so both delivery
    # styles produce the same underscore-joined name and later unquoted
    # uses of the name cannot be split into several words.
    thread_name=$(printf '%s' "$thread_name" | tr ' ' '_')

    if [ "$#" -gt 1 ] ; then
        pid=$2
    fi
    if [ "$#" -gt 2 ] ; then
        timestamp=$3
    fi
    return 0
}

parse_kernel_params "$@"

# Use the exe entry in proc to determine the real name of the binary.  This
# allows the correct name to be attached to commands that soft link to a
# binary or change their name using prctl().  Use the tr command to squash
# any spaces in the name -- including in the fallback name, so that the
# file names built from it below never contain a space.
#
# Arguments: $1 - pid of the dumping process
#            $2 - name to fall back on when /proc/<pid>/exe cannot be read
# Outputs:   the resolved name on stdout
#
# The body is a subshell so that its scratch variable does not leak into the
# rest of the script.
resolve_real_name() (
    exe_path=$(readlink "/proc/$1/exe" 2> /dev/null)
    if [ -n "$exe_path" ] ; then
        basename "$(printf '%s' "$exe_path" | tr ' ' '_')" 2> /dev/null
    else
        printf '%s\n' "$2" | tr ' ' '_'
    fi
)

real_name=$(resolve_real_name "$pid" "$thread_name")

# Command line of the dumping process, with the NUL separators used in
# /proc/<pid>/cmdline turned into spaces by xargs.
cmdline="$(xargs -0 </proc/$pid/cmdline)"

# Marker file, final archive and in-progress archive for this core.
core_initial_file=$CORE_INITIAL_PATH/core.$pid
core_final_file=$CORE_FINAL_PATH/core-$real_name-$pid-$timestamp.gz
core_partial_file=$CORE_FINAL_PATH/core-$real_name-$pid-$timestamp.partial.gz

# NOTE(review): KRN_MODE is presumably set by sourcing "krn getenv" -- confirm.
source /ciena/scripts/krn getenv
# NOTE(review): family is not referenced again in this file; presumably it is
# read by one of the sourced helpers -- confirm before removing.
family=$(cat /family)

# Maximum number of core files kept: 5 in debug mode, 2 otherwise.
if [ "$KRN_MODE" != "debug" ]; then
    NCORES=2
else
    NCORES=5
fi

# Snapshot LINX status first, before the system state drifts any further.
generate_linxstat_log "$this_script called for $real_name($pid)"

# Announce that a core is on its way.  A file sitting in $CORE_INITIAL_PATH
# tells other processes that the core is not finished yet, so they can
# react accordingly.  This mimics the 6.3 core behavior to minimize
# compatibility problems.
touch $core_initial_file

# Only one core is written at a time.  If several were written in parallel
# and flash filled up, _all_ of them would end up truncated and corrupt;
# serialized, every core that fits is intact and only the extras are
# truncated.  (Those are kept anyway: a truncated core file still has
# useful information in it.)
#
# The mutex is a hard link: ln fails while the lock name already exists, so
# it works as an atomic test-and-set.  One attempt is made per second for
# up to three minutes.
# NOTE(review): core_lock_file is not assigned in this file; presumably it
# comes from one of the sourced helper scripts -- confirm.
lock_tries_left=180
until ln $core_initial_file $core_lock_file 2>/dev/null; do
    sleep 1
    lock_tries_left=$((lock_tries_left - 1))
    [ $lock_tries_left -eq 0 ] || continue

    # Out of patience: report it, withdraw our marker and give up.
    $SYSLOG "Too busy, can't make core file $core_final_file"
    rm -f $core_initial_file
    exit
done

# Grab everything knowable about our dying process, esp. VSIZE
# Do this after we've been serialized, to avoid report hash.
generate_procdump_log "procdump for $real_name($pid)" $pid

# Do not use nice on the gzip call below if associated corefile is from a
# SAOS server process crash.  Use of nice on SAOS server crash in the first
# minute of operation was found to result in >10mins to gzip corefile,
# resulting in watchdog expiry and reboot.  This defeats config revert-to-
# defaults defense mechanism.  (The point of using nice is to allow the
# server to operate relatively unimpeded, which does not apply when it is
# the server itself that has died.)
unset NICE

# NOTE(review): if saos_server_pid prints nothing, this numeric test errors
# out and counts as false, so gzip then runs without nice -- confirm that is
# the intended behavior when no server pid is available.
if [ "$(saos_server_pid)" -ne "$pid" ]; then
    NICE=nice
fi

# Start from a known-empty error note so that the value reported after this
# section reflects this run only.
write_error=""

# Number of core archives already present; in-progress ".partial.gz" files
# match the pattern and are counted too.
zipcount=$(ls -1 $CORE_FINAL_PATH/core-* 2>/dev/null | wc -l)
if [ "$zipcount" -lt "$NCORES" ]; then

    echo "Core file for $pid ($cmdline) being generated..." >/dev/console
    mkdir -p -m 0775 $CORE_FINAL_PATH

    # Run gzip with input from stdin, and send the output to the partial
    # file.  We choose speed rather than compression density; the size
    # difference is small but the speed difference is immense.
    # $NICE is deliberately unquoted: when unset it must vanish entirely.
    if ! $NICE gzip -1fc > "$core_partial_file" 2> "$TMP_STDERR" ; then
        # Forward gzip's complaint only when there is one.  Calling logger
        # without a message would make it read the message from stdin,
        # which here is the core dump itself.
        if [ -s "$TMP_STDERR" ] ; then
            $SYSLOG "$(tr '\n' ' ' < "$TMP_STDERR")"
        fi
        $SYSLOG "Error making $core_final_file, it may be corrupt."
        write_error=" (core file may have errors)"
    fi
    rm -f "$TMP_STDERR"

    # Once we are done, use the final name instead of the partial one.
    # This is done after a failed gzip as well, so a damaged core is kept.
    mv "$core_partial_file" "$core_final_file"

else
    $SYSLOG "$real_name($pid) \"$cmdline\" maximum number of core files reached ($NCORES)"
fi

# Finished, release interlocks.
rm -f "$core_initial_file" "$core_lock_file"

# Record the outcome.  Every argument is quoted so that an empty value or
# one containing whitespace cannot change the argument count; the error note
# therefore always arrives as the sixth argument.
generate_core_log "$real_name" "$pid" "$timestamp" "$core_final_file" "$thread_name" "$write_error"
