# Helper utilities for shell script fault logging
#
# This file should be "source"d by callers to pick up the functions.
#
# This file assumes that the caller has sourced /ciena/scripts/utils.sh
# (to provide print_app_line and print_result).

# Directory holding the fault and debug logs, and the two log files themselves.
FAULTDIR=/mnt/sysfs/fault
FAULTLOG=$FAULTDIR/fault.log
DEBUGLOG=$FAULTDIR/debug.log

# Version files whose contents generate_fault_banner copies into the fault
# log (each one is skipped when the file does not exist).
APPFS_VERSION_FILE=/mnt/apps/version
ROOTFS_VERSION_FILE=/version

# compress_large_logfile archives a log once its size in bytes exceeds this
# value (204800 = 200 * 1024).
LOG_SIZE_THRESHOLD=204800

# handle_kernel_paniclog moves the files found in PSTORE_DIR into
# PSTORE_ARCH_DIR (as paniclog-* files).  Before moving, it prunes existing
# paniclog-* archives down to the PSTORE_KEEP_COUNT most recently modified.
PSTORE_DIR="/sys/fs/pstore"
PSTORE_ARCH_DIR=$FAULTDIR
PSTORE_KEEP_COUNT=8

# -----------------------------------------------------------------------------
create_fault_log() {
    # Ensure the fault directory and the fault log file both exist.
    # Globals: FAULTDIR, FAULTLOG (read)
    # All expansions are quoted so that a path containing whitespace or glob
    # characters is always treated as a single argument.

    # Make sure the fault directory exists with appropriate permissions.
    # (Permissions are only set when the directory is created here.)
    if [ ! -d "$FAULTDIR" ]; then
        mkdir -p "$FAULTDIR"
        chmod 0775 "$FAULTDIR"
    fi

    # Force creation of a fault log file (if necessary), and fix permissions.
    # Note that this will not affect the log contents if it already exists.
    touch "$FAULTLOG"
    chmod a+rw "$FAULTLOG"
}

# -----------------------------------------------------------------------------
append_header() {
    # Append a "start of appended file" banner to a log file.
    # Arguments: $1 - name of the file about to be appended (only printed)
    #            $2 - file the banner is appended to
    # The banner is three lines: a marker line, the current date, and a
    # START line naming $1.
    local source_file=$1
    local dest_file=$2

    # One grouped, quoted redirection: the destination is opened once and a
    # path containing whitespace is not word-split.
    {
        echo "************** APPENDED FILE *****************"
        date
        echo "************** START $source_file"
    } >> "$dest_file"
}

# -----------------------------------------------------------------------------
append_trailer() {
    # Append an "end of appended file" marker line to a log file.
    # Arguments: $1 - name of the file that was appended (only printed)
    #            $2 - file the marker is appended to
    local source_file=$1
    local dest_file=$2

    # Quoted so that a destination path containing whitespace is not split.
    echo "************** END $source_file" >> "$dest_file"
}

# -----------------------------------------------------------------------------
compress_large_logfile() {
    # Archive a log file once it has grown past $LOG_SIZE_THRESHOLD bytes.
    # Arguments: $1 - path of the log file to check
    # Globals:   LOG_SIZE_THRESHOLD (read)
    # Uses print_app_line / print_result (expected from
    # /ciena/scripts/utils.sh) to report progress, and hands the file to
    # /ciena/scripts/fault_log_compress.
    # Returns 0 when nothing needed to be done.
    local logfile=$1
    local size

    # Nothing to do when the log does not exist (yet).
    [ -f "$logfile" ] || return 0

    # Ask stat for the size directly instead of parsing "ls -l" columns;
    # the path is quoted so whitespace in it cannot break the lookup.
    size=$(stat -c %s "$logfile") || return 0

    if [ "$size" -gt "$LOG_SIZE_THRESHOLD" ] ; then
        print_app_line "archiving $(basename "$logfile" .log) log"
        /ciena/scripts/fault_log_compress "$logfile"
        print_result $?
    fi
}

# -----------------------------------------------------------------------------
initialize_fault_log() {
    # This function will:
    # - collect various log files into the main fault log
    # - compress log files if necessary
    # - ensure the fault log exists with the correct permissions
    # (must be done after filesystems are mounted)
    #
    # Globals: FAULTDIR, FAULTLOG, DEBUGLOG (read)
    # Uses print_app_line / print_result (expected from
    # /ciena/scripts/utils.sh) to report progress.

    # NOTE(review): this is a predictable name in /tmp; consider mktemp if it
    # is available on the target.
    local temp_collector=/tmp/temp.$$.log
    local f

    # Collect up any stray fault.log.<pid> or sigfault.log.<pid> files
    # and put them into a temporary file.  Only the variable parts of the
    # patterns are quoted, so the globs still expand while whitespace in the
    # directory name cannot split the path.
    for f in "${FAULTLOG}".* "${FAULTDIR}"/sigfault.log.* ; do
        if [ -f "$f" ] ; then              # avoid the no match case
            append_header "$f" "$temp_collector"
            print_app_line "collecting $(basename "$f")"
            cat "$f" >> "$temp_collector"
            print_result $?
            append_trailer "$f" "$temp_collector"
            rm "$f"
        fi
    done

    # If we collected anything above, then push it into the main fault log
    if [ -f "$temp_collector" ] ; then
        print_app_line "archiving collected logs"
        cat "$temp_collector" >> "$FAULTLOG"
        print_result $?
        rm "$temp_collector"
    fi

    # See if the log files are getting too full, and compress if necessary
    compress_large_logfile "$FAULTLOG"
    compress_large_logfile "$DEBUGLOG"

    create_fault_log
}

# -----------------------------------------------------------------------------
generate_fault_banner() {
    # Append a fault report banner to the fault log: a marker line, the
    # current date, and the appfs / rootfs version strings when the
    # corresponding version files exist.
    # Globals: FAULTLOG, APPFS_VERSION_FILE, ROOTFS_VERSION_FILE (read)

    # Make sure the log (and its directory) exist before appending.
    create_fault_log

    # One grouped, quoted redirection: the log is opened once and a path
    # containing whitespace is not word-split.
    {
        echo "************** FAULT REPORT ******************"
        date

        if [ -f "$APPFS_VERSION_FILE" ] ; then
            echo "appfs version  : $(cat "$APPFS_VERSION_FILE")"
        fi

        if [ -f "$ROOTFS_VERSION_FILE" ] ; then
            echo "rootfs version : $(cat "$ROOTFS_VERSION_FILE")"
        fi
    } >> "$FAULTLOG"
}

# -----------------------------------------------------------------------------
generate_linxstat_log() {
    # Append a fault banner, the caller's message and the output of
    # "linxstat -fqHT -l32" to the fault log.
    # Arguments: $1 - message describing the fault
    # Globals:   FAULTLOG (read)

    local message=$1

    generate_fault_banner
    {
        # printf with a quoted argument writes the message verbatim; an
        # unquoted echo would glob-expand it and collapse its whitespace.
        printf '%s\n' "$message"
        echo "linxstat dump:"
        linxstat -fqHT -l32
    } >> "$FAULTLOG"
}

# -----------------------------------------------------------------------------
generate_procdump_log() {
    # Append a fault banner, the caller's message and a filtered
    # /ciena/bin/procdump of /proc/<pid> to the fault log.
    # Arguments: $1 - message describing the fault
    #            $2 - pid of the process to dump
    # Globals:   FAULTLOG (read)

    local message=$1
    local pid=$2

    # Grab everything knowable about our dying process, esp. VSIZE
    generate_fault_banner
    {
        # printf with a quoted argument writes the message verbatim; an
        # unquoted echo would glob-expand it and collapse its whitespace.
        printf '%s\n' "$message"
        echo "procdump $pid:"
    } >> "$FAULTLOG"
    # Unfortunately there is a LOT of replication in a heavily-threaded
    # process, in addition to some useless and bulky stuff, so we are pretty
    # selective about what we take.  While threads CAN have different fd's
    # and memory maps, we don't do this, so excluding them is helpful.
    /ciena/bin/procdump -x '/net$' -x '/mounts$' -x '/mountinfo$' \
                        -x '/mountstats$' -x '/pagemap$' -x '/ns$' \
                        -x '/smaps$' -x '/maps$' -x '/fd$' -x '/fdinfo$' \
                        -x '/environ$' -x '/limits$' -x '/auxv$' -x '/comm$' \
                        -x '/cmdline$' -x '/oom_' -x '/mnt$' -x '/cwd$' \
                        -x '/root$' -x '/exe$' -x '/clear_refs$' -x '/mem$' \
                        -x '/personality$' -x '/syscall$' -x '/cpuset$' \
                        -x '/cgroup$' "/proc/$pid" \
     | grep -F -v Poisonous >> "$FAULTLOG"
    # But we do want the main thread's (shared) fd's and maps, etc.
    /ciena/bin/procdump "/proc/$pid/fd" "/proc/$pid/fdinfo" \
                        "/proc/$pid/environ" "/proc/$pid/limits" \
                        "/proc/$pid/auxv" "/proc/$pid/cmdline" \
                        "/proc/$pid/comm" "/proc/$pid/oom_score" \
                        "/proc/$pid/oom_adj" "/proc/$pid/oom_score_adj" \
                        "/proc/$pid/mnt" "/proc/$pid/cwd" "/proc/$pid/root" \
                        "/proc/$pid/exe" \
                          >> "$FAULTLOG"
    # The maps is a subset of smaps, but smaps has way too much noise in it.
    # We skip maps and filter smaps pretty heavily, keeping only Size and Rss.
    # (grep -E / grep -F replace the deprecated egrep / fgrep spellings.)
    /ciena/bin/procdump "/proc/$pid/smaps" \
     | grep -E -v -e Pss: -e Shared_Clean: \
                  -e Shared_Dirty: -e Private_Clean: -e Private_Dirty: \
                  -e Referenced: -e Anonymous: -e AnonHugePages: \
                  -e Swap: -e KernelPageSize: -e MMUPageSize: \
                  -e Locked: -e VmFlags: \
                    >> "$FAULTLOG"
}

# -----------------------------------------------------------------------------
generate_core_log() {
    # Record a core dump in the fault log and, when the core file exists,
    # raise a CRIT event and copy the event log from ram0 to flash0.
    # Arguments: $1 - executable name
    #            $2 - pid
    #            $3 - timestamp (accepted but not used by this function)
    #            $4 - path of the core file
    #            $5 - thread name
    #            $6 - optional text appended to the log and event messages
    # Globals:   FAULTLOG (read)

    # Positional parameters are read directly rather than through a chain of
    # "shift"s, so omitting trailing arguments never over-shifts.
    local exec_name=$1
    local pid=$2
    local file_name=$4
    local thread_name=$5
    local write_error=$6
    local fancy_name

    # check if the thread_name is different from the executable name
    if [ "$exec_name" = "$thread_name" ] ; then
        fancy_name="$exec_name($pid)"
    else
        fancy_name="$thread_name, $exec_name($pid)"
    fi

    # write an entry to the fault log
    generate_fault_banner
    echo "Core dump from $fancy_name$write_error" >> "$FAULTLOG"

    if [ -f "$file_name" ]; then
        ls -al "$file_name" >> "$FAULTLOG" 2>&1
        /ciena/bin/evt_log CRIT "$fancy_name: core file $file_name$write_error"
        /ciena/bin/evt_copy ram0 flash0
    elif [ -z "$write_error" ] ; then
        # No core file and no write error reported.
        echo "(corefile max reached)" >> "$FAULTLOG"
    fi
}

#--------------------------------------------------------------------------------
# check if a kernel panic log is present 
kernel_paniclog_present()
{
    # Succeeds (status 0) when ${PSTORE_DIR} lists at least one entry.
    # A missing or unreadable directory counts as "nothing present": the ls
    # error is discarded and wc then reports 0 words.
    # The directory is quoted so a path containing whitespace is listed as
    # one argument; the substitution itself yields only a number.
    test 0 -ne $(ls -A "${PSTORE_DIR}" 2>/dev/null | wc -w)
}

#------------------------------------------------------------------------------
# move kernel panic log files to $PSTORE_ARCH_DIR
handle_kernel_paniclog()
{
    # Move the regular files found in ${PSTORE_DIR} into ${PSTORE_ARCH_DIR},
    # naming each one paniclog-<original name>-<ctime in seconds>.
    # Does nothing unless kernel_paniclog_present succeeds.
    # Globals: PSTORE_DIR, PSTORE_ARCH_DIR, PSTORE_KEEP_COUNT (read)
    local logfile filename stale

    if kernel_paniclog_present; then
        mkdir -p "${PSTORE_ARCH_DIR}"

        # keep only the most recent archives: list the existing ones newest
        # first and delete everything after the first PSTORE_KEEP_COUNT.
        # This runs before the new logs are moved in.  A read loop is used
        # so a path containing whitespace is removed as one argument.
        ls -1t "${PSTORE_ARCH_DIR}"/paniclog-* 2>/dev/null \
            | awk -v KEEP="${PSTORE_KEEP_COUNT}" 'NR>KEEP' \
            | while IFS= read -r stale; do
                  rm -f "${stale}"
              done

        # move the logs from pstore
        for logfile in "${PSTORE_DIR}"/*; do
            # skips non-regular entries and the unmatched-glob case
            [ -f "${logfile}" ] || continue
            filename=paniclog-$(basename "${logfile}")-$(stat -c %Z "${logfile}")
            mv "${logfile}" "${PSTORE_ARCH_DIR}/${filename}"
        done
    fi
}

