#!/bin/bash
# Inspection item 309: records the WAL-related switch settings of this node and
# checks the WAL health status, writing findings to ${RESULTFILE}.
set +x
LOG_FILE="/var/log/inspect.log"
G_MML_FILE_PATH="/opt/huawei/snas/script/inspect_mml"
# NOTE(review): CheckItems is presumably what provides GetInspectType and
# G_TMP_INSPECT_PATH used below - confirm against that file.
source "${G_MML_FILE_PATH}/CheckItems"
CurInspectNum="309"
CurInspectFun="$(GetInspectType "${CurInspectNum}")"
RESULTFILE="${G_TMP_INSPECT_PATH}tmpResult${CurInspectFun}"
# Create/truncate the result file so every run starts from an empty file.
# The target is quoted so a path containing whitespace is not word-split.
>"${RESULTFILE}"

# Appends one record of the form "[<date>][<pid>][<inspect type>]<message>"
# to ${LOG_FILE}.
# Globals:   LOG_FILE (read), CurInspectFun (read)
# Arguments: $@ - message words; they are joined with single spaces
function LOG
{
    local now
    now=$(date)
    # The record is passed as one quoted string: the leading "[...]" groups and
    # any "*" or "?" in the message are glob patterns when left unquoted, and
    # unquoted expansion would also collapse runs of whitespace.
    printf '%s\n' "[${now}][$$][${CurInspectFun}]$*" >> "${LOG_FILE}"
}

# Collects the WAL-related switch lines of this node and appends them, as one
# space-separated record, to ${RESULTFILE}.
# Every line containing wal_switch, walbigfile_switch or wal_global_switch in
# ca_d.ini and mwa_system_switch in mds_c.ini is taken with its spaces removed.
# Globals:   RESULTFILE (appended to)
# Arguments: none
function checkNodeWALSwitch()
{
    local caConf="/opt/huawei/snas/etc/ca_d.ini"
    local mdsConf="/opt/huawei/snas/etc/mds_c.ini"
    local walinfo=""

    walinfo="$( grep wal_switch "${caConf}" | sed "s/ //g" )"
    LOG "[$LINENO]info 1:${walinfo}"
    walinfo="${walinfo} $( grep walbigfile_switch "${caConf}" | sed "s/ //g" )"
    LOG "[$LINENO]info 2:${walinfo}"
    walinfo="${walinfo} $( grep wal_global_switch "${caConf}" | sed "s/ //g" )"
    LOG "[$LINENO]info 3:${walinfo}"
    walinfo="${walinfo} $( grep mwa_system_switch "${mdsConf}" | sed "s/ //g" )"
    LOG "[$LINENO]info 4:${walinfo}"

    # Quoted target: an unquoted path containing whitespace would make the
    # redirection fail ("ambiguous redirect") and the record would be lost.
    echo "${walinfo}" >>"${RESULTFILE}" 2>&1

    return
}

# Prints the counter stored in whitespace-separated field number $2 of the
# text line $1: the part after the first ':' and before the next ':' or ','.
function _walWorkloadCounter()
{
    printf '%s\n' "$1" | awk -v idx="$2" '{ split($idx, kv, ":"); split(kv[2], val, ","); print val[1] }'
}

# Prints the lines of file $1 that match grep pattern $2 and, when a third
# argument is given, also match grep pattern $3.
function _walFilterLog()
{
    if [ -n "${3:-}" ]
    then
        grep -- "$2" < "$1" | grep -- "$3"
    else
        grep -- "$2" < "$1"
    fi
}

# Scans every /var/log/snasmessages* file for lines matching the given
# pattern(s) and appends one summary record to ${RESULTFILE} when at least one
# line matched.
# Globals:   RESULTFILE (appended to)
# Arguments: $1 - minimum free memory (compared with column 4 of the "Mem:"
#                 row of `free -m`)
#            $2 - maximum log file size (compared with the `du -s` value)
#            $3 - grep pattern
#            $4 - optional second grep pattern applied to the matches of $3
# Returns:   0 when the scan completed; 1 when it was abandoned because a log
#            file reached the size limit while free memory was below the
#            minimum (a manual-confirmation record is written in that case).
function _walScanSnasMessages()
{
    local minMem=$1
    local maxLogSize=$2
    local pattern=$3
    local extraPattern=${4:-}
    local item=""
    local sizeK=""
    local memFree=""
    local hits=""
    local lastHit=""
    local total=0

    # Glob directly instead of parsing `ls` output.
    for item in /var/log/snasmessages*
    do
        # An unmatched glob stays literal; skip it.
        [ -e "${item}" ] || continue

        sizeK=$( du -s "${item}" | awk '{print $1}' )
        if [ "${sizeK:-0}" -ge "${maxLogSize}" ]
        then
            # Free memory only matters for oversized files, so it is queried
            # only here.
            memFree=$( free -m | grep -w "Mem:" | awk '{print $4}' )
            if [[ "${memFree}" =~ ^[0-9]+$ ]] && [ "${memFree}" -lt "${minMem}" ]
            then
                echo "[ERR]INFO: Available memory of the node is:${memFree}M,but the file(${item}) size is too big:${sizeK}K, please manually confirm the cluster status and inspection result."  >>"${RESULTFILE}" 2>&1
                return 1
            fi
        fi

        hits=$( _walFilterLog "${item}" "${pattern}" "${extraPattern}" | wc -l )
        if [ "${hits:-0}" -gt 0 ]
        then
            total=$(( total + hits ))
            lastHit=$( _walFilterLog "${item}" "${pattern}" "${extraPattern}" | tail -1 )
        fi
    done

    if [ "${total}" -ge 1 ]
    then
        echo "[ERR]INFO:There are ${total} WAL playback failed records. The last record is${lastHit}"  >>"${RESULTFILE}" 2>&1
    fi

    return 0
}

# Checks the WAL health of this node and appends "[ERR]INFO" records to
# ${RESULTFILE} for every problem found:
#   1. nothing is checked when the "show wal switch" MML output matches
#      "global .*0" (logged as "WAL switch is not enabled");
#   2. the last wl/wle workload line is parsed and an error is recorded when
#      wle / wl is greater than 0.5;
#   3. the snasmessages logs are scanned for "WAL_OprReplayFsmErr" lines;
#   4. the snasmessages logs are scanned for "datalen err" lines that also
#      contain "WAL".
# Globals:   RESULTFILE (appended to)
# Arguments: none
function checkNodeWalHealth()
{
    local walErrInfo=""
    local MinMem=1024           # 1024M, compared with `free -m` output
    local MaxLogSize=204800     # 200M, compared with `du -s` output (reported with a K suffix)
    local writeLogCnt=""
    local writeLogErrorCnt=""
    local percent=""
    local intRe='^-?[0-9]+$'
    local numRe='^-?[0-9]+(\.[0-9]+)?$'

    # Strip colour escape sequences, CR and NUL bytes from the MML output
    # before looking for the global switch value.
    if /usr/local/bin/MmlBatch 988 "show wal switch" \
        | sed -e 's/\x1B\[0;[3-4][0-9]m//g' -e 's/\x0D//g' -e 's/\x00//g' \
        | grep -q "global .*0"
    then
        LOG "[$LINENO]The WAL switch is not enabled,have no need to check the WAL health status."
        return
    fi

    # wle / wl > 0.5 is reported as a high WAL write failure rate.
    # The `cat |` form is kept on purpose: Workload is not visible here, and a
    # redirection would stop it from running at all when the file is missing.
    walErrInfo=$( cat /var/log/snas_workload | Workload MDS_MWA wl,wle | tail -n1 )
    if [ -z "${walErrInfo}" ]
    then
        LOG "[$LINENO]there is no WAL error info in /var/log/snas_workload."
    else
        LOG "[$LINENO]WAL info:${walErrInfo}"
        writeLogCnt=$( _walWorkloadCounter "${walErrInfo}" 2 )
        writeLogErrorCnt=$( _walWorkloadCounter "${walErrInfo}" 3 )
        if ! [[ "${writeLogCnt}" =~ ${intRe} && "${writeLogErrorCnt}" =~ ${numRe} ]]
        then
            # Unparsable counters used to surface only as `[` / bc errors on
            # stderr; skip the ratio check explicitly instead.
            LOG "[$LINENO]unexpected wl/wle values(wl=${writeLogCnt},wle=${writeLogErrorCnt}),skip the WAL write error rate check."
        elif [ "${writeLogCnt}" -ne 0 ]
        then
            # bc prints 1 when the comparison holds and 0 otherwise.
            percent=$( echo "scale=4;${writeLogErrorCnt} / ${writeLogCnt} > 0.5000" | bc )
            if [ "${percent}" == "1" ]
            then
                echo "[ERR]INFO:The MDS fails to write WAL logs at a high rate(${walErrInfo})."  >>"${RESULTFILE}" 2>&1
            fi
        fi
    fi

    # Scan for WAL_OprReplayFsmErr records. The scan is abandoned, and the
    # remaining checks skipped, when a log file is too big for the free memory.
    _walScanSnasMessages "${MinMem}" "${MaxLogSize}" "WAL_OprReplayFsmErr" || return 0

    # Scan for "datalen err" records that mention WAL.
    # NOTE(review): the summary for this scan uses the same "WAL playback
    # failed records" wording as the previous scan although it matches
    # different log lines; the text is kept unchanged because consumers of the
    # result file may depend on it - confirm whether it should differ.
    _walScanSnasMessages "${MinMem}" "${MaxLogSize}" "datalen err" "WAL" || return 0

    return 0
}

# Record the WAL-related switch settings of this node in the result file.
checkNodeWALSwitch

# Check the WAL health status of this node.
checkNodeWalHealth
