#!/bin/bash

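#############################################################
#
# Name: Check the health status of V1/V2 SSD cards
# Error codes:
#         0000=Normal
#         0001=The lspci command failed, so the presence of V1/V2 SSD cards cannot be confirmed
#         0002=hio_info is not installed
#         0003=There are SSD cards in an unhealthy state
#         0004=There are SSD cards with a battery fault
#         0005=There are SSD cards whose bad block rate exceeds 11%
#         0006=There are unhealthy SSD cards and SSD cards with a battery fault
#         0007=There are unhealthy SSD cards and SSD cards whose bad block rate exceeds 11%
#         0008=There are SSD cards with a battery fault and SSD cards whose bad block rate exceeds 11%
#         0009=There are unhealthy SSD cards, SSD cards with a battery fault and SSD cards whose bad block rate exceeds 11%
#         0010=Internal error: an unexpected branch was reached
#
#############################################################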

PATH="/sbin:/usr/sbin:/usr/local/sbin:/root/bin:/usr/local/bin:/usr/bin:/bin:/opt/omm/oma/workspace/tools"
UPG_LIB_PATH="/opt/omm/oma/atoms/Inspect/lib"

source "${UPG_LIB_PATH}/log.sh" || { echo "source ${UPG_LIB_PATH}/log.sh failed."; exit 130; }
source "${UPG_LIB_PATH}/out_put.sh" >> ${LOG_FILE} 2>&1 || { log ERROR "source ${UPG_LIB_PATH}/out_put.sh failed."; exit 130; }
source "${UPG_LIB_PATH}/version.sh" >> ${LOG_FILE} 2>&1 || { log ERROR "source ${UPG_LIB_PATH}/version.sh failed."; exit 130; }

checkItemId="4302"
resultCode=0
errorKey=""
params=""
originalInfo=""

healthy_abnormal_ssd=""
power_abnormal_ssd=""
bad_block_abnormal_ssd=""
BAD_BLK_THRESHOLD=11

function main()
{   
    agent_items_check ${checkItemId}
    if [ $? -ne 0 ]
    then
        log INFO "${checkItemId} do not select, pass"
        return 0
    fi

    osd_judg=`cat /opt/dsware/agent/conf/agentMonitor | grep osd | grep monitor | grep True | wc -l`
    if [ ${osd_judg} -eq 0 ]
    then
        originalInfo="${originalInfo}""result:this is not OSD_node, no need to check."
        log INFO "FSA_${checkItemId}:this is not OSD_node, no need to check."
        log INFO "FSA_${checkItemId}:ok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    fi

    lspci_judg=`which lspci`
    originalInfo="${originalInfo}""command:which lspci\n"
    originalInfo="${originalInfo}""review:${lspci_judg}\n"
    if [[ -z "${lspci_judg}" ]]
    then
        resultCode=1
        errorKey="${checkItemId}0001"  # 0001 lspci 命令执行失败，无法确认是否存在V1、V2 SSD卡
        originalInfo="${originalInfo}""result:command of lspci can not excute."
        log ERROR "FSA_${checkItemId}:command of lspci can not excute."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    fi

    V1_V2_judg=`lspci -v | grep -E '19e5:0007|19e5:0008|19e5:0009'`
    originalInfo="${originalInfo}""command:lspci -v | grep -E '19e5:0007|19e5:0008|19e5:0009'\n"
    originalInfo="${originalInfo}""review:V1_V2_judg=\n${V1_V2_judg}\n"
    if [[ -z "${V1_V2_judg}" ]]
    then
        originalInfo="${originalInfo}""result:there is no V1 or V2 SSD card, no need to check ."
        log INFO "FSA_${checkItemId}:there is no V1 or V2 SSD card, no need to check ."
        log INFO "FSA_${checkItemId}:ok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    fi

    devList=($(ls /dev/ | grep -E "^hio[a-zA-Z]+$" 2>/dev/null))
    devList=(${devList[*]} $(ls /dev/ | grep -E "^nvme[a-zA-Z0-9]+$" 2>/dev/null))
    originalInfo="command:ls /dev/ | grep -E '^hio[a-zA-Z]+$';ls /dev/ | grep -E '^nvme[a-zA-Z0-9]+$'\n"
    originalInfo="${originalInfo}""review:${devList}\n"
    if [ ${#devList[@]} -eq 0 ]
    then
        originalInfo="${originalInfo}""result:This environment does not support SSD card!"
        log INFO "FSA_${checkItemId}:This environment does not support SSD card!"
        log INFO "FSA_${checkItemId}:ok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    fi

    hio_info_judg=`which hio_info`
    originalInfo="${originalInfo}""command:which hio_info\n"
    originalInfo="${originalInfo}""review:${hio_info}\n"
    if [[ -z "${hio_info_judg}" ]]
    then
        resultCode=2
        errorKey="${checkItemId}0002"  # 0002 未安装 hio_info
        originalInfo="${originalInfo}""result:command of lspci can not excute."
        log ERROR "FSA_${checkItemId}:command of lspci can not excute."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    fi
    
    smio_info=$(cat /proc/smio_host 2>/dev/null)
    originalInfo="${originalInfo}""command:cat /proc/smio_host\n"
    originalInfo="${originalInfo}""review:smio_ifo:\n${smio_ifo}\n"
    for dev in ${devList[@]}
    do
        if [ "$(echo $dev |grep "nvme")" != "" ];
        then
            devName=$(echo "$smio_info" |grep "$dev"| awk -F '|' '{print $3}')
            if [ ${dev} != ${devName} ];
            then
                continue
            fi
            alarm_type=$(echo "$smio_info" |grep "$dev"| awk -F '|' '{print $15}')
            if [ ${alarm_type} -eq 5 ];
            then
                originalInfo="${originalInfo}""result:${dev} card is not healthy.detail: SMIO_ALARM_SSD_FAULT\n"
                log ERROR "FSA_${checkItemId}:${dev} card is not healthy.detail: SMIO_ALARM_SSD_FAULT"
                healthy_abnormal_ssd="${healthy_abnormal_ssd},${dev}"
            elif [ ${alarm_type} -eq 7 ];
            then
                originalInfo"${originalInfo}""result:${dev} card is not healthy.detail: SMIO_ALARM_SSD_CAPACITANCE_FAULTY\n"
                log ERROR "FSA_${checkItemId}:${dev} card is not healthy.detail: SMIO_ALARM_SSD_CAPACITANCE_FAULTY"
                power_abnormal_ssd="${power_abnormal_ssd},${dev}"
            elif [ ${alarm_type} -eq 9 ];
            then
                originalInfo="${originalInfo}""result:${dev} card is not healthy.detail: SMIO_ALARM_SSD_BAD_BLOCK_EXCEED\n"
                log ERROR "FSA_${checkItemId}:${dev} card is not healthy.detail: SMIO_ALARM_SSD_BAD_BLOCK_EXCEED"
                bad_block_abnormal_ssd="${bad_block_abnormal_ssd},${dev}"
            else
                log INFO "FSA_${checkItemId}:${dev} is healthy.detail: ${alarm_type}"
            fi
        else
            #检查SSD健康状态
            result=$(hio_info -d ${dev} | grep Health | awk -F ':' '{print $2}' | sed 's/\ //g' | sed 's/\t//g')
            originalInfo="${originalInfo}""command:hio_info -d ${dev} | grep Health | awk -F ':' '{print \$2}'\n"
            originalInfo="${originalInfo}""review:${result}\n"
            if [ "${result}" != "OK" ]
            then
                originalInfo="${originalInfo}""result:${dev} card is not healthy.detail: ${result}\n"
                log ERROR "FSA_${checkItemId}:${dev} card is not healthy.detail: ${result}"
                healthy_abnormal_ssd="${healthy_abnormal_ssd},${dev}"
            else
                log INFO "FSA_${checkItemId}:${dev} is healthy.detail: ${result}"
            fi
            
            #检查SSD电池状态
            result=$(hio_info -d ${dev} | grep "Powerloss module fault")
            originalInfo="${originalInfo}""command:hio_info -d ${dev} | grep 'Powerloss module fault'\n"
            originalInfo="${originalInfo}""review:${result}\n"
            if [ "${result}" != "" ]
            then
                originalInfo="${originalInfo}""result:${dev} battery status is not OK.detail: ${result}"
                log ERROR "FSA_${checkItemId}:${dev} battery status is not OK.detail: ${result}"
                power_abnormal_ssd="${power_abnormal_ssd},${dev}"
            else
                log INFO "FSA_${checkItemId}:${dev} battery status is OK.detail: ${result}"
            fi

            #检查最大坏块率
            result=$(hio_info -d ${dev} | grep "Max bad block rate" | awk -F ':' '{print $2}' | tr -d '%' | sed 's/^[ \t]*\|[ \t]*$//')
            originalInfo="${originalInfo}""command:hio_info -d ${dev} | grep 'Max bad block rate' | awk -F ':' '{print \$2}'\n"
            originalInfo="${originalInfo}""review:${result}%\n"
            if [ `awk -v num1=${result} -v num2=${BAD_BLK_THRESHOLD} 'BEGIN{print(num1>num2)?"1":"0"}'` -eq 1 ]
            then
                originalInfo="${originalInfo}""result:${dev} Abnormal Max bad block rate over the limit,Detail:${result}\n"
                log ERROR "FSA_${checkItemId}:${dev} Abnormal Max bad block rate over the limit,Detail:${result}"
                bad_block_abnormal_ssd="${bad_block_abnormal_ssd},${dev}"
            else
                log INFO "FSA_${checkItemId}:${dev} Max bad block rate is normal.detail: ${result}"
            fi
        fi
    done 
    #删除非健康状态SSD卡名字回显面前的逗号
    if [ -n "${healthy_abnormal_ssd}" ]; then

        healthy_abnormal_ssd=${healthy_abnormal_ssd#?}
    fi
    #删除电池故障SSD卡名字回显面前的逗号
    if [ -n "${power_abnormal_ssd}" ]; then
        power_abnormal_ssd=${power_abnormal_ssd#?}
    fi
    #删除坏块率超过11%的SSD卡名字回显面前的逗号
    if [ -n "${bad_block_abnormal_ssd}" ]; then
        bad_block_abnormal_ssd=${bad_block_abnormal_ssd#?}
    fi
    #根据不同异常状态的回显来判断异常的最终回显
    if [ -z "${healthy_abnormal_ssd}" -a -z "${power_abnormal_ssd}" -a -z "${bad_block_abnormal_ssd}" ]
    then
        originalInfo="${originalInfo}""result:All SSD card is healthy."
        log INFO "FSA_${checkItemId}:All SSD card is healthy."
        log INFO "FSA_${checkItemId}:ok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    elif [ -n "${healthy_abnormal_ssd}" -a -z "${power_abnormal_ssd}" -a -z "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0003"  # 0003 存在非健康状态的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    elif [ -z "${healthy_abnormal_ssd}" -a -n "${power_abnormal_ssd}" -a -z "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0004"  # 0004 存在电池故障状态的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    elif [ -z "${healthy_abnormal_ssd}" -a -z "${power_abnormal_ssd}" -a -n "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0005"  # 0005 存在坏块率超过11%的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    elif [ -n "${healthy_abnormal_ssd}" -a -n "${power_abnormal_ssd}" -a -z "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0006"  # 0006 存在非健康状态的SSD卡和电池故障状态的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}.\n"
        log ERROR "FSA_${checkItemId}:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        originalInfo="${originalInfo}""result:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    elif [ -n "${healthy_abnormal_ssd}" -a -z "${power_abnormal_ssd}" -a -n "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0007"  # 0007 存在非健康状态的SSD卡和坏块率超过11%的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}.\n"
        log ERROR "FSA_${checkItemId}:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        originalInfo="${originalInfo}""result:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    elif [ -z "${healthy_abnormal_ssd}" -a -n "${power_abnormal_ssd}" -a -n "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0008"  # 0008 存在电池故障状态的SSD卡和坏块率超过11%的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}.\n"
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}."
        originalInfo="${originalInfo}""result:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    elif [ -n "${healthy_abnormal_ssd}" -a -n "${power_abnormal_ssd}" -a -n "${bad_block_abnormal_ssd}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0009"  # 0009 存在非健康状态的SSD卡和电池故障状态的SSD卡和坏块率超过11%的SSD卡
        originalInfo="${originalInfo}""result:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}.\n"
        log ERROR "FSA_${checkItemId}:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        originalInfo="${originalInfo}""result:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}.\n"
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose battery status is not OK, include: ${power_abnormal_ssd}."
        originalInfo="${originalInfo}""result:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards whose abnormal max bad block rate over the limit, include: ${bad_block_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    else
        resultCode=1
        errorKey="${checkItemId}0010"  # 0010 内部错误，走到了异常分支
        originalInfo="${originalInfo}""result:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:there are some ssd cards that are not healthy, include: ${healthy_abnormal_ssd}."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    fi

}

log MUST "enter [$0],para=[$@]"
main $@
retValue=$?
log MUST "leave [$0],retValue=${retValue}"
exit ${retValue}
