#!/bin/bash

#############################################################
#
# Name: check the remaining life of ES3000 V6 disks
# Error codes:
#  0001 Optimization recommended; contact an R&D engineer.
#############################################################
# Directory holding the shared inspection helper libraries.
UPG_LIB_PATH="/opt/omm/oma/atoms/Inspect/lib"
# Fixed command search path used by this script.
PATH="/sbin:/usr/sbin:/usr/local/sbin:/root/bin:/usr/local/bin:/usr/bin:/bin:/opt/omm/oma/workspace/tools"

# Load the helper libraries; any failure ends the script with status 130.
# log.sh is loaded first and its failure is reported with echo; the other
# two report through log and send their own output to ${LOG_FILE}
# (presumably both are provided by log.sh - confirm).
if ! source "${UPG_LIB_PATH}/log.sh"; then
    echo "source ${UPG_LIB_PATH}/log.sh failed."
    exit 130
fi
if ! source "${UPG_LIB_PATH}/version.sh" >> ${LOG_FILE} 2>&1; then
    log ERROR "source ${UPG_LIB_PATH}/version.sh failed."
    exit 130
fi
if ! source "${UPG_LIB_PATH}/out_put.sh" >> ${LOG_FILE} 2>&1; then
    log ERROR "source ${UPG_LIB_PATH}/out_put.sh failed."
    exit 130
fi

# Result fields handed to FSA_json_output by main.
checkItemId="4433"
resultCode=0
errorKey=""
params=""
originalInfo=""

# Agent monitor configuration read by checkOSDNode.
agentMonitorFile="/opt/dsware/agent/conf/agentMonitor"
NET_FILTER_CFG_PATH="/proc/sys/net/netfilter/"
# Accumulates the raw SMART text appended to the final report.
SMARTINFO="smart infos"

# Used-life percentage that calcTimeRemaining extrapolates to.
PERCENTAGE_LIMIT=18
# Codes consulted by main: a disk is reported only when the 8 characters
# at offset 2 of its serial number match one of these entries.
# One code per line; the string starts and ends with a newline.
CODE_WHITELIST="
02312YXY
02313EDD
02313EDF
02313KKD
02314BAR
02314CLB
02314BAS
02314CLG
02313EDG
02313EDH
02313TQT
02313KKH
02314CLD
02314BAT
02314CLH
02314BAU
02314DJK
02550078
02550079
02313FBD
02313FBE
02313KKS
02550168
02314BAC
02314CKU
02550522
02550552
02313KKT
02313XJU
02314BAJ
02314CKV
02550164
02550553
02550523
02313NUU
02314BAK
02314CKW
02550064
02313EDT
02313EDU
02550166
02313KJW
02313NUW
02313XJW
02550554
02550524
02314BAL
02314CKX
02314BAQ
02314CKY
02314BAM
02314CLJ
02314DJP
02550065
02313EDV
02313EDW
02313KJV
02314BAN
02314CLA
02314BAP
02314CLK
02550323
02550555
02550525
02550324
02550556
02550526
02550062
02550156
02550557
02550527
02550063
02550157
02550558
02550528
02550066
02550559
02550529
02312UBN
02312UCF
02312UCG
02312UCH
02312UCL
02312UCW
02312UCX
02312XDV
02312XDW
02312UED
02312UEE
02312UEH
02312UEJ
"

##############################################################
## Check whether this host is a storage (OSD) node.
## Globals:  agentMonitorFile (read) - path of the agent monitor file
## Returns:  0 when at least one line of the monitor file contains
##           "osd", "monitor" and "True";
##           1 otherwise, including when the file does not exist
##############################################################
function checkOSDNode()
{
    # No monitor file (or no path configured): not a storage node.
    # The path is quoted so that a value with spaces is still tested as
    # one file and an empty value cannot fall through to the reads below.
    if [[ ! -f "${agentMonitorFile}" ]]; then
        return 1
    fi

    # Look for a line that mentions osd, monitor and True.
    if grep -- osd "${agentMonitorFile}" | grep monitor | grep -q True; then
        return 0
    fi

    return 1
}

##############################################################
## Collect SMART and vendor SMART text for one disk.
## Arguments: $1 - one '|'-separated disk description line; field 3 is
##                 the device name
## Globals:   SMARTINFO (appended with the whitespace-collapsed output)
##            smartctlLifeTemp, smartctlTimeTemp (written, SAS path)
##            hioadmTemp, smioNvmeCliTemp (written, other disks;
##            hioadmTemp is cleared when "hioadm info -s" fails)
## Returns:   0 on success; 1 when a required tool invocation fails
##############################################################
function getSmarts()
{
    local record=$1
    local devName=$(echo ${record} | awk -F"|" '{print $3}' | sed -e 's/ //g')
    local sasTag=$(echo ${record} | grep "SAS_SSD")
    local vendorSmart

    # Neither tagged SAS_SSD nor an "sd" device: query through hioadm,
    # falling back to smio_nvme_cli when hioadm fails.
    if [ -z "${sasTag}" ] && ! [[ "${devName}" =~ "sd" ]]; then
        if hioadmTemp=$(timeout -k 3 30 hioadm info -d ${devName} -s); then
            SMARTINFO="${SMARTINFO} $(echo ${hioadmTemp})"
        else
            # An empty hioadmTemp marks that the fallback output is in use.
            hioadmTemp=
            # The tool is looked up next to the smio_jbodmng process path.
            local smioDir=$(ls -l /proc/$(pidof smio_jbodmng) | grep smio_jbodmng | awk '{print $11}' | xargs dirname)
            if ! smioNvmeCliTemp=$(timeout -k 3 30 ${smioDir}/tools/smio_nvme_cli /dev/${devName}); then
                return 1
            fi
            SMARTINFO="${SMARTINFO} $(echo ${smioNvmeCliTemp})"
        fi
        # Vendor SMART is best effort: its exit status is not checked.
        vendorSmart=$(timeout -k 3 30 hioadm info -d ${devName} -e)
        SMARTINFO="${SMARTINFO} vendor smart:$(echo ${vendorSmart})"
        return 0
    fi

    # SAS SSD / sd device: both smartctl queries must succeed.
    if ! smartctlLifeTemp=$(timeout -k 3 30 smartctl -a /dev/${devName}); then
        return 1
    fi
    if ! smartctlTimeTemp=$(timeout -k 3 30 smartctl -l background /dev/${devName}); then
        return 1
    fi
    # Vendor SMART is best effort: its exit status is not checked.
    vendorSmart=$(timeout -k 3 30 hioadm info -d ${devName} -e)
    SMARTINFO="${SMARTINFO} $(echo ${smartctlLifeTemp}) $(echo ${smartctlTimeTemp}) vendor smart:$(echo ${vendorSmart})"
    return 0
}

##############################################################
## Estimate how many days remain before a disk reaches the
## PERCENTAGE_LIMIT used-life percentage, extrapolating linearly from
## its power-on hours and current used-life percentage.
## Arguments: $1 - one '|'-separated disk description line; field 3 is
##                 the device name
## Globals:   PERCENTAGE_LIMIT (read)
##            smartctlLifeTemp, smartctlTimeTemp, hioadmTemp,
##            smioNvmeCliTemp (read, raw tool output)
##            timeRemaining (written, whole days, only for 4 and 5)
## Returns:   2 - power-on hours or used-life percentage could not be
##                obtained as a non-negative integer
##            3 - power-on hours is 0 or used-life percentage is <= 1
##            4 - remaining time is >= 0 days
##            5 - remaining time is < 0 days
##############################################################
function calcTimeRemaining()
{
    local lineInfo=$1
    local device
    local pohVal=""
    local percentageUsed=""

    device=$(printf '%s\n' "${lineInfo}" | awk -F'|' '{print $3}' | tr -d ' \t')

    if [[ "${lineInfo}" == *SAS_SSD* || "${device}" == *sd* ]]; then
        # SAS SSD / sd device: values come from the smartctl output.
        pohVal=$(awk -F ':' '/power on time/{printf("%s", $2)}' <<< "${smartctlTimeTemp}" | awk -F" " '{print $2}' | sed -e 's/[ ]//g' | sed -e 's/,//g' | grep -v '[A-Z|a-z]')
        percentageUsed=$(awk -F ':' '/(Percentage|Media) used endurance indicator/{printf("%s", $2)}' <<< "${smartctlLifeTemp}" | sed -e 's/[ ]//g' | sed -e 's/[%]*$//g' | grep -v '[A-Z|a-z]')
    elif [[ -z "${hioadmTemp}" ]]; then
        # hioadmTemp is empty: parse the smio_nvme_cli output instead.
        pohVal=$(awk -F ':' '/power_on_hours/{printf("%s", $2)}' <<< "${smioNvmeCliTemp}" | sed -e 's/[ ]//g' | sed -e 's/,//g' | grep -v '[A-Z|a-z]')
        percentageUsed=$(awk -F ':' '/percentage_used/{printf("%s", $2)}' <<< "${smioNvmeCliTemp}" | sed -e 's/[ ]//g' | sed -e 's/[%]*$//g' | grep -v '[A-Z|a-z]')
    else
        pohVal=$(awk -F ':' '/power on hours/{printf("%s", $2)}' <<< "${hioadmTemp}" | sed -e 's/[ ]//g' | sed -e 's/,//g'  | sed -e 's/[a-z|A-Z]//g' | grep -v '[A-Z|a-z]')
        percentageUsed=$(awk -F ':' '/percentage used/{printf("%s", $2)}' <<< "${hioadmTemp}" | sed -e 's/[ ]//g' | sed -e 's/[%]*$//g' | grep -v '[A-Z|a-z]')
    fi

    # The tool output did not yield one of the two values.
    if [[ -z "${pohVal}" || -z "${percentageUsed}" ]]; then
        return 2
    fi

    # Anything that is not a plain non-negative integer (for example
    # "10.5") would make the arithmetic below fail, so it is reported as
    # "value not obtained" as well.
    if [[ ! "${pohVal}" =~ ^[0-9]+$ || ! "${percentageUsed}" =~ ^[0-9]+$ ]]; then
        return 2
    fi

    # Force base 10 so a leading zero ("08") is not read as octal.
    pohVal=$((10#${pohVal}))
    percentageUsed=$((10#${percentageUsed}))

    # Too little usage data to extrapolate (also avoids division by zero).
    if (( pohVal == 0 || percentageUsed <= 1 )); then
        return 3
    fi

    # Hours needed to reach the limit at the observed wear rate, minus
    # the hours already spent, converted to whole days.
    local timeRemainingDay=$(( (pohVal * PERCENTAGE_LIMIT / percentageUsed - pohVal) / 24 ))
    timeRemaining=${timeRemainingDay}
    if (( timeRemainingDay >= 0 )); then
        return 4
    fi

    return 5
}

##############################################################
## Run check item ${checkItemId}: report disks of the ES3000/HSSD V6
## family whose firmware version and serial-number code are in the
## risk range, together with an estimate of their remaining time.
## Globals:  checkItemId, params, CODE_WHITELIST (read)
##           resultCode, errorKey, originalInfo, SMARTINFO (written)
##           smartctlLifeTemp, smartctlTimeTemp, hioadmTemp,
##           smioNvmeCliTemp, timeRemaining (reset per disk; shared with
##           getSmarts and calcTimeRemaining)
## Outputs:  one FSA_json_output call, unless the item is not selected
## Returns:  0 when nothing is at risk or the check does not apply;
##           1 when at least one disk is at risk
##############################################################
main()
{
    if ! agent_items_check "${checkItemId}"; then
        log INFO "${checkItemId} do not select, pass"
        return 0
    fi

    # Only node version V100R006C30 is checked.
    local nodePath="/opt/dsware/DSwareAgentNodeVersion"
    local nodeVersion
    nodeVersion=$(cat "${nodePath}" | grep "nodeVersion" | awk -F'=' '{print $2}' | sed -e 's/[ ]//g' | grep -E 'V100R006C30')
    if [[ -z "${nodeVersion}" ]]; then
        originalInfo="NodeVersion is not risk, no need to check."
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    fi

    # Only storage nodes are checked.
    if ! checkOSDNode; then
        originalInfo="Not storage node, no need check."
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    fi

    local diskInfo
    diskInfo=$(cat /proc/smio_host | grep -E "HSSD-D[6-9]|HWE[5-9]")
    if [[ -z "${diskInfo}" ]]; then
        originalInfo="This node does not have a ES3000/HSSD disk. No need check."
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 0
    fi

    local contactMsg=". Not passed. If the firmware version of a disk in the device is too early, the number of bad sectors may increase and the fault rate may increase if the disk runs for a long time. Contact R&D engineers"
    local upgradeMsg=". Not passed. If the firmware version of a disk in the device is too early, the number of bad sectors may increase and the fault rate may increase if the disk runs for a long time. Upgrade the firmware version to 8320 or later as soon as possible. At least one spare part must be prepared during the upgrade"

    # Cleared after the first tool failure; SMART collection and the
    # remaining-time estimate are then skipped for all later disks.
    local smartctlIsOk=1
    local line sn fw mn code0231 errCode retProcess
    while IFS= read -r line
    do
        # Per-disk values shared with getSmarts / calcTimeRemaining.
        smartctlLifeTemp=NA
        smartctlTimeTemp=NA
        hioadmTemp=NA
        smioNvmeCliTemp=NA
        timeRemaining=NA
        errCode=NA

        # Field 7 is the serial number; field 8 is a comma-separated list
        # whose 2nd item is the model and 3rd item the firmware version.
        sn=$(printf '%s\n' "${line}" | awk -F'|' '{print $7}' | tr -d ' \t')
        fw=$(printf '%s\n' "${line}" | awk -F'|' '{print $8}' | awk -F',' '{print $3}' | tr -d ' \t' | grep '^[0-9]\{4\}')
        mn=$(printf '%s\n' "${line}" | awk -F'|' '{print $8}' | awk -F',' '{print $2}' | tr -d ' \t')

        if (( smartctlIsOk == 1 )); then
            SMARTINFO="${SMARTINFO} sn:${sn}"
            getSmarts "${line}"
            if [[ $? -eq 1 ]]; then
                smartctlIsOk=0
                errCode=1
            fi
        fi

        if [[ ! "${line}" =~ HSSD-D7|HWE6 ]]; then
            log INFO "Disk sn:${sn} fw:${fw} mn:${mn}, is not ES3000/HSSD V6 disk, no need check."
            continue
        fi

        # The whitelist code is the 8 characters at offset 2 of the serial.
        code0231="${sn:2:8}"
        if [[ ${#code0231} -ne 8 || ${#fw} -ne 4 ]]; then
            log INFO "Disk sn:${sn} fw:${fw} mn:${mn}, the disk info is not as expected."
            continue
        fi

        # Only firmware versions 5014..8316 are affected. fw is exactly
        # four digits here; 10# keeps a leading zero from meaning octal.
        if (( 10#${fw} < 5014 || 10#${fw} > 8316 )); then
            log INFO "Disk sn:${sn} fw:${fw} mn:${mn}, fw version is no risk, no need check."
            continue
        fi

        # Exact, fixed-string match against one whitelist entry per line,
        # so characters in the serial are never interpreted as a regex.
        if ! tr -s '[:space:]' '\n' <<< "${CODE_WHITELIST}" | grep -Fxq -- "${code0231}"; then
            log INFO "Disk sn:${sn} mn:${mn}, not exist risk , no need check."
            continue
        fi

        resultCode=1
        retProcess="${contactMsg}"
        if (( smartctlIsOk == 1 )); then
            calcTimeRemaining "${line}"
            errCode=$?
            if [[ ${errCode} -eq 2 ]]; then
                smartctlIsOk=0
            elif [[ ${errCode} -eq 3 || ${errCode} -eq 4 ]]; then
                # Codes 3 and 4 get the upgrade advice; every other code
                # keeps the contact-R&D text.
                retProcess="${upgradeMsg}"
            fi
        fi

        originalInfo="${originalInfo}Disk sn:${sn} fw:${fw} mn:${mn} time remaining:${timeRemaining}d  code:${errCode}${retProcess}.\n"
        # The two trailing arguments are deliberately unquoted, as before,
        # so an empty SCRIPT_NAME is not passed as an empty argument.
        log ERROR "Disk sn:${sn} fw:${fw} mn:${mn} time remaining:${timeRemaining}d code:${errCode}${retProcess}." ${FUNCNAME} ${SCRIPT_NAME}
    done <<< "${diskInfo}"

    if [[ ${resultCode} -eq 0 ]]; then
        originalInfo="This node has no risk.\n"
    fi

    if [[ ${resultCode} -eq 1 ]]; then
        errorKey="${checkItemId}000${resultCode}"
    fi

    originalInfo="${originalInfo}${SMARTINFO}\n"
    FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
    return ${resultCode}
}

# Script entry: log the start, run the check, then log and propagate
# main's status as the exit code.
# "$*" keeps all script arguments inside the single message word; "$@"
# embedded in a quoted string would split the message into several
# arguments to log when more than one argument is given.
log MUST "enter [$0],para=[$*]"
main
retValue=$?
log MUST "leave [$0],retValue=${retValue}"
exit ${retValue}

##############################
#
#  Error codes:
#  0001 The ES3000 V6 disk is exposed to the M14 risk.
##############################