#!/bin/bash
# Inspection item 154: RoCE network status check.
# Tracing is switched off explicitly in case the caller enabled it.
set +x

# Helper libraries. The functions used below but not defined in this file
# (GetInspectType, PrintRoCECheckTip, GetRoCECardNum, check_pass, ...) are
# presumably provided by these files - verify against the libraries.
source "/opt/node_type_recognize.sh"
G_INSPECT_MMLPATH="/opt/huawei/snas/script/inspect_mml"
source "${G_INSPECT_MMLPATH}/CheckItems"
source "${G_INSPECT_MMLPATH}/CheckRoCE"
source "${G_INSPECT_MMLPATH}/CommonFunc.sh"

# Identity of this inspection item and the file its results are written to.
CurInspectNum="154"
CurInspectFun="$(GetInspectType "${CurInspectNum}")"
RESULTFILE="/tmp/tmpResult${CurInspectFun}"
# Create the result file, or empty it if it is left over from an earlier run.
: >"${RESULTFILE}"

# Destination of the LOG helper.
LOG_FILE="/var/log/inspect.log"

# Append one timestamped line to the inspection log.
# Globals:   CurInspectFun, LOG_FILE (read)
# Arguments: message text; all arguments are joined by the first character
#            of IFS (a space by default)
# Outputs:   "[<date>][<pid>][<CurInspectFun>]<message>" appended to LOG_FILE
function LOG
{
    # Local, so that logging does not create or overwrite a global variable.
    local timestamp
    timestamp=$(date)
    printf '%s\n' "[${timestamp}][$$][$CurInspectFun]$*" >> "${LOG_FILE}"
}

# Return values 0, 1, 2, 3 and 4 mean pass, fail, check not completed,
# not involved and optimization suggested, respectively.
#
# PFC / packet-loss check for one port driven by mlx5_core.
# Globals:   CurInspectFun, RESULTFILE (read)
# Arguments: $1 - physical port name
# Outputs:   one verdict via PrintRoCECheckTip, progress messages via LOG
# Returns:   0 - check passed
#            1 - mlnx_qos does not show the expected PFC configuration
#            4 - packet loss suspected while PFC frames were sent but none
#                were received (switch PFC configuration should be checked)
function Checkport_Dcb_mlnx()
{
    local PhyPortName="$1"
    local businfo counter_path counter_value ofed_3_2 port_stats
    local tx_counter rx_counter pfc_tx pfc_rx
    local counter_paths=""
    local counters_read=0
    local packet_loss=0

    # 1. Is PFC configured on the NIC side?
    # The pattern is whitespace-exact on purpose. grep's output is discarded
    # rather than using -q so that grep consumes all of mlnx_qos's output.
    if ! mlnx_qos -i "$PhyPortName" | grep 'enabled     0   0   0   1' >/dev/null 2>&1; then
        # Fail, please contact a Huawei engineer.
        LOG "[$FUNCNAME][${LINENO}]mlnx_qos: pfc is not configured!"
        PrintRoCECheckTip "${CurInspectFun}" 1 "${RESULTFILE}" "PFC is not configured on ${PhyPortName}"
        return 1
    fi

    # 2. Are packets being lost?
    # Locate the out_of_sequence counter(s) that belong to this port's PCI
    # address. An empty bus-info must not be used as a filter: it would match
    # every counter on the host.
    businfo=$(ethtool -i "${PhyPortName}" | awk '/bus-info:/ {print $2}')
    if [[ -n "${businfo}" ]]; then
        # -F: the PCI address is a literal string, not a regular expression.
        counter_paths=$(find /sys/devices/ -name out_of_sequence | grep -F -- "${businfo}")
    fi

    # More than one counter file can match; read them one by one instead of
    # handing a multi-line string to a single cat. Any non-zero counter counts
    # as packet loss.
    while IFS= read -r counter_path; do
        [[ -r "${counter_path}" ]] || continue
        counter_value=$(cat "${counter_path}")
        counters_read=$((counters_read + 1))
        if [[ "${counter_value}" != "0" ]]; then
            packet_loss=1
        fi
    done <<< "${counter_paths}"

    if (( counters_read == 0 )); then
        # No readable counter: stay conservative and fall through to the PFC
        # frame counters, as if packets had been lost.
        LOG "[$FUNCNAME][${LINENO}]out_of_sequence counter not readable for ${PhyPortName} (bus-info=${businfo}), checking PFC counters."
        packet_loss=1
    fi

    if (( packet_loss == 0 )); then
        # No packet loss, this branch passes.
        LOG "[$FUNCNAME][${LINENO}]Good communication quality, no discard package."
    else
        # The PFC counter names depend on the OFED version.
        ofed_3_2=$(ofed_info -s | grep -F 'MLNX_OFED_LINUX-3.2')
        LOG "[$FUNCNAME][${LINENO}]IfOfedVer_3_2=${ofed_3_2}."
        if [[ -n "${ofed_3_2}" ]]; then
            tx_counter="pause_ctrl_tx"
            rx_counter="pause_ctrl_rx"
        else
            tx_counter="tx_prio3_pause:"
            rx_counter="rx_prio3_pause:"
        fi

        # One ethtool call serves both counters.
        port_stats=$(ethtool -S "${PhyPortName}")
        pfc_tx=$(grep -E "${tx_counter}" <<< "${port_stats}" | awk '{print $2}')
        pfc_rx=$(grep -E "${rx_counter}" <<< "${port_stats}" | awk '{print $2}')

        LOG "[$FUNCNAME][${LINENO}]pfc_tx=${pfc_tx}, pfc_rx=${pfc_rx}"
        # Packet loss: PFC_TX > 0 and PFC_RX = 0?
        if [[ "${pfc_tx}" != "0" && "${pfc_rx}" == "0" ]]; then
            # Optimization suggested: check the PFC configuration of the switch.
            LOG "[$FUNCNAME][${LINENO}]please check the pfc configuration of the switch."
            PrintRoCECheckTip "${CurInspectFun}" 4 "${RESULTFILE}" "PFC packages have not been received on ${PhyPortName}."
            return 4
        fi
    fi
    PrintRoCECheckTip "${CurInspectFun}" 0 "${RESULTFILE}" "${PhyPortName} PFC check is OK."
    return 0
}

# Return values 0, 1, 2, 3 and 4 mean pass, fail, check not completed,
# not involved and optimization suggested, respectively.
#
# PFC / packet-loss / ECN check for one port driven by hinic (1822 NIC).
# Globals:   CurInspectFun, RESULTFILE (read)
# Arguments: $1 - physical port name
# Outputs:   one verdict via PrintRoCECheckTip, progress messages via LOG;
#            nothing is written to stdout by this function itself
# Returns:   0 - check passed (also when RoCE v2 is in use but no CNP counter
#                is reported)
#            1 - hinicadm does not report PFC as enabled
#            3 - the node is not running RoCE v2
#            4 - PSN-error discards seen while PFC frames were sent but none
#                were received (switch PFC configuration should be checked)
function Checkport_Dcb_1822()
{
    local PhyPortName="$1"
    local hinicinfo cardname psn_err_counter port_stats pfc_rx pfc_tx handle_ecn

    # 1. Is PFC configured on the NIC side?
    # Output is discarded, as in the Mellanox variant of this check, so that
    # probe lines do not end up on the script's stdout.
    if ! hinicadm pfc -i "$PhyPortName" | grep enable >/dev/null 2>&1; then
        # Fail, please contact a Huawei engineer.
        LOG "[$FUNCNAME][${LINENO}]hinicadm: pfc is not configured!"
        PrintRoCECheckTip "${CurInspectFun}" 1 "${RESULTFILE}" "PFC is not configured on ${PhyPortName}"
        return 1
    fi

    # 2. Are packets being lost?
    # Find the card the port belongs to: keep the card and port lines of
    # "hinicadm info", then take the line directly before the port.
    # Anything that is not hinic0 is assumed to be hinic1.
    hinicinfo=$(hinicadm info | grep -E "hinic|$PhyPortName" | grep "${PhyPortName}" -B1)
    if [[ "${hinicinfo}" == *hinic0* ]]; then
        cardname="hinic0"
    else
        cardname="hinic1"
    fi

    # 1822 network card statistics, only displayed if it is not equal to 0
    psn_err_counter=$(hinicadm counter -i "${cardname}" -t 1 -x 30 | grep 'cause_discard_roce_ppe_rx_e_psn_err')
    LOG "[$FUNCNAME][${LINENO}]cardname=${cardname}, cause_discard_roce_ppe_rx_e_psn_err = ${psn_err_counter}."

    if [[ -n "${psn_err_counter}" ]]; then
        # One ethtool call serves both counters.
        port_stats=$(ethtool -S "$PhyPortName")
        pfc_rx=$(grep -E "mac_rx_pfc_pri3_pkt_num" <<< "${port_stats}" | awk '{print $2}')
        pfc_tx=$(grep -E "mac_tx_pfc_pri3_pkt_num" <<< "${port_stats}" | awk '{print $2}')
        LOG "[$FUNCNAME][${LINENO}]pfc_rx=${pfc_rx}, pfc_tx=${pfc_tx}."
        # PFC_TX > 0 and PFC_RX = 0?
        if [[ "${pfc_tx}" != "0" && "${pfc_rx}" == "0" ]]; then
            # Optimization suggested: check the PFC configuration of the switch.
            LOG "[$FUNCNAME][${LINENO}]please confirm that the lan-switch's pfc has taken effect."
            PrintRoCECheckTip "${CurInspectFun}" 4 "${RESULTFILE}" "PFC packages have not been received on ${PhyPortName}."
            return 4
        fi
    fi

    # 3. If this is RoCE v2, has the ECN mechanism taken effect?
    if ! /usr/bin/cma_roce_mode | grep "RoCE v2" >/dev/null 2>&1; then
        # NOTE(review): the tip is written with status 0 (pass) while the
        # function returns 3 (not involved) - confirm which one is intended.
        PrintRoCECheckTip "${CurInspectFun}" 0 "${RESULTFILE}" "It is not RoCEV2."
        LOG "[$FUNCNAME][${LINENO}]It is not RoCEV2."
        return 3
    fi

    # Check if the ECN mechanism takes effect
    handle_ecn=$(hinicadm counter -i "${cardname}" -t 1 -x 33 | grep cnp_hdle_rx_flow | awk '{print $2}')
    LOG "[$FUNCNAME][${LINENO}]It is RoCEV2. handle_ecn=$handle_ecn."
    if [[ -z "${handle_ecn}" ]]; then
        # Informational only, shown in normal font: no CNP packets were
        # received, the ECN configuration of the switch may need to be
        # checked. The inspection still passes.
        LOG "[$FUNCNAME][${LINENO}]ECN has not taken effect."
        PrintRoCECheckTip "${CurInspectFun}" 0 "${RESULTFILE}" "No CNP packets received; the switch may not have ECN enabled."
        return 0
    fi

    PrintRoCECheckTip "${CurInspectFun}" 0 "${RESULTFILE}" "${PhyPortName} check is OK."
    return 0
}

# RoCE network status check.
# Runs the driver-specific PFC check on every RoCE port of the node and
# reports the combined verdict.
# Globals:   CurInspectFun, RESULTFILE (read)
# Arguments: none
# Outputs:   per-port tips via PrintRoCECheckTip and the overall verdict via
#            PrintRoCECheckResult
# Returns:   3 when the node has no RoCE card or get_cluster_lite_deploy
#            succeeds; otherwise the status of the final PrintRoCECheckResult
function CheckRoCENetworkStatus()
{
    local isPass=3
    local ROCE_LIST RoCENum portname PhyPortName driver_name ret
    LOG "[$FUNCNAME][${LINENO}]CheckRoCENetwork start!"

    # GetRoCECardNum prints the port list; its exit status is used as the
    # number of RoCE cards, so it must be read right after the call.
    ROCE_LIST=$(GetRoCECardNum)
    RoCENum=$?
    if (( RoCENum == 0 )); then
        # The back end is not RoCE: report "not involved".
        LOG "[$FUNCNAME][${LINENO}]There is no RoCE on the node, so ‘not involve’."
        PrintRoCECheckTip "${CurInspectFun}" "${isPass}" "${RESULTFILE}" "Node is without RoCE card, is not involved"
        PrintRoCECheckResult "${CurInspectFun}" "${isPass}" "${RESULTFILE}"
        return ${isPass}
    fi

    # A lite deployment needs no back-end NIC: report "not involved".
    if get_cluster_lite_deploy 2>/dev/null; then
        LOG "[$FUNCNAME][${LINENO}]There is lite node, so ‘not involve’."
        PrintRoCECheckTip "${CurInspectFun}" "${isPass}" "${RESULTFILE}" "Cluster with single node is not involved"
        PrintRoCECheckResult "${CurInspectFun}" "${isPass}" "${RESULTFILE}"
        return ${isPass}
    fi

    # Normally the back end has 2 NICs, so 2 results are recorded.
    # The list is deliberately left unquoted so that it splits into one word
    # per port; if sort fails, the unsorted list is used instead.
    for portname in $(sort -u <<< "$ROCE_LIST" || echo "${ROCE_LIST}")
    do
        # Strip a VLAN sub-interface suffix of any length ("eth0.100" -> "eth0").
        PhyPortName="${portname%%.*}"
        driver_name=$(ethtool -i "${PhyPortName}" | grep 'driver:' | awk '{print $2}')
        LOG "[$FUNCNAME][${LINENO}]portname=${portname}, driver_name=${driver_name}."

        # Ports with any other driver are not checked and count as passed.
        ret=0
        if [[ "${driver_name}" == "mlx5_core" ]]; then
            Checkport_Dcb_mlnx "$PhyPortName"
            ret=$?
        elif [[ "${driver_name}" == "hinic" ]]; then
            Checkport_Dcb_1822 "$PhyPortName"
            ret=$?
        fi
        # Fold this port's result into the overall verdict.
        isPass=$(check_pass "${isPass}" "${ret}")
    done
    PrintRoCECheckResult "${CurInspectFun}" "${isPass}" "${RESULTFILE}"
}

# Script entry point: run the check. The function's return value is not
# propagated - the script always exits 0. The verdict is handed to
# PrintRoCECheckResult together with RESULTFILE inside the function.
CheckRoCENetworkStatus
exit 0


