#!/bin/bash

#############################################################
#
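# Name       : Check that the MDC/ZK/OSD/VBS/VFS/KVS processes match the FSA monitoring configuration
# Error codes:
#         0000 OK
#         0001 This node has no /opt/dsware/agent/conf/agentMonitor monitor file
#         0002 One or more of the MDC/ZK/OSD/VBS/VFS/KVS processes do not match the FSA monitoring configuration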
#
#############################################################

# Directory that holds the shared inspection helper libraries.
UPG_LIB_PATH="/opt/omm/oma/atoms/Inspect/lib"

# log.sh is sourced first; until it has loaded, a failure can only be reported with echo.
source "${UPG_LIB_PATH}/log.sh" || { echo "source ${UPG_LIB_PATH}/log.sh failed."; exit 130; }
# LOG_FILE is not assigned in this script - presumably log.sh defines it (TODO confirm).
# The redirect target is quoted so a log path containing whitespace or glob
# characters cannot turn into an "ambiguous redirect".
source "${UPG_LIB_PATH}/out_put.sh" >> "${LOG_FILE}" 2>&1 || { log ERROR "source ${UPG_LIB_PATH}/out_put.sh failed."; exit 130; }
source "${UPG_LIB_PATH}/version.sh" >> "${LOG_FILE}" 2>&1 || { log ERROR "source ${UPG_LIB_PATH}/version.sh failed."; exit 130; }

# Identifier of this check item; used as log prefix and as prefix of the error keys.
checkItemId="4361"
# Result fields handed to FSA_json_output by main().
resultCode=0      # set to 1 when the check fails
errorKey=""       # "<checkItemId>0001" or "<checkItemId>0002" on failure
params=""         # never modified in this script
originalInfo=""   # accumulated trace of commands, counts and verdicts


#######################################
# Check whether the number of running processes of one type matches the
# number of entries for that type in the FSA monitor file.
#
# The monitored count is the number of lines in
# /opt/dsware/agent/conf/agentMonitor that contain the type name
# (case-insensitive), do not contain "alarm" and contain "True".
# If that count is 0 the type is treated as absent on this node.
# Otherwise the running-process count is sampled 5 times, one second apart,
# and the check fails when more than 2 samples differ from the monitored count.
#
# Globals:
#   originalInfo (appended) - trace of the commands, counts and verdicts
#   resultCode   (written)  - set to 1 when the check fails
#   checkItemId  (read)     - prefix of the error log lines
# Arguments:
#   $1 - process type; "ZK" uses its own ps filter, any other value is
#        matched as dsware_<type> (case-insensitive)
# Returns:
#   0 - type not monitored on this node, or at most 2 samples mismatched
#   1 - more than 2 samples mismatched
#######################################
function CheckProcess()
{
    # Everything below is local so repeated calls do not leak state into the caller.
    local process_type=$1
    local process_monitor_count
    local process_running_count
    local count=0
    local attempt
    local sample_total=5
    local mismatch_limit=2

    # The pipelines are kept in the same shape as the "command:" text recorded
    # in originalInfo, so the trace shows what was actually executed.
    process_monitor_count=$(cat /opt/dsware/agent/conf/agentMonitor | grep -i -- "${process_type}" | grep -v alarm | grep -E 'True' | wc -l)
    originalInfo+="command:cat /opt/dsware/agent/conf/agentMonitor | grep -i ${process_type} | grep -v alarm | grep -E 'True' | wc -l\n"
    originalInfo+="review:${process_type}_monitor_count=${process_monitor_count}\n"
    if [ "${process_monitor_count}" -eq 0 ]
    then
        originalInfo+="result:This node does not have ${process_type}.\n"
        return 0
    fi

    # Record once which ps pipeline the samples below are taken with.
    if [ "${process_type}" == "ZK" ]
    then
        originalInfo+="command:ps -efww |grep QuorumPeerMain |grep zookeeper|grep Dzookeeper.log.dir | grep dsware | grep '/opt/dsware/agent/zk/data' | grep -v grep | wc -l\n"
    else
        originalInfo+="command:ps -efww| grep -i dsware_${process_type} | grep -v grep | wc -l\n"
    fi

    for ((attempt = 1; attempt <= sample_total; attempt++))
    do
        if [ "${process_type}" == "ZK" ]
        then
            process_running_count=$(ps -efww | grep QuorumPeerMain | grep zookeeper | grep Dzookeeper.log.dir | grep dsware | grep '/opt/dsware/agent/zk/data' | grep -v grep | wc -l)
        else
            process_running_count=$(ps -efww | grep -i -- "dsware_${process_type}" | grep -v grep | wc -l)
        fi
        originalInfo+="review:${process_type}_process_running_count=${process_running_count}\n"

        if [ "${process_monitor_count}" -ne "${process_running_count}" ]
        then
            count=$((count + 1))
            originalInfo+="result:This is ${count} times of '${process_type} process's monitor_count is not equal running_count , process_monitor_count=${process_monitor_count} , process_running_count=${process_running_count}.'\n"
            log ERROR "FSA_${checkItemId}:This is ${count} times of '${process_type} process's monitor_count is not equal running_count , process_monitor_count=${process_monitor_count} , process_running_count=${process_running_count}.'"
        fi

        # Pause only between samples; nothing is gained by waiting after the last one.
        if [ "${attempt}" -lt "${sample_total}" ]
        then
            sleep 1
        fi
    done

    # The check fails only when more than mismatch_limit samples disagreed.
    if [ "${count}" -gt "${mismatch_limit}" ]
    then
        resultCode=1
        originalInfo+="result:${process_type} status error , count=${count}.\n\n"
        log ERROR "FSA_${checkItemId}:${process_type} status error , count=${count}."
        return 1
    fi

    originalInfo+="result:${process_type} process's monitor_count is equal running_count.\n\n"
    return 0
}


#######################################
# Run check item ${checkItemId}: verify that every monitored process type
# (MDC, ZK, OSD, VBS, VFS, KVS) matches the FSA monitor file, then report the
# outcome through FSA_json_output.
#
# Globals:
#   checkItemId, params (read)
#   resultCode, errorKey, originalInfo (written)
# Arguments:
#   none are used
# Returns:
#   0 - agent_items_check returned non-zero (item not selected), or all
#       process types passed
#   1 - the monitor file is missing (error key <checkItemId>0001) or at least
#       one process type failed (error key <checkItemId>0002)
#######################################
function main()
{
    local monitor_file="/opt/dsware/agent/conf/agentMonitor"
    local result=""
    local proc

    # A non-zero status from agent_items_check means this item is not selected.
    if ! agent_items_check "${checkItemId}"
    then
        log INFO "${checkItemId} do not select, pass"
        return 0
    fi

    if [ ! -f "${monitor_file}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0001"  # 0001: this node has no agentMonitor file
        originalInfo="${originalInfo}result:There is no ${monitor_file} file."
        # Same "FSA_<id>:" prefix as every other log line of this item.
        log ERROR "FSA_${checkItemId}:There is no ${monitor_file} file."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    fi

    # Collect the names of all process types whose check failed.
    for proc in MDC ZK OSD VBS VFS KVS
    do
        CheckProcess "${proc}" || result="${result}${proc} "
    done

    if [ -n "${result}" ]
    then
        resultCode=1
        errorKey="${checkItemId}0002"  # 0002: one or more process types do not match the FSA monitor file
        originalInfo="${originalInfo}result:${result} process status error."
        log ERROR "FSA_${checkItemId}:${result} process status error."
        log ERROR "FSA_${checkItemId}:unok"
        FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
        return 1
    fi

    originalInfo="${originalInfo}result:All of process in this node is healthy."
    log INFO "FSA_${checkItemId}:All of process in this node is healthy."
    log INFO "FSA_${checkItemId}:ok"
    FSA_json_output "${checkItemId}" "${resultCode}" "${errorKey}" "${params}" "${originalInfo}"
    return 0
}

# Script entry: log start, run the check, log the result and exit with main's status.
# "$*" keeps the message a single word; "$@" embedded in a string would split
# it into several words as soon as more than one argument is passed.
log MUST "enter [$0],para=[$*]"
main "$@"
retValue=$?
log MUST "leave [$0],retValue=${retValue}"
exit "${retValue}"

