#!/bin/bash
# Script file name without its directory; used as a tag in log lines and in
# the repair hint printed by do_check. "$0" is quoted (and guarded by --) so a
# path containing spaces or starting with '-' is handled correctly.
MY_NAME=$(basename -- "$0")
# The settings below are not referenced by the code visible in this file.
# NOTE(review): presumably kept for other tooling -- confirm before removing.
CFG="/opt/huawei/snas/etc/snas.ini"
KEY="cm_mon_exclusion"
IAM_KEY="ignore_if_connect_to_iam"
SWITCH_ON=1
SWITCH_OFF=0
# Append one line to /var/log/snas_CM.log.
# Arguments: any number of words; they are joined by the first character of
#            IFS (a space by default) to form the message.
# Output line format: [<script name>][<$0>][YYYY/mm/dd HH:MM:SS] <message>
# Returns: the status of the write to the log file.
LOG ()
{
    local time_now
    time_now=$(date "+%Y/%m/%d %H:%M:%S")
    # "$*" rather than "$@": inside a larger string "$@" still expands to
    # several words, which only works by accident of how echo joins them.
    printf '%s\n' "[${MY_NAME}][$0][${time_now}] $*" >> /var/log/snas_CM.log
}


# Print a message in the color that matches its level prefix.
function showWithColor()
{
    # Usage: showWithColor "<LEVEL>: text" [more words...]
    # All arguments are joined into one message. The text before the first
    # ':' (or the whole message when there is no ':') is the level keyword
    # that selects the color. Messages with an unknown level are printed
    # as-is, without color and without escape interpretation.
    # Always returns 0.
    local message="$*"
    local level=${message%%:*}
    local reset='\E[0m'
    local color=''

    case "${level}" in
        "ERROR")       color='\E[1;35m' ;;   # magenta
        "WARNING")     color='\E[1;36m' ;;   # cyan
        "OK")          color='\E[1;32m' ;;   # green
        "INFO")        color='\E[1;33m' ;;   # yellow
        "PROGRESS")    color='\E[1;34m' ;;   # blue
        "EXEC_RESULT") color='\E[1;31m' ;;   # red
        "TIPS")        color='\E[1;30m' ;;   # gray
    esac

    if [ -z "${color}" ]; then
        echo "${message}"
    else
        echo -e "${color}${message}${reset}"
    fi
    return 0
}


function usage()
{
    # Print one TIPS line per user-facing sub-command (check, repair).
    local -a tips=(
        "TIPS: check              leader of cm and mon are on the same node ?"
        "TIPS: repair             let cm leader to be other node. CAUTION: After execute this command, the cluster manager ip will disconnect, your should reconnect it!"
    )
    local tip

    for tip in "${tips[@]}"; do
        showWithColor "${tip}"
    done
}

# Report whether the cm leader and the mon leader run on the same node.
# Steps: snas_cm must be running and listening on port 4016, the cm leader IP
# is taken from getCmMaster.sh and the mon leader IP from /proc/monc_leader.
# Outputs: one colored status line on stdout; details go to the log via LOG.
# Returns: 0 when the leaders are on different nodes,
#          1 when they share a node or any lookup fails.
function do_check()
{
    local cm_pid
    local rec
    local cm_master_ip
    local mon_leader

    # Declaration and assignment are separate on purpose: with
    # "local cm_pid=$(...)" the tested status would be that of 'local'
    # (always 0) and a stopped snas_cm would go undetected.
    if ! cm_pid=$(pidof snas_cm); then
        showWithColor "WARNING: snas_cm is not running! Please execute on other node!"
        LOG "[$FUNCNAME][$LINENO]" "ERROR: get pid of snas_cm fail!"
        return 1
    fi

    # '/' and ':' need no escaping in a basic regex; the stray backslashes
    # only produced warnings with newer GNU grep.
    rec=$(netstat -anpt | grep -w LISTEN | grep -w "${cm_pid}/snas_cm" | grep ":4016")
    if [ -z "${rec}" ]; then
        showWithColor "ERROR: snas_cm is Abnormal! Please execute on other node!"
        LOG "[$FUNCNAME][$LINENO]" "ERROR: cm is not listen the port!"
        return 1
    fi

    cm_master_ip=$(/opt/huawei/snas/script/getCmMaster.sh)
    if [ -z "${cm_master_ip}" ]; then
        showWithColor "ERROR: get leader IP of cm fail! Please execute on other node!"
        LOG "[$FUNCNAME][$LINENO]" "ERROR: et leader IP of cm fail!"
        return 1
    fi

    mon_leader=$(grep -Po '(?<=Mon leader: )[0-9.]*' /proc/monc_leader)
    if [ -z "${mon_leader}" ]; then
        showWithColor "ERROR: get leader of mon fail! Please execute on other node!"
        LOG "[$FUNCNAME][$LINENO]" "ERROR: get leader of mon fail!"
        return 1
    fi

    # Fixed-string, whole-word match so 10.0.0.1 does not match 10.0.0.11.
    if printf '%s\n' "${cm_master_ip}" | grep -wFq -- "${mon_leader}"; then
        showWithColor "ERROR: cm and mon's leader are on the same node! Please execute command: 'sh ./${MY_NAME} repair' to repaie!"
        LOG "[$FUNCNAME][$LINENO]" "ERROR: cm and mon's leader are on the same node!!"
        return 1
    fi

    # Healthy state: report success instead of logging ERROR and returning 1.
    showWithColor "OK: cm and mon's leader are on the different node!"
    LOG "[$FUNCNAME][$LINENO]" "OK: cm and mon's leader are on the different node!"
    LOG "[$FUNCNAME][$LINENO]" "OK: cm and mon's leader check end!"
    return 0
}

# Tell whether this node currently holds the cluster manager IP.
# Reads the omip= entry from /opt/huawei/snas/etc/cm.ini and looks for that
# address in the output of ifconfig.
# Returns: 0 when the address is present in ifconfig output,
#          1 otherwise (including when omip cannot be read).
function is_cm_master()
{
    local cluster_om_ip
    cluster_om_ip=$(grep -Po '(?<=^omip=)[0-9.]*' /opt/huawei/snas/etc/cm.ini)
    if [ -z "${cluster_om_ip}" ]; then
        return 1
    fi

    # Quoted so the value is always passed to grep as the pattern; -w keeps
    # 10.0.0.1 from matching 10.0.0.11.
    if ifconfig | grep -wFq -- "${cluster_om_ip}"; then
        return 0
    fi
    return 1
}

# Tell whether the mon leader address is configured on this node.
# Intended to be called on the cm master node.
# Reads the "Mon leader: <ip>" entry from /proc/monc_leader and looks for
# that address in the output of ifconfig.
# Returns: 0 when the address is present in ifconfig output,
#          1 otherwise (file missing, no leader entry, or no match).
function i_is_mon_leader()
{
    local mon_leader

    [ -f /proc/monc_leader ] || return 1

    mon_leader=$(grep -Po '(?<=Mon leader: )[0-9.]*' /proc/monc_leader)
    [ -n "${mon_leader}" ] || return 1

    # Quoted so the value is always passed to grep as the pattern.
    if ifconfig | grep -wFq -- "${mon_leader}"; then
        return 0
    fi
    return 1
}

# Keep the routes in table om1 bound to the local manager IP.
# Polls 120 times, sleeping 10 seconds after each pass. On every pass where
# this node is NOT the cm master, the "default" route and the "scope" route
# of table om1 are rewritten with "src <local manager IP>" if they do not
# already mention that address.
# Reads omip= (cluster manager IP) and Localomip= (local manager IP) from
# /opt/huawei/snas/etc/cm.ini.
# Returns: 1 when omip cannot be read, 2 when Localomip cannot be read,
#          0 after the polling loop has finished.
function route_correct()
{
    local max=120
    local sleep_sconds=10
    local cnt=0
    local cluster_om_ip
    local mylocalip
    local old_def new_def old_rec new_rec
    local suffix
    local rc=0
    local -a route_cmd

    cluster_om_ip=$(grep -Po '(?<=^omip=)[0-9.]*' /opt/huawei/snas/etc/cm.ini)
    # Dots are escaped and digits are mandatory so that only a dotted-quad
    # address is accepted (the unescaped '.' with '*' matched almost anything).
    mylocalip=$(grep "Localomip=" /opt/huawei/snas/etc/cm.ini | awk -F '=' '{print $2}' | sed 's/"//g' | grep -Po '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+')

    if [ -z "${cluster_om_ip}" ]; then
        LOG "[$FUNCNAME][$LINENO]" "ERROR: get cm's master IP fail!"
        return 1
    fi
    if [ -z "${mylocalip}" ]; then
        LOG "[$FUNCNAME][$LINENO]" "ERROR: had no local manager IP"
        return 2
    fi

    while [ "${cnt}" -lt "${max}" ]; do
        if ! is_cm_master; then
            # eg. cluster manager ip: xx.xx.xx.xx, and local manager ip:xx.xx.xx.xx
            # repair : default via xx.xx.xx.xx NIC0 src xx.xx.xx.xx -> default via xx.xx.xx.xx NIC0 src xx.xx.xx.xx
            old_def=$(ip route show table om1 | grep default | grep -Fw -- "${mylocalip}")
            if [ -z "${old_def}" ]; then
                # should repair route. default via xx.xx.xx.xx dev NIC0 src $cluster_manager_ip -> $local_manager_ip
                LOG "[$FUNCNAME][$LINENO]" "ERROR: old route: $(ip route show table om1)"
                suffix=$(ip route show table om1 | grep default | awk -F "src" '{print $1}')
                if [ -n "${suffix}" ]; then
                    # ${suffix} is left unquoted on purpose: the route text
                    # must be split into separate arguments for ip.
                    route_cmd=(ip route replace ${suffix} src "${mylocalip}" table om1)
                    "${route_cmd[@]}"
                    rc=$?
                    new_def=$(ip route show table om1 | grep default)
                    LOG "[$FUNCNAME][$LINENO]" "ERROR: ${old_def} -> ${new_def} return ${rc}, cmd:${route_cmd[*]}"
                fi
            fi

            # repair: xx.xx.xx.xx/16 dev NIC0 scope link src xx.xx.xx.xx -> xx.xx.xx.xx/16 dev NIC0 scope link src xx.xx.xx.xx
            old_rec=$(ip route show table om1 | grep "scope" | grep -Fw -- "${mylocalip}")
            if [ -z "${old_rec}" ]; then
                LOG "[$FUNCNAME][$LINENO]" "ERROR: old route: $(ip route show table om1)"
                suffix=$(ip route show table om1 | grep "scope" | awk -F "scope" '{print $1}')
                if [ -n "${suffix}" ]; then
                    # ${suffix} unquoted on purpose, see above.
                    route_cmd=(ip route replace ${suffix} scope link src "${mylocalip}" table om1)
                    "${route_cmd[@]}"
                    rc=$?
                    new_rec=$(ip route show table om1 | grep scope)
                    LOG "[$FUNCNAME][$LINENO]" "ERROR: ${old_rec} -> ${new_rec} return ${rc}, cmd:${route_cmd[*]}"
                fi
            fi
        fi
        cnt=$((cnt + 1))
        sleep "${sleep_sconds}"
    done
    return 0
}

# Restart snas_cm on the cm master node.
# When a local manager IP (Localomip= in /opt/huawei/snas/etc/cm.ini) is
# configured, a background "route" run of this script is started first, after
# killing any previous one, so the routes can be checked once the restart is
# done. snas_cm is then stopped via the daemon wrapper with -s and started
# again five seconds later.
# Returns: 1 when this node is not the cm master, 0 otherwise.
function restart_cm()
{
    local mylocalip
    local old_pid

    if ! is_cm_master; then
        LOG "[$FUNCNAME][$LINENO]" "INFO: I'm not cm master "
        return 1
    fi

    # Dots are escaped and digits are mandatory so that only a dotted-quad
    # address is accepted.
    mylocalip=$(grep "Localomip=" /opt/huawei/snas/etc/cm.ini | awk -F '=' '{print $2}' | sed 's/"//g' | grep -Po '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+')
    if [ -n "${mylocalip}" ]; then
        # should check after cm leader changed, the route should be ok. donot run multi scirpt
        # NOTE(review): the match relies on the script being installed as
        # cm_mon_leader.sh -- confirm if the file is ever renamed.
        old_pid=$(ps -ef | grep "cm_mon_leader.sh route" | grep -v grep | awk '{print $2}')
        if [ -n "${old_pid}" ]; then
            # Unquoted on purpose: may hold several PIDs, one per line.
            kill ${old_pid}
            LOG "[$FUNCNAME][$LINENO]" "WARN: there had old running one:${old_pid}."
        fi
        nohup sh "$0" route &
        # $? here is the status of launching the background job.
        LOG "[$FUNCNAME][$LINENO]" "INFO: execute nohup sh $0 route & return $?"
    fi

    /opt/huawei/deploy/bin/daemon /opt/huawei/snas/bin/snas_cm -s
    sleep 5
    /opt/huawei/deploy/bin/daemon /opt/huawei/snas/bin/snas_cm
    LOG "[$FUNCNAME][$LINENO]" "WARN: had restart cm."
    return 0
}

# Move the cm leader away from the mon leader's node when they coincide.
# Must run on the node that holds the cluster manager IP. If that node is
# also the mon leader, snas_cm is restarted and, five seconds later, the
# repair counts as successful when this node is no longer the cm master.
# Outputs: one colored status line on stdout.
# Returns: 0 when no repair is needed or the repair succeeded,
#          1 when run on the wrong node or the repair failed.
function check_and_separate()
{
    if ! is_cm_master; then
        showWithColor "ERROR: should execute this command on the node which have the cluster manager IP!"
        return 1
    fi

    if ! i_is_mon_leader; then
        showWithColor "OK: cm and mon's leader are on the different nodes, no need to repair."
        return 0
    fi

    # cm and mon's leaders are on the same node!
    # The log tag is quoted so it cannot be glob-expanded against file names
    # in the current directory.
    LOG "[$FUNCNAME][$LINENO]" "WARN: will restart cm."
    restart_cm
    LOG "[$FUNCNAME][$LINENO]" "WARN: had restart cm."
    sleep 5

    if is_cm_master; then
        showWithColor "ERROR: repair fail!"
        return 1
    fi
    showWithColor "OK: repair successfully!"
    return 0
}

# Entry point: dispatch the sub-command given as the first argument.
#   check   - do_check
#   repair  - check_and_separate
#   route   - route_correct (inner command, meant to be started by restart_cm)
#   change  - restart_cm    (inner command)
# With no argument or an unknown one, the usage text is printed.
# Returns: the handler's return code, or 1 for missing/invalid input.
function main()
{
    local rc=0
    local handler=""
    local label=""

    # Log tags are quoted so they cannot be glob-expanded against file names
    # in the current directory.
    LOG "[$FUNCNAME][$LINENO]" "INFO: begin, input:$*"
    if [ $# -lt 1 ]; then
        usage
        return 1
    fi

    case "$1" in
        "check")
            handler=do_check
            label="do_check"
            ;;
        "repair")
            handler=check_and_separate
            label="repair"
            ;;
        "route")
            # inner command
            # should be called by restart_cm
            handler=route_correct
            label="route"
            ;;
        "change")
            # inner command
            handler=restart_cm
            label="change"
            ;;
        *)
            usage
            LOG "[$FUNCNAME][$LINENO]" "ERROR: end, invalid input"
            return 1
            ;;
    esac

    "${handler}"
    rc=$?
    LOG "[$FUNCNAME][$LINENO]" "INFO: end, ${label} return with ${rc}"
    return ${rc}
}

# Forward the script's arguments unchanged; "$@" keeps each argument intact
# instead of re-splitting and glob-expanding it as unquoted $* does.
main "$@"
