#!/bin/bash
# Copyright © Huawei Technologies Co., Ltd.2019-2022. All rights reserved.
# Resolve the directory that contains this script and store it in CURRENT_PATH.
# Globals:
#   CURRENT_PATH (written) - directory of "$0"; relative invocations are
#                            anchored to the current working directory
#                            (".." components are kept, not normalised).
# Arguments:
#   None
# Returns:
#   0 always
function get_work_path() {
    local script_dir
    script_dir=$(dirname "$0")
    case "${script_dir}" in
        /*)
            # Invoked through an absolute path: use it unchanged.
            CURRENT_PATH="${script_dir}"
            ;;
        .)
            # Invoked as "./name" or plain "name" from the script directory.
            CURRENT_PATH="$(pwd)"
            ;;
        ./*)
            # Strip the leading "./" before anchoring to the working directory.
            CURRENT_PATH="$(pwd)/${script_dir#./}"
            ;;
        *)
            # "..", "../x" and plain relative paths such as "bin": anchor them
            # to the working directory so CURRENT_PATH is never left relative.
            CURRENT_PATH="$(pwd)/${script_dir}"
            ;;
    esac
    return 0
}

# Initialise the global settings used by the health-check loop.
# Globals:
#   ARBITTRATION_WATCHDOG_LOG (read) - directory holding the log and restart flag
#   CHECK_IP, INIT, NORMAL, ABNORMAL, TIME_OUT, TIME_OUT_RET, CHECK_INTERVAL,
#   ABNORMAL_MAX_COUNT, ABNORMAL_COUNT, LOG_FILE, RESTART_FLAG (written)
# Arguments:
#   $1 - IP address to check
function init_check_etcd_health() {
    local watchdog_dir="${ARBITTRATION_WATCHDOG_LOG}"

    # Address probed by this process.
    CHECK_IP="${1}"

    # Status labels stored in the health flag file.
    INIT="init"
    NORMAL="normal"
    ABNORMAL="abnormal"

    # Probe timing: TIME_OUT is handed to `timeout`, TIME_OUT_RET is the exit
    # status compared against the probe result, CHECK_INTERVAL is the sleep
    # between two probes.
    TIME_OUT=1
    TIME_OUT_RET=124
    CHECK_INTERVAL=5

    # Consecutive abnormal results counted before the status is rewritten.
    ABNORMAL_MAX_COUNT=3
    ABNORMAL_COUNT=0

    # Files that live in the watchdog log directory.
    LOG_FILE="${watchdog_dir}/arbitration_watchdog.log"
    RESTART_FLAG="${watchdog_dir}/restart.flag"
}

# Append one timestamped line to LOG_FILE.
# Globals:
#   CHECK_IP (read) - included in the line prefix
#   LOG_FILE (read) - file the line is appended to
# Arguments:
#   $* - message words, joined into a single line
function logger() {
    local stamp
    stamp="$(date "+%Y-%m-%d %H:%M:%S")"
    printf '%s\n' "[${stamp}] [check_etcd ${CHECK_IP}] $*" >>"${LOG_FILE}"
}

# Restart etcd and then the monitor as user "arbiter", unless RESTART_FLAG
# already exists.
# Globals:
#   RESTART_FLAG (read) - marker file created before and removed after the
#                         restart; while it exists no restart is started
#   ETCD_HOME, MONITOR_HOME (read) - install dirs containing script/service.sh
# Arguments:
#   None
# NOTE(review): the flag is only removed at the end of this function, so if the
# process is killed in between the flag stays behind and later calls only log
# "restart done".
function restart_service() {
    local ret
    if [ ! -f "${RESTART_FLAG}" ]; then
        logger "restart etcd and monitor"
        touch "${RESTART_FLAG}"

        # Paths are quoted so an install dir containing whitespace is passed
        # to bash as a single argument.
        sudo -u arbiter bash "${ETCD_HOME}/script/service.sh" restart >/dev/null 2>&1
        ret=$?
        # Output is discarded above, so record a failure in the log instead of
        # dropping it silently.
        if [ "${ret}" -ne 0 ]; then
            logger "restart etcd failed, ret ${ret}"
        fi

        sleep 10

        sudo -u arbiter bash "${MONITOR_HOME}/script/service.sh" restart >/dev/null 2>&1
        ret=$?
        if [ "${ret}" -ne 0 ]; then
            logger "restart monitor failed, ret ${ret}"
        fi

        rm -f "${RESTART_FLAG}"
    else
        logger "restart done"
    fi
}

# Record the latest probe result for one address in the health flag file and
# trigger a service restart when the address recovers.
# Globals:
#   CURRENT_PATH (read)        - flag file is ${CURRENT_PATH}/../conf/check_etcd_health.flag
#   INIT, NORMAL, ABNORMAL (read) - status labels
#   ABNORMAL_MAX_COUNT (read)  - abnormal results needed before an existing
#                                entry is rewritten as abnormal
#   ABNORMAL_COUNT (read/written) - running count of abnormal results
# Arguments:
#   $1 - address the result belongs to
#   $2 - new status (NORMAL or ABNORMAL)
function update_etcd_health() {
    local arbiter_ip=$1
    local new_status=$2
    local new_flag="${arbiter_ip}=${new_status}"
    local etcd_health_flag="${CURRENT_PATH}/../conf/check_etcd_health.flag"
    local old_status="${INIT}"
    # Anchored pattern with literal dots: matches only the "<ip>=" key of this
    # address, so e.g. 10.0.0.1 never touches the entry of 10.0.0.11.
    local key_pattern="^${arbiter_ip//./[.]}="

    if [ "${new_status}" == "${ABNORMAL}" ]; then
        ABNORMAL_COUNT=$((ABNORMAL_COUNT + 1))
    else
        ABNORMAL_COUNT=0
    fi

    [ -f "${etcd_health_flag}" ] || touch "${etcd_health_flag}"

    if grep -q -- "${key_pattern}" "${etcd_health_flag}"; then
        # Update the status: read the recorded value by exact key (the last
        # entry wins if the key is duplicated).
        old_status=$(awk -F= -v ip="${arbiter_ip}" \
            '$1 == ip { status = $2 } END { print status }' "${etcd_health_flag}")
        # A non-abnormal result is written immediately; an abnormal one only
        # once ABNORMAL_MAX_COUNT abnormal results have been counted.
        if [ "${new_status}" != "${ABNORMAL}" ] || [ "${ABNORMAL_COUNT}" -eq "${ABNORMAL_MAX_COUNT}" ]; then
            sed -i "/${key_pattern}/d" "${etcd_health_flag}"
            echo "${new_flag}" >>"${etcd_health_flag}"
        fi
    else
        # No entry for this address yet: record the result right away.
        echo "${new_flag}" >>"${etcd_health_flag}"
    fi

    if [ "${ABNORMAL_COUNT}" -eq "${ABNORMAL_MAX_COUNT}" ]; then
        ABNORMAL_COUNT=0
    fi
    logger "end: new_status ${new_status}, old_status ${old_status} count ${ABNORMAL_COUNT}"
    if [ "${old_status}" == "${ABNORMAL}" ] && [ "${new_status}" == "${NORMAL}" ]; then
        # Status went from abnormal to normal: restart etcd and monitor.
        restart_service
    fi
}

# Probe CHECK_IP once and hand the result to update_etcd_health.
# Globals:
#   CHECK_IP (read)        - address to probe
#   TIME_OUT (read)        - duration passed to `timeout`
#   TIME_OUT_RET (read)    - exit status that marks the probe as timed out
#   ETCD_PORT (read)       - port the ssh probe connects to
#   NORMAL, ABNORMAL (read) - status labels passed on
# Arguments:
#   None
function check_etcd_health_single() {
    local arbiter_ip="${CHECK_IP}"
    local probe_output probe_status

    # Do not check connectivity to this node itself. The address is matched as
    # a fixed string (-F) so its dots are not regex wildcards, and quietly (-q)
    # so the matching line is not echoed to stdout.
    if ip addr | grep -qwF -- "${arbiter_ip}"; then
        return 0
    fi

    # A verbose ssh attempt is used as the probe; only its debug output and
    # exit status are inspected.
    probe_output=$(timeout "${TIME_OUT}" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -v -p "${ETCD_PORT}" "arbiter@${arbiter_ip}" 2>&1)
    probe_status=$?

    # Normal only if the probe did not time out AND the connection was
    # reported as established.
    if [ "${probe_status}" -ne "${TIME_OUT_RET}" ] \
        && grep -qw "Connection established" <<<"${probe_output}"; then
        update_etcd_health "${arbiter_ip}" "${NORMAL}"
    else
        update_etcd_health "${arbiter_ip}" "${ABNORMAL}"
    fi
}

# Poll without end: run one health check, wait CHECK_INTERVAL, repeat.
# Globals:
#   CHECK_INTERVAL (read) - argument passed to sleep between two checks
# Arguments:
#   None
function check_etcd_health() {
    # ":" always succeeds, so this loop never terminates on its own.
    while :; do
        check_etcd_health_single
        sleep "${CHECK_INTERVAL}"
    done
}

# Entry point: load the environment, initialise the settings and start the
# endless health-check loop for one address.
# Globals:
#   CURRENT_PATH (read, set by get_work_path)
#   ARBITTRATION_WATCHDOG_LOG (read, expected from env.properties)
#   ARBITRATION_WATCHDOG_LOG_FILE_PATH (written)
# Arguments:
#   $1 - IP address to check (forwarded to init_check_etcd_health)
# Returns:
#   1 if no address was given or env.properties cannot be read; otherwise does
#   not return while the check loop runs.
function main() {
    # Without an address every later step works on an empty CHECK_IP, so fail
    # fast instead of looping with nothing to check.
    if [ $# -lt 1 ] || [ -z "$1" ]; then
        echo "usage: $0 <check_ip>" >&2
        return 1
    fi

    get_work_path

    local env_file="${CURRENT_PATH}/../env/env.properties"
    if [ ! -r "${env_file}" ]; then
        echo "cannot read env file: ${env_file}" >&2
        return 1
    fi
    # Quoted so a script directory containing whitespace is sourced correctly.
    # shellcheck source=/dev/null
    source "${env_file}"

    ARBITRATION_WATCHDOG_LOG_FILE_PATH="${ARBITTRATION_WATCHDOG_LOG}/arbitration_watchdog.log"
    init_check_etcd_health "$@"
    check_etcd_health
}

# Script entry: forward all command-line arguments to main.
main "$@"
