#!/bin/bash
# Node pre-check script.
# Usage: <script> <update_scene> <dmk_float_ip> <dest_version>
# Progress is appended to ${G_LOG_FILE}; per-item results are written to
# ${SCRIPT_HOME}/precheck_result.
export PATH=$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin
# Absolute directory that contains this script. Every expansion is quoted so
# that an install path containing whitespace is still resolved correctly.
SCRIPT_HOME=$(cd -- "$(dirname -- "$0")";pwd)
# File that log_info/log_error append to.
G_LOG_FILE="/tmp/precheck.log"
# Append a timestamped ERROR record to ${G_LOG_FILE}.
# Arguments: $1 - message text
function log_error()
{
    # printf keeps the message verbatim; the redirect target is quoted so a
    # log path containing whitespace does not become an "ambiguous redirect".
    printf '%s[ERROR]: %s\n' "$(date "+%Y-%m-%d %T")" "$1" >> "${G_LOG_FILE}"
}

# Append a timestamped INFO record to ${G_LOG_FILE}.
# Arguments: $1 - message text
function log_info()
{
    # printf keeps the message verbatim; the redirect target is quoted so a
    # log path containing whitespace does not become an "ambiguous redirect".
    printf '%s[INFO]: %s\n' "$(date "+%Y-%m-%d %T")" "$1" >> "${G_LOG_FILE}"
}

# Print the MachineRole value from hcpconf.ini as an integer.
# Outputs: for each line containing "MachineRole", the text after the first
#          "=" coerced with awk's int(); an empty line when nothing matched.
function get_server_role()
{
    local conf_file="/opt/huawei-data-protection/ebackup/conf/hcpconf.ini"
    local role
    role=$(cat "${conf_file}" | awk -F "=" '/MachineRole/ {print int($2)}')
    # Left unquoted on purpose: several matches are folded onto one line.
    echo ${role}
}

# Set the global G_OS_TYPE from the release marker files that exist.
# /etc/SuSE-release selects "SuSE", /etc/redhat-release selects "Euler"; when
# both exist the later one ("Euler") wins, and when neither exists G_OS_TYPE
# is left untouched.
function checkout_os_type()
{
    local release_file
    for release_file in /etc/SuSE-release /etc/redhat-release
    do
        [[ -f "${release_file}" ]] || continue
        case "${release_file}" in
            */SuSE-release)   G_OS_TYPE="SuSE" ;;
            */redhat-release) G_OS_TYPE="Euler" ;;
        esac
    done
    return 0
}
# Sample the CPU idle percentage with top five times, one second apart, and
# print the integer average on stdout.
# Outputs: the average idle percentage, or the fallback value 90 when top is
#          unavailable or its output cannot be parsed.
# Returns: 0 when a real average was printed, 1 when the fallback was used.
function calc_avarage_idle()
{
    log_info "[calc_avarage_idle(),$LINENO]Begin to calc cpu avarage idle."
    if ! command -v top >/dev/null 2>&1;then
        log_info "[calc_avarage_idle(),$LINENO]There is no cmd top and sar,now skip to check cpu health."
        echo 90
        return 1
    fi

    local i=""
    local idle=""
    # Local and explicitly zeroed, so a cpu_idle variable that already exists
    # in the caller's scope can no longer inflate the average.
    local idle_sum=0
    for i in {1..5}
    do
        # Fourth comma-separated field of the "Cpu(s)" line, leading blanks
        # stripped, truncated at the decimal point.
        idle=$(top -bn 1 -i -c | grep -i "cpu(s)" | awk -F "," '{print $4}' | sed 's/^[ ]*//' | awk -F "." '{print $1}')
        if [[ -z "${idle}" ]];then
            log_error "[calc_avarage_idle(),$LINENO]Excute the command to check cpu health failed."
            echo 90
            return 1
        fi
        if [[ ! "${idle}" =~ ^[[:digit:]]+$ ]];then
            log_error "[calc_avarage_idle(),$LINENO]Exception occurs when check cpu health,the cpu average idle is ${idle}"
            echo 90
            return 1
        fi
        log_info "[calc_avarage_idle(),$LINENO]Check cpu idle at sample ${i}: ${idle}"
        # 10# forces base 10 so a value with a leading zero is not read as octal.
        idle_sum=$((idle_sum + 10#${idle}))
        sleep 1
    done
    echo $((idle_sum / 5))
    return 0
}

# Check that the average CPU usage reported by "sar -u 1 6" does not exceed
# a threshold.
# Arguments: $1 - maximum allowed CPU usage in percent
# Returns:   0 when usage is within the threshold or sar is not installed,
#            1 when usage exceeds the threshold (also recorded through
#              set_check_result) or the sar output cannot be parsed
# NOTE(review): the two parse-failure paths return 1 without calling
# set_check_result, unlike check_disk_space; confirm that is intended.
function check_cpu_health()
{
    log_info "[check_cpu_health(),$LINENO]Begin to check CPU health."
    if ! command -v sar >/dev/null 2>&1;then
        log_info "[check_cpu_health(),$LINENO]There is no sar cmd,now skip to check cpu health."
        return 0
    fi

    local ret=0
    local threshold=$1
    local cpu_idle=""
    # Last column of the "Average" row is %idle; keep only its integer part.
    # LC_ALL=C keeps the row label in English regardless of the system locale.
    cpu_idle=$(LC_ALL=C sar -u 1 6 | grep -i "average" | awk '{print $NF}' | awk -F "." '{print $1}')
    if [[ -z "${cpu_idle}" ]];then
        log_error "[check_cpu_health(),$LINENO]Excute the command to check cpu health failed."
        return 1
    fi
    if [[ ! "${cpu_idle}" =~ ^[[:digit:]]+$ ]];then
        log_error "[check_cpu_health(),$LINENO]Exception occurs when check cpu health,the cpu average idle is $cpu_idle"
        return 1
    fi
    # 10# forces base 10 so a value with a leading zero is not read as octal.
    local cpu_used=$((100 - 10#${cpu_idle}))
    log_info "[check_cpu_health(),$LINENO]the average cpu used is ${cpu_used}%."
    if [ "${cpu_used}" -gt "${threshold}" ];then
        log_error "[check_cpu_health(),$LINENO]CPU used greater than ${threshold}%."
        ret=1
        set_check_result "cpu_health" "The CPU utilization ratio is greater than ${threshold}%."
    fi
    log_info "[check_cpu_health(),$LINENO]Check CPU health completed."
    return ${ret}
}

# Placeholder for the free-memory check: no measurement is performed here,
# only the begin/completed log records are written.
# Returns: always 0
function check_free_memory()
{
    local ret=0
    log_info "[check_free_memory(),$LINENO]Begin to check free memory."
    log_info "[check_free_memory(),$LINENO]Check free memory completed."
    return ${ret}
}

# Verify that directories are writable by creating, writing and removing a
# probe file in each of them.
# Arguments: [$@] - directories to probe; defaults to /opt /tmp /var /etc
# Returns:   0 when every directory accepted the write, 1 otherwise (the
#            failing directories are recorded through set_check_result)
function check_disk_health()
{
    log_info "[check_disk_health(),$LINENO]Begin to check disk health."
    local ret=0
    local result=""
    local dir=""
    local probe=""
    local check_dir=("$@")
    if [ ${#check_dir[@]} -eq 0 ];then
        check_dir=("/opt" "/tmp" "/var" "/etc")
    fi
    for dir in "${check_dir[@]}"
    do
        # mktemp creates a new, uniquely named file. A fixed name in a
        # world-writable directory such as /tmp could be pre-created as a
        # symlink, making this script overwrite the link target.
        probe=$(mktemp "${dir}/check_disk_health.XXXXXX")
        if [[ -z "${probe}" ]] || ! echo "Test" > "${probe}" || [[ ! -f "${probe}" ]];then
            log_error "[check_disk_health(),$LINENO]create file[${dir}/check_disk_health] failed"
            ret=1
            result=$result" ${dir}"
        fi
        # Remove the probe on the failure path too (e.g. created but not written).
        if [[ -n "${probe}" ]];then
            rm -f "${probe}"
        fi
    done

    if [ ${ret} -ne 0 ];then
        set_check_result "disk_health" "Write disk failed at directory [${result}]"
    fi
    log_info "[check_disk_health(),$LINENO]Check disk health completed."
    return ${ret}
}

# Check the free space of /tmp, /var and /opt against thresholds.
# Arguments: $1 - /tmp threshold in GB
#            $2 - /var threshold in GB
#            $3 - /opt threshold in GB
# Globals:   G_UPDATE_SCENE (read) - when "upgrade", /tmp/upgrade and
#            /var/ebackup_bak are removed before measuring
# Returns:   0 when the space is sufficient, 1 otherwise (details are
#            recorded through set_check_result)
# Rules:     /tmp is always checked against $1. Only when /opt has less than
#            $3 GB free are /var (against $2) and /opt (against a fixed 5 GB
#            minimum) checked as well.
function check_disk_space()
{
    log_info "[check_disk_space(),$LINENO]Begin to check free disk space."
    local ret=0
    # Thresholds are given in GB; df -k reports KB.
    local tmp_threshold=""
    local var_threshold=""
    local opt_threshold=""
    local opt_min_threshold=""
    tmp_threshold=$(expr "$1" \* 1024 \* 1024)
    var_threshold=$(expr "$2" \* 1024 \* 1024)
    opt_threshold=$(expr "$3" \* 1024 \* 1024)
    opt_min_threshold=$(expr 5 \* 1024 \* 1024) # 5GB

    if [ "x${G_UPDATE_SCENE}" == "xupgrade" ];then
        log_info "[check_disk_space(),$LINENO]Begin to remove /tmp/upgrade and /var/ebackup_bak"
        rm -rf /tmp/upgrade
        rm -rf /var/ebackup_bak
        log_info "[check_disk_space(),$LINENO]Remove /tmp/upgrade and /var/ebackup_bak completed"
    fi

    # -P forces the POSIX one-line-per-filesystem layout; without it df wraps
    # long device names and line 2 no longer carries the "Available" column.
    local tmp_free_space=""
    local var_free_space=""
    local opt_free_space=""
    tmp_free_space=$(df -Pk /tmp | awk 'NR == 2 {print $4}')
    var_free_space=$(df -Pk /var | awk 'NR == 2 {print $4}')
    opt_free_space=$(df -Pk /opt | awk 'NR == 2 {print $4}')
    if [[ -z "${tmp_free_space}" || -z "${var_free_space}" || -z "${opt_free_space}" ]];then
        log_error "[check_disk_space(),$LINENO]Check disk free space failed."
        set_check_result "disk_space" "The free disk sapce check command failed."
        return 1
    fi

    local digits='^[[:digit:]]+$'
    if [[ ! "${tmp_free_space}" =~ ${digits} || ! "${var_free_space}" =~ ${digits} || ! "${opt_free_space}" =~ ${digits} ]];then
        log_error "[check_disk_space(),$LINENO]Exception occurs when check disk free space,the free space are: [/tmp:$tmp_free_space], [/var:$var_free_space], [/opt:$opt_free_space]"
        set_check_result "disk_space" "The free disk sapce check command failed."
        return 1
    fi

    local error_msg=""
    # /tmp is required whether or not /opt is large enough.
    if [ "${tmp_free_space}" -lt "${tmp_threshold}" ];then
        error_msg=$error_msg"/tmp < $1 GB, "
        log_error "[check_disk_space(),$LINENO]The free space of /tmp is ${tmp_free_space}KB,which is less than $1 GB"
        ret=1
    fi

    if [ "${opt_free_space}" -lt "${opt_threshold}" ];then
        if [ "${var_free_space}" -lt "${var_threshold}" ];then
            error_msg=$error_msg"/var < $2 GB, "
            log_error "[check_disk_space(),$LINENO]The free space of /var is ${var_free_space}KB,which is less than $2 GB"
            ret=1
        fi

        # NOTE(review): the comparison uses the fixed 5 GB minimum while the
        # messages quote $3 GB; confirm which figure operators should see.
        if [ "${opt_free_space}" -lt "${opt_min_threshold}" ];then
            error_msg=$error_msg"/opt < $3 GB."
            log_error "[check_disk_space(),$LINENO]The free space of /opt is ${opt_free_space}KB,which is less than $3 GB"
            ret=1
        fi
    fi

    if [ ${ret} -ne 0 ];then
        set_check_result "disk_space" "$error_msg"
    fi
    log_info "[check_disk_space(),$LINENO]Check free disk space completed."
    return ${ret}
}

# Check that the average ping round-trip time to a host stays within a limit.
# Arguments: $1 - destination IP address
#            $2 - maximum allowed average latency in milliseconds
# Globals:   PING_CMD (read) - command used to send the four probes
# Returns:   0 when the integer part of the average latency is not greater
#            than $2, 1 when it is greater or no latency could be parsed
#            (both failures are recorded through set_check_result)
function check_network_health()
{
    log_info "[check_network_health(),$LINENO]Begin to check network health."
    local des_ip=$1
    local threshold=$2
    local network_delay_ms

    # From the "min/avg/max/mdev" summary line take the 4th field, then the
    # avg component, then its integer part.
    network_delay_ms=$(${PING_CMD} -c 4 ${des_ip} \
        | grep -w "min/avg/max/mdev" \
        | awk -F " " '{print $4}' \
        | awk -F "/" '{print $2}' \
        | awk -F "." '{print $1}')

    # An empty or non-numeric value means the ping produced no usable summary.
    if [[ ! "${network_delay_ms}" =~ ^[[:digit:]]+$ ]]; then
        log_error "[check_network_health(),$LINENO]Check network health failed,the delay time is $network_delay_ms"
        set_check_result "network_health" "Ping $des_ip failed."
        return 1
    fi

    local ret=0
    if [ ${network_delay_ms} -gt ${threshold} ]; then
        log_error "[check_network_health(),$LINENO]Network latency is ${network_delay_ms},which is greater than ${threshold}ms"
        ret=1
        set_check_result "network_health" "The network delay time between this node and dmk[$des_ip] is greater than ${threshold}ms."
    fi
    log_info "[check_network_health(),$LINENO]Check network health completed."
    return ${ret}
}

# Check the output of "service hcp status" for processes that are reported
# as not running; lines mentioning dsware_agent are ignored.
# Returns: 0 when no such process is reported, 1 otherwise (recorded through
#          set_check_result)
function check_service_health()
{
    log_info "[check_service_health(),$LINENO]Begin to check service health."
    local ret=0
    local dead_process_num
    dead_process_num=$(service hcp status 2>/dev/null \
        | grep -v "dsware_agent" \
        | grep -c "isn't running")
    if (( dead_process_num != 0 )); then
        log_error "[check_service_health(),$LINENO]service status is abnormal."
        ret=1
        set_check_result "service_health" "Service status is abnormal"
    fi
    log_info "[check_service_health(),$LINENO]Check service health completed."
    return ${ret}
}

# Check the HA role of this node.
# Only nodes whose get_server_role value is 0 or 2 are examined; any other
# role is logged as a proxy node and skipped. For examined nodes the
# parenthesised role on the "HaLocalName" line of the config_omm_ha.sh query
# output must be "(active)" or "(standby)".
# Returns: 0 when healthy or skipped, 1 otherwise (recorded through
#          set_check_result)
function check_ha_health()
{
    log_info "[check_ha_health(),$LINENO]Begin to check HA health."
    local ret=0
    local server_role
    server_role=$(get_server_role)
    case "${server_role}" in
        0|2)
            local ha_role
            ha_role=$(sh /opt/huawei-data-protection/ebackup/bin/config_omm_ha.sh query \
                | grep "HaLocalName" \
                | grep -E -o "\(.*\)")
            case "${ha_role}" in
                "(active)"|"(standby)")
                    log_info "[check_ha_health(),$LINENO]The HA health is good."
                    ;;
                *)
                    log_error "[check_ha_health(),$LINENO]HA health is exception,the ha role is ${ha_role}"
                    ret=1
                    set_check_result "ha_health" "HA status is abnormal, the ha role is ${ha_role}"
                    ;;
            esac
            ;;
        *)
            log_info "[check_ha_health(),$LINENO]This is a proxy node.No need do this."
            ;;
    esac
    log_info "[check_ha_health(),$LINENO]Check HA health completed."
    return ${ret}
}

# Replace the result line of one check item in ${G_RESULT_FILE}.
# Arguments: $1 - check item name; every line containing it is replaced
#            $2 - description written after "<item>:"
# Globals:   G_RESULT_FILE (read) - result file, edited in place
function set_check_result()
{
    local check_item=$1
    local check_description=$2

    # The file name is quoted so a result path containing whitespace is
    # passed to sed as a single argument.
    sed -i "/${check_item}/ c ${check_item}:${check_description}" "${G_RESULT_FILE}"
}

# Make the account or the password of a system account never expire, then
# verify the change with "chage -l".
# Arguments: $1 - account name; only "hcpprocess" and "root" are accepted
#            $2 - "account" (chage -E -1) or "password" (chage -M -1)
# Returns:   0 on success, 1 on invalid arguments, chage failure, or when
#            the verification does not report "never"
function set_account_expire_to_never()
{
    local account_name=$1
    local expire_type=$2
    local chage_opt=""
    local expires_label=""
    # Local so the verification result no longer leaks into global scope.
    local set_res=""

    if [ "${account_name}" != "hcpprocess" ] && [ "${account_name}" != "root" ]; then
        log_error "[set_account_expire_to_never,$LINENO] Set account to never failed, Invalid account name."
        return 1
    fi
    case "${expire_type}" in
        password)
            chage_opt="-M"
            expires_label="Password expires"
            ;;
        account)
            chage_opt="-E"
            expires_label="Account expires"
            ;;
        *)
            log_error "[set_account_expire_to_never,$LINENO] Set account to never failed, Invalid expire type."
            return 1
            ;;
    esac

    if ! chage "${chage_opt}" -1 "${account_name}"; then
        log_error "[set_account_expire_to_never,$LINENO] Set ${account_name} ${expire_type} expire to never failed."
        return 1
    fi
    # LC_ALL=C keeps the "... expires" labels and "never" in English so the
    # grep/awk parsing does not depend on the system locale.
    set_res=$(LC_ALL=C chage -l "${account_name}" | grep -i "${expires_label}" | awk '{print $4}')
    if [ "${set_res}" != "never" ]; then
        log_error "[set_account_expire_to_never,$LINENO] Set ${account_name} ${expire_type} expire to never failed. current status is ${set_res}."
        return 1
    fi
    log_info "[set_account_expire_to_never(),$LINENO]Success to set expire to never."
    return 0
}

# Make sure the hcpprocess and root accounts and their passwords never
# expire, calling set_account_expire_to_never for any that do.
# Returns: 0 when nothing needed changing or every change succeeded,
#          1 when at least one set_account_expire_to_never call failed
function check_account_status()
{
    log_info "[check_account_status(),$LINENO]Begin to check account status."
    local ret=0
    local account_name=""
    local chage_info=""
    local account_status=""
    local password_status=""
    local account_list=("hcpprocess" "root")
    for account_name in "${account_list[@]}"
    do
        # Query once per account; LC_ALL=C keeps the labels in English so the
        # parsing does not depend on the system locale.
        chage_info=$(LC_ALL=C chage -l "${account_name}")
        account_status=$(printf '%s\n' "${chage_info}" | grep -i 'Account expires' | awk '{print $4}')
        password_status=$(printf '%s\n' "${chage_info}" | grep -i 'Password expires' | awk '{print $4}')
        if [ "${account_status}" != "never" ]; then
            set_account_expire_to_never "${account_name}" "account" || ret=1
        fi
        if [ "${password_status}" != "never" ]; then
            set_account_expire_to_never "${account_name}" "password" || ret=1
        fi
    done
    return ${ret}
}


# Initialise the globals used by the checks and reset the result file.
# Globals: SCRIPT_HOME (read); G_RESULT_FILE and PING_CMD (written);
#          G_OS_TYPE (written by checkout_os_type)
# Effect:  ${G_RESULT_FILE} is overwritten with one "<item>:pass" line per
#          check item; set_check_result later replaces individual lines.
function init_env()
{
    G_RESULT_FILE="${SCRIPT_HOME}/precheck_result"
    PING_CMD=ping
    checkout_os_type
    # A single quoted redirect truncates and fills the file in one step and
    # keeps working when SCRIPT_HOME contains whitespace.
    printf '%s:pass\n' \
        "cpu_health" \
        "disk_health" \
        "disk_space" \
        "network_health" \
        "service_health" \
        "ha_health" > "${G_RESULT_FILE}"
}

# Decide whether the precheck needs to run for the requested operation.
# Arguments: $1 - update scene: "upgrade", "cipher_change", or anything else
#                 (handled as a patch)
#            $2 - target version
# Returns:   0 when the precheck should continue;
#            1 when the target version is already installed (upgrade/patch)
#              or hcpconf.ini has no KmcAlgMode entry (cipher_change)
function check_version()
{
    local update_scene=$1
    local dest_version=$2
    local ebackup_home="/opt/huawei-data-protection/ebackup"
    log_info "[check_version(),$LINENO]Begin to check version."
    case "${update_scene}" in
        upgrade)
            local current_version=""
            current_version=$(grep "System Version" "${ebackup_home}/conf/versions.conf" | awk -F "=" '{print $2}')
            log_info "[check_version(),$LINENO]Current version is $current_version."
            if [ "${current_version}" == "${dest_version}" ];then
                log_info "[check_version(),$LINENO]Current version is equal the target version."
                return 1
            fi
            ;;
        cipher_change)
            # -q keeps the matched line off stdout. A missing file makes grep
            # fail as well, which is also treated as "not supported".
            if ! grep -q "KmcAlgMode" "${ebackup_home}/conf/hcpconf.ini";then
                log_info "[check_version(),$LINENO]Current version do not support cipher change."
                return 1
            fi
            ;;
        *)
            # -F matches the version literally ("." is not a wildcard) and the
            # quoting keeps it a single argument. An empty target never matches.
            if [[ -n "${dest_version}" ]];then
                if grep "Version:" "${ebackup_home}"/tmp/patch_*.info 2>/dev/null | grep -F -q -w -- "${dest_version}";then
                    log_info "[check_version(),$LINENO]Current version is equal the target version."
                    return 1
                fi
            fi
            ;;
    esac
    log_info "[check_version(),$LINENO]Check version completed."
    return 0
}

# Set the host name to "eBackup" and add it as an alias on the
# localhost4/localhost6 lines of /etc/hosts.
# Nothing is changed when /etc/SuSE-release exists.
# Returns: always 0
function modify_host_name()
{
    log_info "[modify_host_name(),$LINENO]Begin to modify host info."
    if [[ -f /etc/SuSE-release ]]; then
        log_info "[modify_host_name(),$LINENO]SuSE os, no need to do this."
        return 0
    fi

    local hosts_file="/etc/hosts"
    local local_ipv4_addrress
    local local_ipv6_addrress

    echo "eBackup" > /etc/hostname

    # First column (the address) of the existing loopback entries.
    local_ipv4_addrress=$(awk '/localhost4.localdomain4/ {print $1}' "${hosts_file}")
    local_ipv6_addrress=$(awk '/localhost6.localdomain6/ {print $1}' "${hosts_file}")

    # Rewrite each loopback line, keeping its address and inserting the alias.
    sed -i "/localhost4.localdomain4/ c ${local_ipv4_addrress}    localhost localhost.localdomain localhost4 eBackup localhost4.localdomain4" "${hosts_file}"
    sed -i "/localhost6.localdomain6/ c ${local_ipv6_addrress}          localhost localhost.localdomain localhost6 eBackup localhost6.localdomain6" "${hosts_file}"

    log_info "[modify_host_name(),$LINENO]Modify host info successfully."
    return 0
}

# Set "max_log_file_action = ROTATE" in /etc/audit/auditd.conf and restart
# the auditd service.
# If lsattr reported the file with an "----i" attribute string beforehand,
# "chattr +i" is applied again after the edit.
# The return status is that of the final log_info call.
function modify_audit_config()
{
    log_info "[modify_audit_config(),$LINENO]Begin to modify audit config."
    local config_file="/etc/audit/auditd.conf"
    # The pattern starts with a backslash, presumably so grep does not parse
    # the leading dashes as an option.
    lsattr ${config_file} | grep "\----i" >/dev/null 2>&1
    # 0 means the attribute listing matched "----i" before the edit.
    local isProtected=$?
    # Clear the attribute unconditionally so the in-place edit can proceed.
    chattr -i /etc/audit/auditd.conf
    sed -i '/max_log_file_action/ c max_log_file_action = ROTATE' ${config_file}
    # Put the attribute back only if it was set to begin with.
    if [ ${isProtected} -eq 0 ];then
        chattr +i ${config_file}
    fi
    service auditd restart >/dev/null 2>&1
    # One retry after a short pause; the result of the retry is not checked.
    if [ $? -ne 0 ];then
        sleep 5
        log_error "[modify_audit_config(),$LINENO]Restart autitd failed.Now try again"
        service auditd restart >/dev/null 2>&1
    fi
    sleep 5
    log_info "[modify_audit_config(),$LINENO]Modify audit config completed."
}

# Run the whole precheck.
# Arguments: $1 - update scene ("upgrade", "cipher_change", or a patch scene)
#            $2 - DMK floating IP used for the network latency check
#            $3 - target version
# Globals:   G_UPDATE_SCENE (written)
# Returns:   1 when the argument count is not 3, otherwise 0. Individual
#            check failures are not reflected in the return status; each
#            check records its own result through set_check_result.
function main()
{
    if [ $# -ne 3 ];then
        log_error "[main(),$LINENO]Input parameters are Invalid."
        return 1
    fi
    #common config
    log_info "[main(),$LINENO]Do common test firstly."
    #6.5.1~8.0.0 no need to do modify_host_name
    #6.5.1~8.0.0 no need to do modify_audit_config
    log_info "[main(),$LINENO]Begin to precheck."
    init_env
    local update_scene=$1
    local dmk_float_ip=$2
    local dest_version=$3
    G_UPDATE_SCENE=$update_scene
    #0.check version
    check_version "${update_scene}" "${dest_version}"
    local Ret=$?
    if [ "${Ret}" != "0" ];then
        if [ "${G_UPDATE_SCENE}" != "cipher_change" ];then
            log_info "[main(),$LINENO]Current version is the target version.No need to do the precheck."
        else
            # NOTE(review): an unsupported cipher change is logged as an error
            # but still returns 0; confirm the caller expects that.
            log_error "[main(),$LINENO]Current version do not support cipher change."
        fi
        return 0
    fi
    # for root account lock issue
    chmod 750 /etc/sudoers.d/
    #1.check whether cpu usage is greater than 90%
    check_cpu_health 90
    #2.write disk
    check_disk_health
    #3.check free disk space (/tmp 3GB, /var 20GB, /opt 20GB)
    if [ "${G_UPDATE_SCENE}" != "cipher_change" ]; then
        check_disk_space 3 20 20
    fi
    #4.check network delay time between eBackup and DMK <200ms
    # Quoted so an empty IP stays the first argument instead of letting the
    # threshold slide into its place.
    check_network_health "${dmk_float_ip}" 200
    #5.check hcp status
    check_service_health
    #6.check ha status
    check_ha_health
    #7.check account
    check_account_status
    log_info "[main(),$LINENO]Precheck completed."
}

# Script entry point: pass every command-line argument to main unchanged and
# exit with the status main returned.
main "$@"
exit $?