#!/bin/bash
#########################################################
# Copyright © Huawei Technologies Co., Ltd. 2021-2023. All rights reserved.
# File name     : upgrade_rollback_osmediation.sh
# Description   : 欧方升级回滚入口脚本
###########################################################

# Directory that holds this script and the script's own file name.
# Every expansion is quoted so an install path containing spaces still
# resolves; "&&" keeps SCRIPT_PATH empty instead of silently falling back to
# the current directory when cd fails.
SCRIPT_PATH=$(cd "$(dirname "$0")" && pwd)
SELF_FILE=$(basename "$0")
# Timestamp of the current run; it becomes part of the log file name.
TAIL=$(date +%Y%m%d%H%M%S)
# Log directory and the per-run log file (script name without ".sh" + timestamp).
LOG_PATH="/opt/oss/log/manager/easysuite_upgrade/scriptlog"
LOG_FILE="${LOG_PATH}/${SELF_FILE//.sh/}_${TAIL}.log"
# Time limit, in seconds, for upgrading or rolling back one node.
TIME_OUT=1200
# Exit status reported by the timeout command when the time limit is hit.
TIME_OUT_RESULT=124

##################################################
# Description: Verify the account that runs the script.
#              The current user name (whoami) must equal the name that
#              owns uid 3001 (expected to be ossadm).
# Globals:     oss_user (written), LOG_FILE (read)
# Returns:     0 when the names match, 1 otherwise (a message is printed
#              and appended to LOG_FILE)
##################################################
function check_user() {
    local current_user
    current_user=$(whoami)
    # oss_user is deliberately not local: it stays visible after the call.
    oss_user=$(id -nu 3001)
    if [ "${current_user}" == "${oss_user}" ]; then
        return 0
    fi
    local stamp
    stamp=$(date +'%Y-%m-%d %H:%M:%S')
    echo "[${stamp}]| User have no permission to run this script" | tee -a "${LOG_FILE}"
    return 1
}

##################################################
# Description: Write one line to the Easysuite front-end task log.
#              The line is printed on stdout and appended to TASK_LOG_FILE.
# @Param1:     log level ERROR/WARN/INFO
# @Param2:     log content
# Globals:     BASH_PID (written), TASK_LOG_FILE (read)
##################################################
function TASK_LOG() {
    # Kept global, as before, in case other code reads it.
    BASH_PID=$$
    # One date call: assembling the timestamp from several calls can yield
    # mismatched fields when a second/minute boundary is crossed in between.
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    local line="[${stamp}] [${BASH_PID}] | $1 $2"
    if [ -n "${TASK_LOG_FILE}" ]; then
        echo "${line}" | tee -a "${TASK_LOG_FILE}"
    else
        # The task log path is not known yet (this function can be called
        # before the task variables are initialised): print only, rather
        # than letting tee fail on an empty file name.
        echo "${line}"
    fi
}

##################################################
# Description: Log the same message to the Easysuite front-end task log
#              (TASK_LOG) and to the back-end log (LOG).
#              LOG is not defined in this file; presumably it comes from
#              the sourced common.sh - verify.
# @Param1:     log level ERROR/WARN/INFO
# @Param2:     log content
# Returns:     the status of the LOG call
##################################################
function DOUBLE_LOG() {
    local level="$1"
    local content="$2"
    TASK_LOG "${level}" "${content}"
    LOG "${level}" "${content}"
}

##################################################
# Description: Define the global variables used by the task.
# Globals:     TASKID, UPLOAD, PKGNAME, ACTION (read);
#              every variable assigned below (written)
# Side effect: creates FLAG_PATH when it does not exist yet
##################################################
function init_variable() {
    # Progress value shown by the Easysuite front end.
    PROGRESS=0
    # Working directory of the upgrade framework.
    WORK_PATH="/opt/upgrade"

    # Per-task log / progress / status files used by Easysuite.
    TASK_ID_PATH="${WORK_PATH}/easysuite_upgrade/taskmgr/${TASKID}"
    TASK_LOG_FILE="${TASK_ID_PATH}/task.log"
    TASK_PROGRESS_FILE="${TASK_ID_PATH}/task.progress"
    TASK_STATUS_FILE="${TASK_ID_PATH}/task.status"

    # A task whose directory already exists is treated as a re-entered task.
    if [ -d "${TASK_ID_PATH}" ]; then
        NEW_TASK="FALSE"
    else
        NEW_TASK="TRUE"
    fi

    # Full path of the osmediation package.
    OS_MEDIATATION="${UPLOAD}/${PKGNAME}"
    # Package version: the second "_"-separated field of the package name.
    VERSION=$(echo "${PKGNAME}" | awk -F_ '{print $2}')
    # List of internal-communication IPs; filled in later.
    NODE_LIST=""
    # Directory on the target node where the package is staged during transfer.
    SCP_PATH=/opt/oss/NCEEngr/tmp
    # Upgrade directory on the target node.
    OSM_PATH="/opt/oss/NCEEngr/O2_path/upgrade_config_${VERSION}"

    # Directory holding the per-node result records: the package path with
    # every ".zip" removed. It is deleted once all nodes succeed.
    FLAG_PATH="${OS_MEDIATATION//.zip/}"
    if [ ! -d "${FLAG_PATH}" ]; then
        mkdir -p "${FLAG_PATH}"
    fi
    # File recording the upgrade / rollback result of each node.
    FLAG_FILE="${FLAG_PATH}/${ACTION}_result"
}

##################################################
# Description: Parse the command line and store each value in its global.
#              Long options may be given with one or two leading dashes
#              (getopt -a).
# @Param1:     -pkgname
# @Param2:     package file name, without path
# @Param3:     -action
# @Param4:     upgrade/rollback
# @Param5:     -upload
# @Param6:     directory that holds the package, i.e. /opt/upgrade
# @Param7:     -taskid
# @Param8:     task id string
# Globals:     ARGS, PKGNAME, ACTION, UPLOAD, TASKID (written)
# Returns:     0 on success, 1 when the arguments cannot be parsed;
#              -h/--help ends the whole script with status 0
##################################################
function init_params() {
    DOUBLE_LOG "INFO" "get parameters:${*}."
    if ! ARGS=$(getopt -a -o h -l pkgname:,action:,upload:,taskid:,help -- "$@"); then
        LOG "ERROR" "Failed to parse the parameter."
        return 1
    fi
    # Replace the positional parameters with getopt's normalised list.
    eval set -- "${ARGS}"
    while true; do
        case "$1" in
            --pkgname)
                PKGNAME="$2"
                shift 2
                ;;
            --action)
                ACTION="$2"
                shift 2
                ;;
            --upload)
                UPLOAD="$2"
                shift 2
                ;;
            --taskid)
                TASKID="$2"
                shift 2
                ;;
            -h|--help)
                exit 0
                ;;
            --)
                # End-of-options marker emitted by getopt.
                shift
                break
                ;;
            *)
                LOG "ERROR" "Failed to parse the parameter."
                return 1
                ;;
        esac
    done
}

##################################################
# Description: Rewrite the Easysuite task progress and status files.
#              A failed write is only logged; it does not abort the caller.
# @Param1:     progress 0-100
# @Param2:     status fail/running/finish
# Globals:     TASK_PROGRESS_FILE, TASK_STATUS_FILE (read)
##################################################
function fresh_result() {
    local progress="$1"
    local status="$2"
    if ! echo "Progress=${progress}" >"${TASK_PROGRESS_FILE}"; then
        DOUBLE_LOG "ERROR" "Failed to echo 'Progress=${progress}' to ${TASK_PROGRESS_FILE}"
    fi
    if ! echo "Status=${status}" >"${TASK_STATUS_FILE}"; then
        DOUBLE_LOG "ERROR" "Failed to echo 'Status=${status}' to ${TASK_STATUS_FILE}"
    fi
}

##################################################
# Description: Mark the Easysuite task as failed (progress 100, status
#              "fail") and log the failure for task TASKID.
##################################################
function fresh_fail() {
    local progress="100"
    local status="fail"
    fresh_result "${progress}" "${status}"
    DOUBLE_LOG "ERROR" "Failed to excute task:${TASKID}."
}

##################################################
# Description: Mark the Easysuite task as finished (progress 100, status
#              "finish") and log the completion for task TASKID.
#              check_finish searches the task log for this exact message,
#              so the wording must stay unchanged.
##################################################
function fresh_finish() {
    local progress="100"
    local status="finish"
    fresh_result "${progress}" "${status}"
    DOUBLE_LOG "INFO" "Finished to excute task:${TASKID}."
}

##################################################
# Description: Advance the Easysuite task progress file.
#              The increment is applied only when the result stays within
#              100; otherwise PROGRESS and the file are left untouched.
# @Param1:     non-negative integer to add to the progress
# Globals:     PROGRESS (read/written), TASK_PROGRESS_FILE (read)
# Returns:     0 when nothing had to be written or the argument was
#              ignored; otherwise the status of the file write
##################################################
function fresh_progress() {
    local addnum="$1"
    # Reject empty / non-numeric input up front instead of feeding it to the
    # arithmetic below; the progress is left as it is in that case.
    if ! [[ "${addnum}" =~ ^[0-9]+$ ]]; then
        echo "fresh_progress: ignored invalid increment '${addnum}'" >&2
        return 0
    fi
    # 10# forces base 10 so a value such as 08 is not parsed as octal.
    local step=$(( 10#${addnum} ))
    local ceiling=$(( 100 - step ))
    if (( PROGRESS <= ceiling )); then
        PROGRESS=$(( PROGRESS + step ))
        echo "Progress=${PROGRESS}" >"${TASK_PROGRESS_FILE}"
    fi
}

##################################################
# Description: Check that the osmediation package exists, then advance
#              the task progress by 10.
# Globals:     OS_MEDIATATION (read)
# Returns:     0 when the package file exists, 1 otherwise
##################################################
function pre_check() {
    if [ ! -f "${OS_MEDIATATION}" ]; then
        DOUBLE_LOG "ERROR" "The ${OS_MEDIATATION} package does not exist."
        return 1
    fi
    fresh_progress 10
    # Explicit success: the result of this check must describe the package
    # only. A failed progress-file write is not a reason to report the
    # package as missing (fresh_result also treats such failures as
    # non-fatal).
    return 0
}

##################################################
# Description: Detect whether a re-entered task already finished.
#              For a task that is not new, the task log is searched for
#              the completion message; when found, the finished state is
#              refreshed again.
# Globals:     NEW_TASK, TASKID, TASK_LOG_FILE (read)
# Returns:     0 when the task already finished, 1 otherwise
##################################################
function check_finish() {
    # A brand-new task cannot have finished yet.
    [ "${NEW_TASK}" == "FALSE" ] || return 1
    if grep -q "Finished to excute task:${TASKID}." "${TASK_LOG_FILE}"; then
        fresh_finish
        return 0
    fi
    return 1
}


##################################################
# Description: Initialise the run and perform the up-front checks.
#              Order matters: the user check runs first, common.sh is
#              sourced before LOG is used, and the parameters are parsed
#              before the task variables are derived from them.
# Arguments:   the script's command-line arguments
# Returns:     0 on success, 1 when the user check or the parameter
#              parsing fails
##################################################
function init() {
    # Only the expected account may run the script.
    if ! check_user; then
        return 1
    fi
    # Load the shared helpers (presumably LOG and init_taskmgr, which are
    # not defined in this file - verify).
    source "${SCRIPT_PATH}"/common.sh
    LOG "INFO" "Start to init ${LOG_FILE}."
    # Validate and store the command-line parameters.
    if ! init_params "$@"; then
        return 1
    fi
    # Import the platform environment variables.
    source /opt/oss/manager/bin/engr_profile.sh
    # Derive the task-related globals.
    init_variable
    # Prepare the task folder.
    init_taskmgr "${TASKID}"
    return 0
}

##################################################
# Description: Copy the osmediation package to a node, trying up to three
#              times with a 5-minute limit per attempt.
# @Param1:     node ip
# Globals:     OS_MEDIATATION, SCP_PATH (read)
# Returns:     0 when one attempt succeeds, 1 when all attempts fail
##################################################
function scp_package() {
    local ip="$1"
    local res=1
    local try_count
    for try_count in 1 2 3; do
        # 300 s limit; timeout reports 124 when the limit is hit.
        # The destination is quoted so the brackets around the address are
        # never treated as a glob pattern by the local shell.
        timeout 300 scp -o StrictHostKeyChecking=no "${OS_MEDIATATION}" "[${ip}]:${SCP_PATH}" &>/dev/null
        res=$?
        if [ "${res}" -eq 0 ]; then
            break
        fi
        DOUBLE_LOG "WARN" "result of scp package:${res}."
        sleep 5
    done
    # Plain if/else: the outcome must not depend on the logger's exit status
    # (an "a && log && return 1" chain reports success when the log fails).
    if [ "${res}" -ne 0 ]; then
        DOUBLE_LOG "ERROR" "scp ${OS_MEDIATATION} to ${ip}:${SCP_PATH} by ossadm failed."
        return 1
    fi
    DOUBLE_LOG "INFO" "success to scp ${OS_MEDIATATION} to ${ip}"
    return 0
}

##################################################
# Description: Prepare a node before the osmediation script is run:
#              1 - create the upgrade directory (emptied first on upgrade)
#              2 - copy the package to the node and unzip it
# @Param1:     node ip
# Globals:     ACTION, OSM_PATH, SCP_PATH, PKGNAME (read)
# Returns:     0 on success, 1 when a remote command or the copy fails
##################################################
function pre_operate()
{
    local ip="$1"
    DOUBLE_LOG "INFO" "Start to init osmediation files for ${ip}."

    # Only an upgrade wipes the previous content of the upgrade directory.
    local clean_step=""
    if [ "${ACTION}" == "upgrade" ]; then
        clean_step="rm -rf ${OSM_PATH}/*;"
    fi
    local prepare_cmd="mkdir -p ${OSM_PATH} ${SCP_PATH};${clean_step}chown -R ossadm:ossgroup ${OSM_PATH}"
    if ! run_ssh_cmd "${ip}" "${prepare_cmd}"; then
        return 1
    fi

    if ! scp_package "${ip}"; then
        return 1
    fi

    # NOTE(review): the remote status is that of the trailing "rm -f", so a
    # failed unzip is not detected here - confirm whether that is intended.
    local extract_cmd="unzip -o ${SCP_PATH}/${PKGNAME} -d ${OSM_PATH} >/dev/null 2>&1;rm -f ${SCP_PATH}/${PKGNAME}"
    if ! run_ssh_cmd "${ip}" "${extract_cmd}"; then
        return 1
    fi
    DOUBLE_LOG "INFO" "Finished to init osmediation files for ${ip}."
    return 0
}

############################################################
# Description: Collect the internal-communication IPs of all nodes.
#              An embedded Python snippet reads nodelists.json and prints
#              every IP whose "usage" contains "maintenance"; the result is
#              de-duplicated and sorted, one IP per line, into NODE_LIST.
#              When the snippet reports an exception a warning is logged
#              and NODE_LIST is left unchanged.
# Globals:     NODE_LIST (written)
############################################################
function get_node_list() {
    local all_node
    all_node=$(
python <<PEOF
import json
import sys
node_file = r'/opt/oss/manager/etc/sysconf/nodelists.json'
manager_ips = []
try:
    with open(node_file, mode="r") as f:
        node_info = json.load(f)
    for node_id, one_node in node_info.get("nodeList", {}).items():
        manager_ips.extend([one_ip.get("IP") for one_ip in one_node.get("IPAddresses",{}) if "maintenance" in one_ip.get("usage")])
    print(" ".join(manager_ips))
    sys.exit(0)
except Exception as _:
    print("exception occur when read {}".format(node_file))
    sys.exit(1)
PEOF
)
    # The snippet signals failure through its output text.
    case "${all_node}" in
        *exception*)
            DOUBLE_LOG "WARN" "query node info:${all_node}."
            return
            ;;
    esac
    # One address per line, duplicates removed.
    NODE_LIST=$(echo "${all_node}" | xargs -n1 | sort -u)
}

############################################################
# Description: Run a command on a node over ssh and log the outcome.
#              The remote command's stdout is passed through; ssh's stderr
#              is discarded.
# @Param1:     node ip
# @Param2:     command to execute on the node
# Returns:     0 on success; otherwise the exit status of ssh itself, so
#              callers can tell special codes apart (for example 124 when
#              a remote "timeout" expired). Callers that only test for
#              non-zero are unaffected.
############################################################
function run_ssh_cmd() {
    local node_ip="${1}"
    local cmd="${2}"
    # Options are kept in an array so each one reaches ssh as its own word
    # without relying on unquoted word splitting.
    local -a ssh_opts=(
        -o ConnectTimeout=30
        -o stricthostkeychecking=no
        -o ConnectionAttempts=5
        -o ServerAliveInterval=3
        -o ServerAliveCountMax=20
    )
    # Human-readable form of the command, used in the log messages only.
    local ssh_cmd="ssh ${ssh_opts[*]} ${node_ip} ${cmd} 2>>/dev/null"
    ssh "${ssh_opts[@]}" "${node_ip}" "${cmd}" 2>>/dev/null
    local res=$?
    if [ "${res}" -ne 0 ]; then
        DOUBLE_LOG "ERROR" "failed to execute: ${ssh_cmd}, result:${res}, please check."
        return "${res}"
    fi
    DOUBLE_LOG "INFO" "succeed to execute: ${ssh_cmd}."
    return 0
}

############################################################
# Name:        check_flag
# Description: Used after the remote upgrade/rollback script returned
#              non-zero, to rule out a dropped ssh connection: poll the
#              node's upgrade directory for the result flag files, up to
#              20 times at 30-second intervals (about 10 minutes).
# @Param1:     node ip
# Globals:     OSM_PATH, ACTION (read)
# Returns:     0 when the success flag exists; 1 when the fail flag exists
#              or no flag shows up before the polling ends
############################################################
function check_flag() {
    local ip="$1"
    local try_count
    local output
    for try_count in $(seq 20); do
        output=$(run_ssh_cmd "${ip}" "[ -f ${OSM_PATH}/${ACTION}_succeed_flag ] && echo exec_ok")
        # The captured text can also contain the log lines that run_ssh_cmd
        # prints on stdout, so the marker is matched as an exact whole line
        # instead of comparing the entire capture with it.
        if grep -Fxq "exec_ok" <<<"${output}"; then
            return 0
        fi
        output=$(run_ssh_cmd "${ip}" "[ -f ${OSM_PATH}/${ACTION}_fail_flag ] && echo exec_nok")
        if grep -Fxq "exec_nok" <<<"${output}"; then
            return 1
        fi
        DOUBLE_LOG "INFO" "check result of ${ACTION} in ${ip} for ${try_count} times"
        sleep 30
    done
    DOUBLE_LOG "ERROR" "check ${ACTION} flag timeout, please check the process."
    return 1
}

##################################################
# Description: Prepare a node and run the upgrade/rollback entry script on
#              it over ssh.
# @Param1:     node ip
# Globals:     TIME_OUT, TIME_OUT_RESULT, OSM_PATH, ACTION, VERSION (read)
# Returns:     0 when the node finished successfully, 1 otherwise
##################################################
function execute() {
    local ip="$1"
    pre_operate "${ip}" || return 1
    # The remote script is limited to TIME_OUT seconds (20 minutes).
    local upgrade_cmd="timeout ${TIME_OUT} bash ${OSM_PATH}/execute_upgrade.sh ${ACTION} ${VERSION}"
    run_ssh_cmd "${ip}" "${upgrade_cmd}"
    local res=$?
    if [ "${res}" -eq 0 ]; then
        return 0
    fi
    # This branch takes effect only when run_ssh_cmd hands back the raw ssh
    # exit status (124 for an expired timeout).
    if [ "${res}" -eq "${TIME_OUT_RESULT}" ]; then
        DOUBLE_LOG "ERROR" "execute: ${ACTION} timeout in ${ip}, please check the process."
        return 1
    fi
    # Any other non-zero status may only mean the ssh session broke: the flag
    # files in the node's upgrade directory decide the real outcome. The node
    # checked is this function's own argument, not a variable of the caller.
    if check_flag "${ip}"; then
        return 0
    fi
    return 1
}

##################################################
# Description: Upgrade or roll back osmediation on every node in NODE_LIST,
#              running the nodes concurrently, and record one
#              "<node>:success" / "<node>:fail" line per node in FLAG_FILE.
#              On a retry, earlier "fail" records are dropped and nodes
#              already recorded as successful are skipped.
# Globals:     ACTION, NODE_LIST, FLAG_FILE, FLAG_PATH, OSM_PATH (read)
# Returns:     0 when every node succeeded (FLAG_PATH is then removed),
#              1 on a wrong ACTION or when any node failed
##################################################
function operate_osmediation() {
    if [ "${ACTION}" != "upgrade" ] && [ "${ACTION}" != "rollback" ]; then
        DOUBLE_LOG "ERROR" "The operation parameter is incorrect."
        return 1
    fi
    fresh_progress 30
    # Retry support: forget previous failures so those nodes run again.
    [ -f "${FLAG_FILE}" ] && sed -i "/fail/d" "${FLAG_FILE}"
    local node
    for node in ${NODE_LIST}; do
        # Skip nodes that already succeeded. The record is matched as an
        # exact, literal line: a substring/regex match would treat
        # 10.0.0.1 as done when only 10.0.0.11 succeeded.
        if grep -Fxq -- "${node}:success" "${FLAG_FILE}" 2>/dev/null; then
            DOUBLE_LOG "INFO" "task of ${node} already success."
            continue
        fi
        # Upgrade / roll back this node in the background.
        {
            if execute "${node}"; then
                echo "${node}:success" >>"${FLAG_FILE}"
            else
                echo "${node}:fail" >>"${FLAG_FILE}"
            fi
        } &
    done
    # Barrier: wait until every node has reported.
    wait

    local res
    res=$(grep "fail" "${FLAG_FILE}")
    if [ -n "${res}" ]; then
        DOUBLE_LOG "ERROR" "The operation is failed."
        DOUBLE_LOG "ERROR" "The information is as follows:"
        local info
        for info in ${res}; do
            DOUBLE_LOG "ERROR" "${info}"
        done
        DOUBLE_LOG "ERROR" "please login to failed node by ossadm, and read log in path:${OSM_PATH}"
        return 1
    fi
    DOUBLE_LOG "INFO" "The operation is success."
    # All nodes succeeded: remove the directory holding the result records.
    # Guarded so an empty variable can never reach rm -rf.
    if [ -n "${FLAG_PATH}" ]; then
        rm -rf -- "${FLAG_PATH}"
    fi
    return 0
}

##################################################
# Description: Run the whole upgrade / rollback flow: obtain the node list
#              (up to three attempts), check the package, operate on all
#              nodes and refresh the final task state.
# Globals:     NODE_LIST (read; filled by get_node_list)
# Returns:     0 on success, 1 on any failure
##################################################
function upgrade_rollback_osmediation() {
    # nodelists.json can be unstable during an NCE upgrade, so reading the
    # node list is retried. The 20 s pause is taken only between attempts,
    # not after the last one, where it would just delay the error.
    local attempt
    local max_attempts=3
    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        get_node_list
        if [ -n "${NODE_LIST}" ]; then
            break
        fi
        if (( attempt < max_attempts )); then
            sleep 20
        fi
    done
    # NOTE(review): the two early failures below return without fresh_fail,
    # so the task status file is not switched to "fail" - confirm whether the
    # caller relies on the exit status alone in these cases.
    if [ -z "${NODE_LIST}" ]; then
        DOUBLE_LOG "ERROR" "Failed to get ip list, please check /opt/oss/manager/etc/sysconf/nodelists.json"
        return 1
    fi
    # Make sure the package exists.
    pre_check || return 1
    # Operate on all nodes; mark the task failed when any node fails.
    if ! operate_osmediation; then
        fresh_fail
        return 1
    fi
    # Done: refresh the progress to 100% / finish.
    fresh_finish
    return 0
}

##################################################
# Description: Entry function.
#              Initialises the run, returns early when a re-entered task
#              already finished, otherwise starts the upgrade / rollback.
# Arguments:   the script's command-line arguments
# Returns:     1 when initialisation fails, 0 when the task had already
#              finished, otherwise the status of
#              upgrade_rollback_osmediation
##################################################
function main() {
    # Initialisation and up-front checks.
    if ! init "$@"; then
        return 1
    fi
    # Re-entry: nothing left to do when the task already completed.
    if check_finish; then
        return 0
    fi
    # Start the upgrade / rollback; its status becomes main's status.
    upgrade_rollback_osmediation
}

# Script entry point: run main with the command-line arguments and exit
# with main's return status.
main "$@"
exit $?