# -*- coding: UTF-8 -*-
# Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.

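"""
Tooling for the patch that handles abnormal controller resets when a
controller is replaced on 6U4C devices.
Main functions:
    1. Checking the patch branch switch
    2. Checking the management boards
    3. Masking and unmasking alarm notifications; clearing alarms
    4. Disabling and enabling the management ports
    5. Upgrading and rolling back the management board firmware
    6. Isolating the faulty controller and cancelling the isolation
"""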
import json
import stat
import time
import os
import functools

import ConfigParser
import traceback
import codecs

import BaseFactory

from FuncFactory import getFruListInfo
from BaseFactory import log, result
from cbb.frame.context import contextUtil
from cbb.frame.rest import restData, restUtil
from cbb.frame.cli import cliUtil
from cbb.frame.cli.cli_con_mgr import get_ctrl_cli
from cbb.frame.cli.cli_con_mgr import create_available_conn_for_svp
from cbb.frame.cli.cliFactory import CliConnection
from cbb.frame.base import baseUtil

import java.lang.System as javaSystem
import java.lang.Exception as jException
from com.huawei.ism.tool.obase.exception import ToolException

# Product models supported by the 6U4C controller-replacement patch
# (checked by is_patch_support_model).
PRODUCT_MODELS_FOR_6U4C_PATCH = \
    ["18500 V5", "18500F V5", "18500 V3", "18500F V3",
     "18800 V5", "18800F V5", "18800 V3", "18800F V3",
     "6800 V5", "6800F V5", "6800 V3", "6800F V3"]
# Product versions supported by the patch (checked by is_patch_support_model).
PRODUCT_VERSIONS_FOR_6U4C_PATCH = ["V500R007C10",
                                   "V500R007C30SPC100",
                                   "V500R007C61",
                                   "V300R006C20",
                                   "V300R006C50SPC100"]

# One entry per supported product version, naming the hot patch
# ("required_patch") associated with that version. Every entry shares the same
# product series and the supported model list defined above.
# NOTE(review): the consumer of this list is not visible in this part of the
# file - confirm how "required_patch" is compared against the device.
REQUIRED_PATCH_LIST_FOR_6U4C_PATCH = [
    {
        "product_version": "V500R007C10",
        "required_patch": "V500R007C10SPH027",
        "product_series": "ConvergedStorage",
        "product_models": PRODUCT_MODELS_FOR_6U4C_PATCH
    },
    {
        "product_version": "V500R007C30SPC100",
        "required_patch": "V500R007C30SPH121",
        "product_series": "ConvergedStorage",
        "product_models": PRODUCT_MODELS_FOR_6U4C_PATCH
    },
    {
        "product_version": "V500R007C61",
        "required_patch": "V500R007C61SPH012",
        "product_series": "ConvergedStorage",
        "product_models": PRODUCT_MODELS_FOR_6U4C_PATCH
    },
    {
        "product_version": "V300R006C20",
        "required_patch": "V300R006C20SPH027",
        "product_series": "ConvergedStorage",
        "product_models": PRODUCT_MODELS_FOR_6U4C_PATCH
    },
    {
        "product_version": "V300R006C50SPC100",
        "required_patch": "V300R006C50SPH121",
        "product_series": "ConvergedStorage",
        "product_models": PRODUCT_MODELS_FOR_6U4C_PATCH
    }
]

# Key name for the patch begin time.
# NOTE(review): presumably used to store/look up the start time of the patch
# procedure; its usage is not visible in this part of the file.
BEGIN_TIME_KEY = "6u4c_patch_begin_time"

# Trust list of alarms that may be raised while a controller is replaced with
# the patch applied. It is the default alarm list of modify_alarm_notification.
PATCH_ALARM_TRUSTLIST = (
    "0xF010D0032",  # Management module firmware version incompatible with the array software
    "0xF00D10013",  # Part of the management module status cannot be monitored
    "0xF00CE002A",  # Disk enclosure is single-linked
    "0xF0D30003",  # Fan module removed
    "0xF00D10038",  # PCIe channel between interface module and controller is abnormal
    "0xF0D10005",  # Interface module removed
    "0xF00C90015",  # Controller write cache disabled
    "0xF0D20004",  # BBU module removed
    "0xF00CF004A",  # Part of the controller status cannot be monitored
    "0xF00CF005F",  # Controller fault
    "0xF00D1003B",  # Management module removed
    "0xF00C9013D",  # Port IP address conflict
    "0xF01320006",  # Auxiliary cooling module incompatible with the controller module
    "0xF00170014",  # Power module has no input
    "0xF0D1000A",  # Interface module removed abnormally; the controller may work abnormally
    "0xF00CF0071",  # Controller power-on failed
    "0xF00CF0014",  # System cannot monitor the controller
    "0xF00CF007E",  # Controller internal error; a repair is being attempted
    "0xF00C90151",  # A component inside the system worked abnormally; self-repair completed
    "0xF00CF0073",  # Controller power-on timed out
)

# Management-port commands and run mode differ between product versions.
# Keys are tuples of product versions; get_board_cmd_dict picks the entry whose
# key tuple contains the device version.
# disable - command that closes the port; enable - command that opens it;
# show - query command; runModel - CLI mode the commands are executed in
MGMT_PORT_PATCH_CMD_DICT = {
    ("V500R007C61",): {
        "disable": "eam.sh showloopmap SPH012_MGT_DISABLE",
        "enable": "eam.sh showloopmap SPH012_MGT_ENABLE",
        "show": "eam.sh showloopmap SPH012_MGT_SHOW",
        "runModel": "minisystem"
    },
    ("V500R007C30SPC100", "V300R006C50SPC100"): {
        "disable": "eam.sh showloopmap SPH_MGT_DISABLE",
        "enable": "eam.sh showloopmap SPH_MGT_ENABLE",
        "show": "eam.sh showloopmap SPH_MGT_SHOW",
        "runModel": "minisystem"
    },
    ("V500R007C10", "V300R006C20"): {
        "disable": "eam showintfboardinfo SPH_MGT_DISABLE",
        "enable": "eam showintfboardinfo SPH_MGT_ENABLE",
        "show": "eam showintfboardinfo SPH_MGT_SHOW",
        "runModel": "debug"
    },
}

# Commands used to upgrade or roll back the management board:
# execute - command that starts the operation; check - command that queries
# its result (polled by query_mm_version_result).
UPGRADE_MM_VERSION_CMD_DICT = {
    "upgrade": {
        "execute": "upgradesmm",
        "check": "checkupgradesmmversion"
    },
    "rollback": {
        "execute": "rollbacksmm",
        "check": "checkrollbacksmmversion"
    }
}

# Maps the controller letter (last character of a controller ID) to its index.
# get_ctrl_node_id uses the index to pick the node from an engine's node list.
node_id_dict = dict(A=0, B=1, C=2, D=3)


class SkipSteps:
    """
    Names of the rollback steps that may be skipped.

    ControllerPatchFunction.get_skip_steps appends these values to the list of
    steps that do not need to be executed.
    """
    # Alarm handling
    RECOVERY_ALARM_NOTIFY = "recovery_alarm_notify"
    CLEAR_ALARM = "clear_alarm"
    # Management port
    OPEN_MANAGE_PORT = "open_manage_port"
    # Multi-engine start-up flag
    MULTI_STARTUP_FLAG_RESUME = "multi_startup_flag_resume"
    # Management board firmware
    ROLLBACK_M_BOARD_FIRMWARE = "rollback_m_board_firmware"
    # Controller isolation
    CANCEL_ISOLATE_CTRL = "cancel_isolate_ctrl"


class LangKey:
    """
    Resource keys of the error messages reported by this patch workflow.

    Each value equals its attribute name; the keys are passed to
    result.setResultFailByKey when a step fails.
    """
    # Pre-offline checks and preparation
    MGMT_BOARD_CHECK_STATUS_FAILED = "MGMT_BOARD_CHECK_STATUS_FAILED"
    MGMT_BOARD_CHECK_VERSION_FAILED = "MGMT_BOARD_CHECK_VERSION_FAILED"
    MASK_ALARM_NOTIFICATIONS_FAILED = "MASK_ALARM_NOTIFICATIONS_FAILED"
    DISABLE_MANAGEMENT_PORT_FAILED = "DISABLE_MANAGEMENT_PORT_FAILED"

    # Post-offline operations
    UPGRADE_MANAGEMENT_BOARD_FAILED = "UPGRADE_MANAGEMENT_BOARD_FAILED"
    ISOLATE_CONTROLLER_FAILED = "ISOLATE_CONTROLLER_FAILED"

    # Rollback / recovery operations
    CANCEL_ISOLATION_OF_CONTROLLER_FAILED = (
        "CANCEL_ISOLATION_OF_CONTROLLER_FAILED")
    ROLLBACK_MANAGEMENT_FIRMWARE = "ROLLBACK_MANAGEMENT_FIRMWARE"
    # NOTE: the spelling "MANAGEMEENT" is part of the key and must not change.
    OPEN_MANAGEMEENT_PORT_FAILED = "OPEN_MANAGEMEENT_PORT_FAILED"
    CLEAR_PATCH_ALARM_FAILED = "CLEAR_PATCH_ALARM_FAILED"
    REVERT_ALARM_NOTIFICATION_FAILED = "REVERT_ALARM_NOTIFICATION_FAILED"
    TCP_AND_PATCH_BOTH_FAILED = "TCP_AND_PATCH_BOTH_FAILED"

    # Multi-engine handling
    MULTI_ENGINE_PAUSE_CTRL_FAILED = "MULTI_ENGINE_PAUSE_CTRL_FAILED"
    MULTI_ENGINE_RESUME_CTRL_FAILED = "MULTI_ENGINE_RESUME_CTRL_FAILED"


def log_decorator(func):
    """Record the start and the end of a step in the instance's step log.

    The wrapper appends one line before and one line after calling ``func`` to
    the file named by ``self.log_file_name`` (created with owner read/write
    permission if missing). No end line is written when ``func`` raises.

    NOTE: the wrapper consumes the bound instance as ``self`` and calls
    ``func(*args, **kwargs)`` without forwarding it. Callers of decorated
    methods therefore pass the instance explicitly as the first positional
    argument, e.g. ``self.set_control_pause(self)``.

    :param func: function to wrap
    :return: wrapped function returning whatever ``func`` returns
    """

    def _describe(call_kwargs):
        # Keyword arguments rendered as "name=value" pairs joined by '&'.
        return '&'.join(
            ['{}={}'.format(key, val) for key, val in call_kwargs.items()])

    def _append(path, record):
        # Append-only open; S_IRUSR | S_IWUSR applies when the file is created.
        descriptor = os.open(path, os.O_CREAT | os.O_WRONLY | os.O_APPEND,
                             stat.S_IRUSR | stat.S_IWUSR)
        with os.fdopen(descriptor, 'a') as step_log:
            step_log.write(record)

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        started_at = time.strftime("%Y-%m-%d %H:%M:%S")
        _append(self.log_file_name,
                "{},start calling Method: {},Kwargs: {}\n".format(
                    started_at, func.__name__, _describe(kwargs)))

        outcome = func(*args, **kwargs)

        finished_at = time.strftime("%Y-%m-%d %H:%M:%S")
        _append(self.log_file_name,
                "{},end called Method:{},Kwargs:{},Start Time:{},End Time:{},Result:{}\n".format(
                    finished_at, func.__name__, _describe(kwargs),
                    started_at, finished_at, outcome))
        return outcome

    return wrapper


class ControllerPatchFunction(object):
    _instance = None

    def __init__(self, context):
        """Bind the tool context and work out the per-device step log path.

        :param context: tool context; ``context.get("dev")`` must provide the
            device serial number under ``"sn"``
        """
        self.context = context
        self.lang = contextUtil.getLang(context)
        self.cli = get_ctrl_cli(context)
        self.dev_obj = contextUtil.getDevObj(context)
        self.rest = contextUtil.getRest(context)
        # Step log: log_decorator appends the outcome of each key step to the
        # file <user.dir>/cfg/<sn>_step.log.
        device_sn = context.get("dev").get("sn")
        BaseFactory.log.error(context, "device_sn:{}".format(device_sn))
        log_path = os.sep.join([javaSystem.getProperty("user.dir"),
                                "cfg",
                                "{}_step.log".format(device_sn)])
        BaseFactory.log.error(context, "step_file_path:{}".format(log_path))
        self.log_file_name = log_path

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super(ControllerPatchFunction, cls).__new__(
                cls, *args, **kwargs)
        return cls._instance

    @staticmethod
    def get_skip_steps(skip_steps, step_status):
        """Work out which rollback steps do not have to be executed.

        A rollback step is skipped when its forward step was never recorded,
        when the forward step is recorded as failed ('0'), or when the rollback
        step itself is already recorded as successful ('1').

        :param skip_steps: list the skipped SkipSteps values are appended to
        :param step_status: mapping of "<method>/<kwargs>" keys to '0'/'1'
        :return: the same ``skip_steps`` list
        """

        def rollback_unneeded(forward_keys, reverse_key):
            # True when nothing has to be undone for this forward step.
            recorded = [key for key in forward_keys if key in step_status]
            if not recorded:
                # forward step never executed
                return True
            for key in recorded:
                if step_status.get(key) == '0':
                    # forward step failed
                    return True
            # rollback already succeeded
            return step_status.get(reverse_key) == '1'

        # (forward step keys, rollback step key, step to skip), in the order
        # the skipped steps are reported.
        rollback_plan = (
            # mask alarm notifications -> restore alarm notifications
            (('modify_alarm_notification/is_mask=1',),
             'modify_alarm_notification/is_mask=0',
             SkipSteps.RECOVERY_ALARM_NOTIFY),
            # disable management ports -> enable management ports
            (('enable_or_disable_management_port/is_enable=0',),
             'enable_or_disable_management_port/is_enable=1',
             SkipSteps.OPEN_MANAGE_PORT),
            # pause controller start-up -> restore multi-engine start-up flag.
            # The original code looked this step up as 'set_control_pause/&'
            # for the "not executed" test but as 'set_control_pause/' for the
            # "failed" test, so one of the two tests could never match.
            # Both spellings are accepted here.
            # NOTE(review): the code producing these keys is not visible -
            # confirm which spelling is really written.
            (('set_control_pause/', 'set_control_pause/&'),
             'set_control_resume/',
             SkipSteps.MULTI_STARTUP_FLAG_RESUME),
            # upgrade management board -> roll back management board firmware
            (('upgrade_or_rollback_mm_version/is_upgrade=1',),
             'upgrade_or_rollback_mm_version/is_upgrade=0',
             SkipSteps.ROLLBACK_M_BOARD_FIRMWARE),
            # isolate controller -> cancel controller isolation
            (('isolate_or_cancel_fault_controller/is_isolate=1',),
             'isolate_or_cancel_fault_controller/is_isolate=0',
             SkipSteps.CANCEL_ISOLATE_CTRL),
        )
        for forward_keys, reverse_key, skipped_step in rollback_plan:
            if rollback_unneeded(forward_keys, reverse_key):
                skip_steps.append(skipped_step)
        return skip_steps

    def patch_operation_before_power_off(self, ctrl_id):
        """
        Patch-related operations executed before the controller is taken offline.

        Stores the node id of the controller under "fault_node_id", then runs,
        in order: management board status check, the unified rollback
        (patch_operation_final_v2), management board version check, recording
        of the already masked alarms, masking of alarm notifications and
        disabling of the management ports. The first failing step sets the
        failure result (where a key is given) and stops the sequence.

        :param ctrl_id: ID of the controller to be replaced
        :return: True/False whether all operations succeeded
        """
        log.info(self.context, "6u4c patch scene in pre offline controller.")
        contextUtil.setItem(
            self.context, 'fault_node_id', self.get_ctrl_node_id(ctrl_id))
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        # 1, check the management board status
        if not self.is_two_board_normal():
            log.error(self.context, "mgmtboard status check failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.MGMT_BOARD_CHECK_STATUS_FAILED)
            return False
        # unified rollback (defined outside the visible part of this file)
        if not self.patch_operation_final_v2():
            log.error(self.context, "rollback failed.")
            return False

        # 2, check the management board version
        if not self.check_manage_board_version():
            # version check failed: roll the management board back; the result
            # of the rollback itself is not evaluated here.
            # The instance is passed explicitly because log_decorator does not
            # forward self to the wrapped method.
            self.upgrade_or_rollback_mm_version(self, is_upgrade=False)
            log.error(self.context, "manager board version check failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.MGMT_BOARD_CHECK_VERSION_FAILED)
            return False

        # 3, record the alarms that are already masked
        self.record_existing_alarm_mask()

        # 4, mask alarm notifications
        if not self.modify_alarm_notification(self, is_mask=True):
            log.error(self.context, "mask alarm notificaitons failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.MASK_ALARM_NOTIFICATIONS_FAILED)
            return False
        # 5, disable the management ports
        if not self.enable_or_disable_management_port(self, is_enable=False):
            log.error(self.context, "disable management port failed.")
            result.setResultFailByKey(
                self.context, LangKey.DISABLE_MANAGEMENT_PORT_FAILED)
            return False
        return True

    def patch_operation_after_power_off(self, ctrl_id):
        """Run the post-offline patch operations if a CLI connection exists.

        Delegates to patch_operation_post_offline once the CLI returned by
        cliUtil.getAvaliableCli reports that it is connected.

        :param ctrl_id: ID of the controller to be replaced
        :return: result of patch_operation_post_offline, or False when the CLI
            is not connected
        """
        available_cli = cliUtil.getAvaliableCli(self.cli)
        if not available_cli.isConnected():
            return False
        return self.patch_operation_post_offline(ctrl_id)

    def patch_operation_post_offline(self, ctrl_id):
        """Patch-related operations executed after the controller is offline.

        Runs, in order: pause of the controller start-up (multi-engine only),
        management board upgrade, a 120 s wait, isolation of the faulty
        controller and the multi-engine check that the controller stays down.
        The first failing step sets the failure result and stops the sequence.

        :param ctrl_id: ID of the controller to be replaced
        :return: True/False whether all operations succeeded
        """
        log.info(self.context,
                 "6u4c patch scene in post offline controller.")
        # Decorated steps get the instance passed explicitly because
        # log_decorator does not forward self to the wrapped method.
        # 0, multi-engine handling: stop the controller from starting
        if not self.set_control_pause(self):
            log.error(self.context, 'Multi-engine pause ctrl failed.')
            result.setResultFailByKey(
                self.context, LangKey.MULTI_ENGINE_PAUSE_CTRL_FAILED)
            return False
        # 1, upgrade the management board
        if not self.upgrade_or_rollback_mm_version(self, is_upgrade=True):
            log.error(self.context, "upgrade mm version failed.")
            result.setResultFailByKey(
                self.context, LangKey.UPGRADE_MANAGEMENT_BOARD_FAILED)
            return False
        # after a successful upgrade, wait 120s before isolating
        time.sleep(120)
        # 2, isolate the faulty controller
        if not self.isolate_or_cancel_fault_controller(self, ctrl_id,
                                                       is_isolate=True):
            log.error(self.context, "isolate fault controller failed.")
            result.setResultFailByKey(
                self.context, LangKey.ISOLATE_CONTROLLER_FAILED)
            return False
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        # 3, multi-engine handling: wait 10 minutes and verify the faulty
        # controller did not come back
        if not self.is_ctrl_keep_down_after_ten_min(ctrl_id):
            log.error(self.context,
                      'Multi-engine, fault ctrl wake up after ten min.')
            result.setResultFailByKey(self.context,
                                      LangKey.MULTI_ENGINE_PAUSE_CTRL_FAILED)
            return False
        return True

    def is_patch_support_model(self):
        """Tell whether the device model and version are covered by the patch.

        :return: True when the product model is in PRODUCT_MODELS_FOR_6U4C_PATCH
            and the current version is in PRODUCT_VERSIONS_FOR_6U4C_PATCH,
            otherwise False
        """
        model = contextUtil.getProductModel(self.context)
        version = contextUtil.getCurVersion(self.context)
        message = ("check is patch support model. "
                   "dev_type: %s ; dev_version: %s") % (model, version)
        log.info(self.context, message)
        if model not in PRODUCT_MODELS_FOR_6U4C_PATCH:
            return False
        return version in PRODUCT_VERSIONS_FOR_6U4C_PATCH

    def get_engine_node_info(self):
        """Query the engine / node topology of the controllers.

        Within this class index 1 of the returned tuple is used as the current
        node and index 2 as the mapping from engine id to that engine's nodes.

        :return: the topology tuple reported by
            cliUtil.getControllerEngineTopographyMiniSys
        :raises Exception: when the query reports failure
        """
        query = cliUtil.getControllerEngineTopographyMiniSys(
            self.cli, self.lang)
        succeeded, _cli_ret, failure_msg, topography = query
        if succeeded:
            return topography
        raise Exception("query system status failed:%s" % str(failure_msg))

    def get_sel_ctrl_engine_id(self):
        """Return the engine ID of the controller selected for replacement.

        The engine ID is the selected row's "id" without its last character.

        :return: engine ID of the selected controller
        """
        selected_row = contextUtil.getSelectedItem(
            self.context, "input_selectfru_controllerSelectedRow")
        log.info(self.context,
                 "patch scene sel ctrl info : %s" % str(selected_row))
        ctrl_id = selected_row.get("id")
        return ctrl_id[:-1]

    def get_sel_ctrl_parent_id(self):
        """Return the "parentID" of the controller selected for replacement.

        :return: value stored under "parentID" in the selected row
        """
        selected_row = contextUtil.getSelectedItem(
            self.context, "input_selectfru_controllerSelectedRow")
        log.info(self.context,
                 "patch scene sel ctrl info : %s" % str(selected_row))
        parent_id = selected_row.get("parentID")
        return parent_id

    def is_four_ctrl_engine(self):
        """Check whether the engine of the selected controller is a 4-controller engine.

        The CLI is switched back to CLI mode after the topology query.

        :return: True when the engine holds more than two controllers
        """
        log.info(self.context, "check is four ctrl engine")
        nodes_by_engine = self.get_engine_node_info()[2]
        selected_engine = self.get_sel_ctrl_engine_id()
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        # any engine with more than two controllers qualifies
        nodes_in_engine = nodes_by_engine.get(selected_engine)
        return len(nodes_in_engine) > 2

    def is_multi_engine(self):
        """Check whether the device has more than one engine.

        The answer is also stored in the context under "is_multi_engine".

        :return: True for a multi-engine device, otherwise False
        """
        log.info(self.context, 'check is multi engine')
        engine_count = len(self.get_engine_node_info()[2])
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        multi_engine = engine_count > 1
        contextUtil.setItem(self.context, "is_multi_engine", multi_engine)
        return multi_engine

    def get_patch_switch(self):
        """Read the patch switch from the FRU configuration file.

        The file is <user.dir>/cfg/fruCfg.conf; the switch is option
        "6u4c_patch_switch" in section "fru".

        :return: True when the switch value is "on", otherwise False
        """
        conf_path = os.sep.join(
            [javaSystem.getProperty("user.dir"), "cfg", "fruCfg.conf"])
        log.info(self.context, "conf_path:%s" % conf_path)
        parser = ConfigParser.ConfigParser()
        # "utf-8-sig" tolerates a leading byte order mark in the file
        with codecs.open(conf_path, encoding="utf-8-sig") as conf_file:
            parser.readfp(conf_file)
        switch_value = parser.get("fru", "6u4c_patch_switch")
        log.info(self.context, "patch switch is : %s" % switch_value)
        return switch_value == "on"

    def is_patch_scene(self, is_replace_controller):
        """Decide whether this run is the 6U4C controller-replacement patch scene.

        All of the following must hold: a controller is being replaced, the
        patch switch is on, model and version are supported, and the engine of
        the selected controller is a 4-controller engine. When a controller is
        being replaced the verdict is stored in the context under
        "is_patch_scene".

        :param is_replace_controller: whether a controller is being replaced
        :return: True/False whether the scene conditions are met
        """
        # When other hardware is replaced there is no selected controller for
        # is_four_ctrl_engine() to read, so this has to be ruled out first.
        if not is_replace_controller:
            return False
        switch_on = self.get_patch_switch()
        model_supported = self.is_patch_support_model()
        four_ctrl_engine = self.is_four_ctrl_engine()
        in_patch_scene = all((switch_on, model_supported, four_ctrl_engine))
        contextUtil.setItem(self.context, "is_patch_scene", in_patch_scene)
        return in_patch_scene

    def is_two_board_normal(self):
        """Check that both management boards of the selected engine are healthy.

        Interface modules are filtered down to those that are management
        boards, belong to the selected controller's parent, are running and
        have normal health.

        :return: True when exactly two boards match, otherwise False
        """
        parent_id = self.get_sel_ctrl_parent_id()
        interface_modules = getFruListInfo(
            self.context, restData.Enum.ObjEnum.INTF_MODULE)
        equals = restData.Enum.ConditionTypeEnum.EQ
        # (attribute, expected value) pairs; every one is an equality test
        expectations = (
            (restData.Hardware.IntfModule.MODEL,
             restData.Enum.IntfModelEnum.ManagerBoard),
            (restData.PublicAttributes.PARENT_ID, parent_id),
            (restData.PublicAttributes.RUNNING_STATUS,
             restData.Enum.RunningStatusEnum.RUNNING),
            (restData.PublicAttributes.HEALTH_STATUS,
             restData.Enum.HealthStatusEnum.NORMAL),
        )
        conditions = []
        for attribute, expected in expectations:
            conditions.append(
                restUtil.Tlv2Rest.getCondition(attribute, equals, expected))
        condition_list = restUtil.Tlv2Rest.getConditionList(*conditions)
        healthy_boards = restUtil.Tlv2Rest.filter(interface_modules,
                                                  condition_list)
        return len(healthy_boards) == 2

    def is_ctrl_keep_down_after_ten_min(self, fault_ctrl_id):
        """Multi-engine scene: wait 10 minutes, then check the faulty controller is still absent.

        On devices that are not multi-engine nothing is waited for and True is
        returned immediately.

        :param fault_ctrl_id: ID of the faulty controller; its first character
            is used as the engine ID
        :return: True when the faulty node is not listed in its engine (or the
            connection is lost and cannot be rebuilt), otherwise False
        """
        if not contextUtil.getItem(self.context, "is_multi_engine"):
            return True

        log.info(self.context, 'begin wait..')
        time.sleep(10 * 60)
        log.info(self.context, 'end wait..')
        logger = self.context.get("logger")
        # After 10 minutes the connection may be gone, either because a
        # controller restarted or simply because of the long idle time.
        try:
            CliConnection.check_connect(self.cli, logger)
        except (Exception, jException):
            log.error(self.context, 'connect failed, create another conn')
            self.cli.close()
            if not baseUtil.has_svp_module(self.context):
                # connection failed: treat the controller as absent
                return True
            self.cli = create_available_conn_for_svp(self.context, logger)

        engine_id = fault_ctrl_id[0]
        fault_node = contextUtil.getItem(self.context, "fault_node_id")
        nodes_by_engine = self.get_engine_node_info()[2]
        return fault_node not in nodes_by_engine.get(engine_id)

    @log_decorator
    def set_control_pause(self):
        """Multi-engine scene: make the faulty controller pause during start-up.

        Because of log_decorator the instance has to be passed explicitly:
        ``self.set_control_pause(self)``.

        :return: True on single-engine devices; the CLI connection state when
            the faulty node cannot be reached; otherwise whether the command
            output contains "Please reboot the system"
        """
        if not contextUtil.getItem(self.context, "is_multi_engine"):
            return True
        current_node = self.get_engine_node_info()[1]
        fault_node = contextUtil.getItem(self.context, "fault_node_id")
        if not self.ssh_to_fault_node(fault_node, current_node):
            return cliUtil.getAvaliableCli(self.cli).isConnected()

        pause_cmd = "setpwronpausestage.sh pausethirdinsmod"
        _flag, output, _err_msg = cliUtil.excuteCmdInMinisystemModel(
            self.cli, pause_cmd, self.lang)
        paused = "Please reboot the system" in output
        # leave the heartbeat session if one was opened to reach the node
        if fault_node != current_node:
            cliUtil.exitHeartbeatCli(self.cli, self.lang)
        return paused

    @log_decorator
    def set_control_resume(self):
        """Multi-engine scene: restore the start-up flag of the faulty controller.

        Because of log_decorator the instance has to be passed explicitly:
        ``self.set_control_resume(self)``.

        :return: True on single-engine devices; the CLI connection state when
            the faulty node cannot be reached; otherwise the success flag of
            the resume command
        """
        if not contextUtil.getItem(self.context, "is_multi_engine"):
            return True
        current_node = self.get_engine_node_info()[1]
        fault_node = contextUtil.getItem(self.context, "fault_node_id")
        if not self.ssh_to_fault_node(fault_node, current_node):
            return cliUtil.getAvaliableCli(self.cli).isConnected()

        resume_cmd = "setpwronpausestage.sh resumethirdinsmod"
        succeeded, _output, _err_msg = cliUtil.excuteCmdInMinisystemModel(
            self.cli, resume_cmd, self.lang)
        # leave the heartbeat session if one was opened to reach the node
        if fault_node != current_node:
            cliUtil.exitHeartbeatCli(self.cli, self.lang)
        return succeeded

    def ssh_to_fault_node(self, fault_ctrl_node, cur_node):
        """Open a heartbeat session to the (already offline) faulty controller.

        Nothing is done when the current node already is the faulty node.

        :param fault_ctrl_node: node id of the faulty controller
        :param cur_node: node id of the node currently logged in to
        :return: True/False whether the faulty node can be operated
        """
        if fault_ctrl_node == cur_node:
            return True

        heart_cmd = "sshtoremoteExt %s" % str(fault_ctrl_node)
        flag, heart_ret, _errmsg = cliUtil.ssh_remote_ctrl_even_mini_sys(
            self.cli, heart_cmd, self.dev_obj.get("pawd"), self.lang)
        if "Input minisystem to minisystem mode" not in heart_ret:
            return flag
        # the remote side asks for minisystem mode: enter it explicitly
        minisystem_ret = cliUtil.execCliCmd(self.cli, "minisystem", True)
        return "System Information" in minisystem_ret

    @log_decorator
    def modify_alarm_notification(self, is_mask,
                                  alarm_list=PATCH_ALARM_TRUSTLIST):
        """Mask or unmask the notifications of the given alarms.

        When unmasking, alarms stored in the persisted "alarm_mask_id_list"
        module are left out so they stay masked. Because of log_decorator the
        instance has to be passed explicitly:
        ``self.modify_alarm_notification(self, is_mask=True)``.

        :param is_mask: True masks the notifications, False unmasks them
        :param alarm_list: alarm IDs to change
        :return: True when nothing has to be changed or the CLI output
            contains "successfully", otherwise False
        """
        log.info(self.context,
                 "modify alarm notification, is_mask: %s" % str(is_mask))
        pending_alarms = list(alarm_list)
        if not is_mask:
            previously_masked = BaseFactory.persist.getModule(
                self.context, "alarm_mask_id_list")
            for masked_id in previously_masked:
                if masked_id in pending_alarms:
                    pending_alarms.remove(masked_id)
        if not pending_alarms:
            return True

        # heartbeat to a controller that is not the faulty one
        self.ssh_to_non_fault_ctrl()
        switch_value = "on" if is_mask else "off"
        alarm_mask_cmd = "change alarm_mask alarm_id_list=%s mask_switch=%s" \
                         % (",".join(pending_alarms), switch_value)
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        output = cliUtil.execCliCmd(self.cli, alarm_mask_cmd, True)
        # confirm every (y/n) prompt
        while "(y/n)" in output:
            output = cliUtil.execCliCmd(self.cli, "y", True)
        return "successfully" in output

    def get_board_cmd_dict(self):
        """Look up the management-port commands matching the device version.

        :return: the MGMT_PORT_PATCH_CMD_DICT entry whose key tuple contains
            the device version
        :raises Exception: when no entry covers the device version
        """
        version = self.dev_obj.get("version")
        for supported_versions in MGMT_PORT_PATCH_CMD_DICT:
            if version in supported_versions:
                return MGMT_PORT_PATCH_CMD_DICT[supported_versions]
        raise Exception("can`t get patch match cmd dict. dev version is %s" %
                        version)

    def is_fault_ctrl(self, node_id):
        """Tell whether ``node_id`` is the controller that is being replaced.

        :param node_id: controller node id
        :return: True when it equals the "fault_node_id" stored in the context
        """
        fault_node_id = contextUtil.getItem(self.context, "fault_node_id")
        return fault_node_id == node_id

    def get_ctrl_node_id(self, ctrl_id):
        """Translate a controller ID into its node id.

        The first character of the ID selects the engine and the last
        character (A-D, case-insensitive) the position inside that engine's
        node list.

        :param ctrl_id: controller ID
        :return: node id of the controller
        """
        letter = ctrl_id.strip().upper()[-1]
        engine_id = ctrl_id[0]
        nodes_by_engine = self.get_engine_node_info()[2]
        engine_nodes = nodes_by_engine.get(engine_id)
        position = node_id_dict.get(letter)
        return engine_nodes[position]

    def ssh_to_node(self, node_id, is_enable):
        """Open a heartbeat session to ``node_id``.

        A failure on the faulty controller is tolerated (returns False); a
        failure on any other controller raises.

        :param node_id: node id of the target controller
        :param is_enable: True/False enable/disable scene (not used here)
        :return: True when the heartbeat succeeded, False when the heartbeat
            to the faulty controller failed
        :raises Exception: when the heartbeat to a non-faulty controller fails
        """
        heart_cmd = "sshtoremoteExt %s" % str(node_id)
        flag, heart_ret, errmsg = cliUtil.sshToRemoteContr(
            self.cli, heart_cmd, self.dev_obj.get("pawd"), self.lang)
        if flag:
            return True
        if not self.is_fault_ctrl(node_id):
            raise Exception(
                "%s execute failed: %s" % (heart_cmd, errmsg))
        # The controller to be replaced cannot be logged in to, so there is
        # nothing to switch on it. If the session nevertheless reached the
        # peer, leave it again.
        if "System Information" in heart_ret:
            cliUtil.exitHeartbeatCli(self.cli, self.lang)
        return False

    def polling_controller_modify_port(self, node_list, is_enable):
        """Visit each controller of the engine and switch its management port.

        For every node the enable/disable command is executed in the run mode
        of the matching command set, then the show command is used to verify
        the result. Nodes that cannot be reached via ssh_to_node (faulty
        controller) are skipped.

        :param node_list: node ids of the controllers in the engine
        :param is_enable: True/False enable/disable the management port
        :return: None
        :raises Exception: when the show command succeeds but its output does
            not contain the expected state
        """
        patch_cmd_dict = self.get_board_cmd_dict()
        run_model = patch_cmd_dict.get("runModel")
        log.info(self.context, "patch_cmd_dict is %s; node_list is %s."
                 % (str(patch_cmd_dict), node_list))
        # visit the controllers of this engine
        for node_id in node_list:
            # 1, enter the controller (heartbeat unless already on it)
            cur_node = self.get_engine_node_info()[1]
            if cur_node != node_id:
                if not self.ssh_to_node(
                        node_id, is_enable):
                    continue
            # 2, disable or enable the management port, then query the result
            switch_target = "enable" if is_enable else "disable"
            port_cmd = patch_cmd_dict.get(switch_target)
            show_cmd = patch_cmd_dict.get("show")
            # default: "query not successful", so the check below is skipped
            show_recs = False, "", ""
            if run_model == "minisystem":
                cliUtil.excuteCmdInMinisystemModel(
                    self.cli, port_cmd, self.lang)
                show_recs = cliUtil.excuteCmdInMinisystemModel(
                    self.cli, show_cmd, self.lang)
            # failures in debug mode are only logged; minisystem failures
            # above propagate to the caller
            try:
                if run_model == "debug":
                    cliUtil.executeCmdInDebugMode(
                        self.cli, port_cmd, True, self.lang)
                    show_recs = cliUtil.executeCmdInDebugMode(
                        self.cli, show_cmd, True, self.lang)
            except (Exception, jException) as e:
                log.error(context=self.context, info=str(e))
                log.error(self.context, "exec debug cmd failed.")
            # leave the heartbeat session
            if cur_node != node_id:
                cliUtil.exitHeartbeatCli(self.cli, self.lang)
            # verified only when the show command itself succeeded
            if show_recs[0] and switch_target not in show_recs[1]:
                raise Exception("%s ctrl failed to %s port." %
                                (str(node_id), switch_target))

    @log_decorator
    def enable_or_disable_management_port(self, is_enable):
        """Enable or disable the management ports of the engine's controllers.

        Any exception raised on the way is logged with its traceback and
        turned into a False result. Because of log_decorator the instance has
        to be passed explicitly:
        ``self.enable_or_disable_management_port(self, is_enable=False)``.

        :param is_enable: True/False enable/disable
        :return: True/False whether the operation succeeded
        """
        try:
            log.info(self.context,
                     "modify management port, is_enable: %s" % str(is_enable))
            engine_id = self.get_sel_ctrl_engine_id()
            nodes_by_engine = self.get_engine_node_info()[2]
            engine_nodes = nodes_by_engine.get(engine_id)
            # run the operation on every controller of this engine
            self.polling_controller_modify_port(engine_nodes, is_enable)
        except Exception:
            action = "enable" if is_enable else "disable"
            log.error(
                self.context,
                "%s management failed :%s" %
                (action, str(traceback.format_exc())))
            return False
        return True

    def check_mm_version_result(self, cmd):
        """Query the result of a management board upgrade or rollback once.

        :param cmd: check command, executed in minisystem mode
        :return: the (falsy) command flag when the command failed, otherwise
            whether the lower-cased output contains "status: success"
        """
        flag, output, _errmsg = cliUtil.excuteCmdInMinisystemModel(
            self.cli, cmd, self.lang)
        if not flag:
            return flag
        return "status: success" in output.lower()

    def ssh_to_non_fault_ctrl(self):
        """Make sure the session runs on a healthy controller of the faulty engine.

        Opens a heartbeat session to the first controller in the engine of the
        controller being replaced that is not the faulty one, unless the
        current node already is such a controller.

        :return: None
        :raises Exception: when the heartbeat command fails
        """
        topography = self.get_engine_node_info()
        current_node = topography[1]
        fault_node = contextUtil.getItem(self.context, "fault_node_id")
        fault_engine_id = self.get_sel_ctrl_engine_id()
        engine_nodes = topography[2].get(fault_engine_id)
        # already on a non-faulty controller of this engine: no heartbeat needed
        on_healthy_peer = (current_node in engine_nodes
                           and current_node != fault_node)
        if on_healthy_peer:
            return
        for candidate in engine_nodes:
            if fault_node == candidate:
                continue
            cmd = "sshtoremoteExt %s" % str(candidate)
            flag, _heart_ret, errmsg = cliUtil.sshToRemoteContr(
                self.cli, cmd, self.dev_obj.get("pawd"), self.lang)
            if not flag:
                raise Exception(
                    "%s execute failed: %s" % (cmd, errmsg))
            # only the first healthy candidate is tried
            return

    def query_mm_version_result(self, check_cmd):
        """Poll the result of a management board upgrade or rollback.

        Polls every 10 s for up to 300 s. When the check raises ToolException
        the method waits 20 s and, if the CLI is connected, moves back to a
        non-faulty controller before polling again.

        :param check_cmd: command that queries the result
        :return: True as soon as the check succeeds, False on timeout
        """
        timeout_seconds = 300
        poll_seconds = 10
        started_at = time.time()
        while time.time() - started_at <= timeout_seconds:
            try:
                finished = self.check_mm_version_result(check_cmd)
            except ToolException:
                log.error(self.context, "reconnect cli.")
                time.sleep(poll_seconds * 2)
                # reconnect and make sure the current controller is not the
                # faulty one
                if cliUtil.getAvaliableCli(self.cli).isConnected():
                    self.ssh_to_non_fault_ctrl()
                continue
            if finished:
                return True
            time.sleep(poll_seconds)
        return False

    @log_decorator
    def upgrade_or_rollback_mm_version(self, is_upgrade):
        """Upgrade or roll back the management board.

        Any exception raised on the way is logged with its traceback and
        turned into a False result. Because of log_decorator the instance has
        to be passed explicitly:
        ``self.upgrade_or_rollback_mm_version(self, is_upgrade=True)``.

        :param is_upgrade: True/False upgrade/rollback
        :return: True/False whether the operation succeeded
        """
        action = "upgrade" if is_upgrade else "rollback"
        try:
            log.info(self.context,
                     "modify mm version, is_upgrade: %s" % str(is_upgrade))
            # 1, heartbeat to a non-faulty controller
            self.ssh_to_non_fault_ctrl()
            # 2, start the upgrade or rollback
            commands = UPGRADE_MM_VERSION_CMD_DICT.get(action)
            cliUtil.excuteCmdInMinisystemModel(
                self.cli, commands.get("execute"), self.lang)
            # 3, poll for the result; the connection may drop meanwhile
            return self.query_mm_version_result(commands.get("check"))
        except Exception:
            log.error(
                self.context,
                "%s mm version failed: %s" %
                (action, str(traceback.format_exc())))
            return False

    def execute_isolate_cmd(self, cmd):
        """Run an isolate / cancel-isolate command in minisystem mode.

        :param cmd: command line to execute
        :return: truthy only when the execution flag is set and the echoed
            output contains "success" (compared case-insensitively)
        """
        flag, cli_ret, _ = cliUtil.excuteCmdInMinisystemModel(
            self.cli, cmd, self.lang)
        if not flag:
            # Hand back the falsy flag itself, just as `flag and ...` would.
            return flag
        return "success" in cli_ret.lower()

    @log_decorator
    def isolate_or_cancel_fault_controller(self, fault_ctrl_id, is_isolate):
        """Isolate, or cancel the isolation of, the controller being replaced.

        :param fault_ctrl_id: ID of the fault controller; its last character
            is the key looked up in node_id_dict
        :param is_isolate: True to isolate, False to cancel the isolation
        :return: True/False whether the operation succeeded
        """
        log.info(self.context,
                 "isolate fault controller, is_isolate: %s" % str(is_isolate))
        # The commands are issued from a non-fault controller.
        self.ssh_to_non_fault_ctrl()
        tdm_id = node_id_dict.get(fault_ctrl_id[-1])
        tdm_id_text = str(tdm_id)
        if is_isolate:
            switch_value = "1"
        else:
            switch_value = "0"
        switch_cmd = "settdmcomm %s %s" % (tdm_id_text, switch_value)

        # 1. Isolate / cancel isolation. A failed isolation is retried once
        #    after 120 seconds: cancel first, then isolate again.
        first_try_ok = self.execute_isolate_cmd(switch_cmd)
        if is_isolate and not first_try_ok:
            time.sleep(120)
            # Still on the isolate branch, so the cancel command simply
            # uses "0" as its last argument.
            cancel_cmd = "settdmcomm %s %s" % (tdm_id_text, "0")
            if not self.execute_isolate_cmd(cancel_cmd):
                # Cancelling failed: do not attempt to isolate again.
                return False
            self.execute_isolate_cmd(switch_cmd)

        # 2. Query the resulting state; the connection may have dropped.
        query_cmd = "showtdmcomm %s" % tdm_id
        flag, cli_ret = False, ""
        try:
            flag, cli_ret, _ = cliUtil.excuteCmdInMinisystemModel(
                self.cli, query_cmd, self.lang)
        except ToolException:
            time.sleep(10)
            log.error(self.context, "reconnect cli.")
            if cliUtil.getAvaliableCli(self.cli).isConnected():
                self.ssh_to_non_fault_ctrl()
                flag, cli_ret, _ = cliUtil.excuteCmdInMinisystemModel(
                    self.cli, query_cmd, self.lang)
        echoed = cli_ret.lower()
        # Isolation is confirmed by "timeout" in the output, cancellation
        # by "00".
        expected_marker = "timeout" if is_isolate else "00"
        return bool(flag) and expected_marker in echoed

    def get_system_current_time(self):
        """Read the current time of the device.

        The session is switched to CLI mode before the query. According to
        the original note the time format is 2017-07-19/12:00:00.

        :return: the first element returned by cliUtil.getSystemDate
        """
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        date_info = cliUtil.getSystemDate(self.cli, self.lang)
        current_date, _ = date_info
        return current_date

    def clear_patch_alarm(self):
        """Clear the trust-listed alarms raised while the patch flow ran.

        :return: True when there is nothing to clear or the clear command
            output contains "successfully", False otherwise
        """
        log.info(self.context, "clear patch alarm")
        window_start = self.context.get(BEGIN_TIME_KEY)
        # Hop (heartbeat SSH) to a non-fault controller first.
        self.ssh_to_non_fault_ctrl()
        window_end = self.get_system_current_time()

        # 1. List the alarms of the window and keep the sequence numbers of
        #    those whose ID (e.g. 0xF00CF0014) is in the trust list.
        query_cmd = "show alarm from_time=%s to_time=%s" \
                    % (window_start, window_end)
        query_ret = cliUtil.execCmdInCliMode(self.cli, query_cmd,
                                             True, self.lang)
        alarm_rows = cliUtil.getHorizontalCliRet(query_ret[1])
        matched_sequences = [
            row.get("Sequence", "")  # e.g. 102632
            for row in alarm_rows
            if row.get("ID", "") in PATCH_ALARM_TRUSTLIST
        ]
        if not matched_sequences:
            return True

        # 2. Clear them, answering every (y/n) confirmation with "y".
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        clear_cmd = "change alarm clear sequence_list=%s" \
                    % ",".join(matched_sequences)
        reply = cliUtil.execCliCmd(self.cli, clear_cmd, True)
        while "(y/n)" in reply:
            reply = cliUtil.execCliCmd(self.cli, "y", True)
        return "successfully" in reply

    def check_manage_board_version(self):
        """Run the minisystem management board version check.

        Per the original note, an answer such as "Status: Fail.Detail:
        Management module version check failed." means the flow moves on to
        the next step; otherwise the rollback flow is entered and the
        version is checked again.

        :return: True when the command output contains "Status: Fail.",
            False otherwise
        """
        check_cmd = "checkupgradesmmversion"
        check_ret = cliUtil.excuteCmdInMinisystemModel(
            self.cli, check_cmd, self.lang)
        return "Status: Fail." in check_ret[1]

    def rollback_minisysteam_firmware(self):
        """Roll back the management board firmware.

        A failed rollback is logged and recorded through
        result.setResultFailByKey.

        NOTE(review): False is returned whether or not the rollback
        succeeded; presumably callers use it to stop the flow - confirm.

        :return: always False
        """
        rolled_back = self.upgrade_or_rollback_mm_version(
            self, is_upgrade=False)
        if rolled_back:
            return False
        log.error(self.context, "rollback mm version failed.")
        result.setResultFailByKey(self.context,
                                  LangKey.ROLLBACK_MANAGEMENT_FIRMWARE)
        return False

    def terminate_patch_process(self):
        """Terminate the replacement process.

        Only an informational log entry is written at present.

        NOTE(review): the earlier docstring promised a True/False result,
        but nothing is returned - confirm what callers expect.

        :return: None
        """
        log.info(self.context, "terminate patch process")
        return None

    def record_existing_alarm_mask(self):
        """Record the alarm masks that already exist on the device.

        The "Alarm ID" column of `show alarm_mask` is collected on a
        non-fault controller and stored through BaseFactory.persist.setModule
        under the key "alarm_mask_id_list".
        """
        self.ssh_to_non_fault_ctrl()
        cliUtil.enterCliModeFromSomeModel(self.cli, self.lang)
        query_ret = cliUtil.execCmdInCliMode(
            self.cli, "show alarm_mask", True, self.lang)
        mask_rows = cliUtil.getHorizontalCliRet(query_ret[1])
        # Alarm IDs look like 0xF0006001A.
        masked_ids = [row.get("Alarm ID") for row in mask_rows]
        # Keep the currently masked alarms in the context for later use.
        BaseFactory.persist.setModule(
            self.context, "alarm_mask_id_list", masked_ids)

    def patch_operation_final_v2(self):
        """Run the patch-related wrap-up after all operations (new flow).

        The step record file (self.log_file_name) is parsed into a status
        map keyed by "<method>/<kwargs>". That map is handed to
        get_skip_steps, and execute_rollback then runs the steps that are
        not skipped. The record file is removed once the rollback succeeds.

        Record samples:
            call started
                2024-09-08 10:30:36,start calling Method: modify_alarm_notification,Kwargs: is_mask=1
            call finished
                2024-09-08 10:30:38,end called Method:modify_alarm_notification,Kwargs:is_mask=1,
                Start Time:2024-09-08 10:30:36.618000,End Time:2024-09-08 10:30:38.543000,Result:1

        :return: True when the record file is missing/empty or the rollback
            succeeded, False when the rollback failed
        """
        selected = json.loads(
            self.context.get("input_selectfru_controllerSelectedRow"))
        ctrl_id = selected.get("id")
        log.info(self.context, "6u4c patch scene final.")

        # Read the step records; an unreadable file is treated like an
        # empty one (the error is logged).
        lines = []
        try:
            with open(self.log_file_name, 'r') as step_file:
                lines = step_file.readlines()
        except Exception as e:
            log.error(context=self.context, info=str(e))
        if not lines:
            return True

        # Record the status of every step; a later line with the same key
        # overrides an earlier one. A "start calling" line counts as '0' so
        # that a step interrupted by an unexpected exit still has a status.
        step_status = {}
        for line in lines:
            fields = line.split(',')
            try:
                method_name = fields[1].split(':')[1].strip()
                kwargs_text = fields[2].split(':')[1].strip()
                if fields[1].startswith('end called'):
                    status = fields[5].split(':')[1].strip()
                else:
                    status = '0'
            except IndexError:
                # A blank or truncated record (e.g. left behind by an
                # abnormal exit) must not abort the whole restore flow.
                log.error(self.context,
                          "skip malformed step record: %s" % line.strip())
                continue
            step_status[method_name + '/' + kwargs_text] = status

        skip_steps = self.get_skip_steps([], step_status)
        log.info(self.context, "skip_steps:{}".format(skip_steps))

        if not self.execute_rollback(skip_steps, ctrl_id):
            return False

        # Remove the step record file.
        if os.path.exists(self.log_file_name):
            os.remove(self.log_file_name)
        return True

    def execute_rollback(self, skip_steps, ctrl_id):
        """Run the common restore steps in order, honouring skip_steps.

        A step is attempted only when its SkipSteps marker is absent from
        skip_steps. The first failing step is logged, recorded through
        result.setResultFailByKey, and ends the sequence.

        NOTE(review): self is passed explicitly to several methods, exactly
        as the original call sites do; presumably log_decorator requires
        it - confirm.

        :param skip_steps: collection of SkipSteps markers to leave out
        :param ctrl_id: ID of the controller whose isolation is cancelled
        :return: True when every attempted step succeeded, False otherwise
        """
        # 1. Cancel the isolation of the controller being replaced.
        if SkipSteps.CANCEL_ISOLATE_CTRL not in skip_steps \
                and not self.isolate_or_cancel_fault_controller(
                    self, ctrl_id, is_isolate=False):
            log.error(self.context, "cancel isolate fault controller failed.")
            result.setResultFailByKey(
                self.context, LangKey.CANCEL_ISOLATION_OF_CONTROLLER_FAILED)
            return False

        # 2. Roll back the management board firmware.
        if SkipSteps.ROLLBACK_M_BOARD_FIRMWARE not in skip_steps \
                and not self.upgrade_or_rollback_mm_version(
                    self, is_upgrade=False):
            log.error(self.context, "rollback mm version failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.ROLLBACK_MANAGEMENT_FIRMWARE)
            return False

        # 3. Restore the multi-engine startup flag.
        if SkipSteps.MULTI_STARTUP_FLAG_RESUME not in skip_steps \
                and not self.set_control_resume(self):
            log.error(self.context, 'Multi-engine resume ctrl failed.')
            result.setResultFailByKey(
                self.context, LangKey.MULTI_ENGINE_PAUSE_CTRL_FAILED)
            return False

        # 4. Re-open the management port.
        if SkipSteps.OPEN_MANAGE_PORT not in skip_steps \
                and not self.enable_or_disable_management_port(
                    self, is_enable=True):
            log.error(self.context, "enable management port failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.OPEN_MANAGEMEENT_PORT_FAILED)
            return False

        # 5. Restore alarm notifications.
        if SkipSteps.RECOVERY_ALARM_NOTIFY not in skip_steps \
                and not self.modify_alarm_notification(self, is_mask=False):
            log.error(self.context, "open alarm notifications failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.REVERT_ALARM_NOTIFICATION_FAILED)
            return False

        # 6. Clear the alarms raised during the patch flow.
        if SkipSteps.CLEAR_ALARM not in skip_steps \
                and not self.clear_patch_alarm():
            log.error(self.context, "clear patch alarm failed.")
            result.setResultFailByKey(self.context,
                                      LangKey.CLEAR_PATCH_ALARM_FAILED)
            return False
        return True
