# encoding=utf-8
"""
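Purpose: stop the services of a product on all of its nodes and check the
         replication status of its database instances, reporting progress
         through Taskmgrutil.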
版权信息：华为技术有限公司，版本所有(C) 2019-2029
修改记录：2019-12-11 12:00 创建
"""
import os
import sys
import time
import json
import shutil
import subprocess
import queue
import threading

from commonlog import Logger
from taskmgr_util import Taskmgrutil
from datetime import datetime

# Module-wide logger, obtained from the project Logger by passing the script path (sys.argv[0]).
logger = Logger().getinstance(sys.argv[0])
# Directory under which the temporary product-information folders are created.
config_dir = "/opt/oss/log/manager/easysuite_upgrade_config"
# Timeout for the database status check (1 hour), in seconds.
TIME_OUT = 60 * 60
# Interval between two database status checks (10 s), in seconds.
TIME_INTERVAL = 10


class LfnDifferStopService:
    """Stop the services of a product on all nodes and check database replication.

    The two entry points are :meth:`lfn_differ_stop_service` and
    :meth:`lfn_differ_check_db_status`; both report their outcome to the task
    files under ``task_path`` through ``Taskmgrutil``.
    """

    # Zero-based positions of the fields read from each whitespace-separated
    # row printed by ``dbsvc_adm -cmd query-db-instance``.
    _CLASS_ID_COL = 1
    _INST_NUMBER_COL = 2
    _RPL_STATUS_COL = 11
    # Number of attempts for one status query and the pause between attempts.
    _QUERY_ATTEMPTS = 2
    _QUERY_RETRY_WAIT = 5

    def format_time(self):
        """
        Return the current local time truncated to whole seconds.

        :return: naive ``datetime`` whose ``microsecond`` is 0, so ``str()``
                 renders it as ``YYYY-MM-DD HH:MM:SS``
        """
        return datetime.now().replace(microsecond=0)

    def query_service_node_ip(self, productname):
        """
        Query the management IPs of all nodes of a product.

        Runs ``queryproduct.sh`` into a temporary directory below
        ``config_dir`` and reads ``nodemgrip`` of every entry in ``hostlist``
        from the generated ``nodes_<productname>.json``.

        :param productname: product name passed to ``queryproduct.sh -pn``
        :return: list of ``nodemgrip`` values, or ``False`` on any failure
        """
        tmp_path = None
        try:
            oss_root = os.getenv('OSS_ROOT')
            if not oss_root:
                # Without OSS_ROOT the script path below cannot be built.
                logger.error('Fail to query product info, because environment variable OSS_ROOT is not set.')
                return False
            logger.info(f'Start to query {productname} information.')
            tail = int(time.time())
            os.makedirs(config_dir, exist_ok=True)
            tmp_path = f'{config_dir}/{productname}_information_{tail}'
            # Start from an empty directory if a stale one has the same name.
            if os.path.isdir(tmp_path):
                shutil.rmtree(tmp_path)
            os.mkdir(tmp_path)
            query_info_cmd = f'bash {oss_root}/tools/resmgr/queryproduct.sh -pn {productname} -output {tmp_path}'
            exitcode, data = subprocess.getstatusoutput(query_info_cmd)
            if exitcode != 0:
                logger.error(f'Fail to bash cmd: {query_info_cmd}, output: {data}')
                return False
            logger.info(f'Success to bash cmd: {query_info_cmd}, output: {data}')
            # Parse nodes_{productname}.json to collect the IP of every node.
            nodes_json_path = os.path.join(tmp_path, f'nodes_{productname}.json')
            with open(nodes_json_path, "r", encoding="utf-8") as fob:
                node_info = json.load(fob)
            node_ip_list = [host.get('nodemgrip') for host in node_info['hostlist']]
            logger.info(f'node_ip_list is: {node_ip_list}')
            return node_ip_list
        except Exception as e:
            logger.error(f'Fail to query product info, because Exception {e}')
            return False
        finally:
            # Remove the temporary files on every path, including failures;
            # a cleanup problem must not turn a successful query into a failure.
            if tmp_path and os.path.isdir(tmp_path):
                shutil.rmtree(tmp_path, ignore_errors=True)

    def stop_one_node_app(self, node_ip, productname, result_queue):
        """
        Stop the services of a product on a single node over ssh.

        Exactly one result dict (``node_ip``, ``stop_cmd_result``,
        ``stop_cmd_output``) is always put on ``result_queue``, even when an
        unexpected exception occurs, so the caller can name the failing node.

        :param node_ip: address of the node to ssh to
        :param productname: tenant passed to ``ipmc_adm -cmd stopapp``
        :param result_queue: ``queue.Queue`` receiving the result dict
        :return: None
        """
        exitcode, output = 1, ""
        try:
            ssh_cmd = f'ssh -o ConnectTimeout=3600 -o stricthostkeychecking=no ' \
                      f'-o ConnectionAttempts=3 -o ServerAliveInterval=10 {node_ip}'
            # Apps registered in the temp list and passed to stopapp via
            # "-ignore templist" (presumably: the apps that must keep running).
            not_stop_app_list = ['RouterAgent', 'ServiceAwareWatchAgent', 'DRMgrAgent',
                                 'DBAgent', 'CloudbProxyService', 'Etcd', 'HIROIRService',
                                 'SSAMgrWebsite', 'DBProxyService', 'ServiceCenter', 'OMMHAService',
                                 'HIROBERService', 'BusService', 'UniEPAgent', 'NodeAgent']
            add_temp_list = [f"ipmc_adm -cmd addtemplist -templist listname -app {x}"
                             for x in not_stop_app_list]
            run_cmd = f'{ssh_cmd} \'. /opt/oss/manager/bin/engr_profile.sh;' \
                      f'{";".join(add_temp_list)};' \
                      f'ipmc_adm -cmd stopapp -tenant {productname} ' \
                      f'-ignore templist -templist listname\''
            logger.info(f'Node {node_ip}, start to run cmd:{run_cmd}.')
            exitcode, output = subprocess.getstatusoutput(run_cmd)
            logger.info(f'Node {node_ip}, run_cmd exitcode:{exitcode}, output:{output}')
            # Always remove the temp list again, whether or not the stop succeeded.
            del_temp_cmd = f'{ssh_cmd} \'. /opt/oss/manager/bin/engr_profile.sh;' \
                           f'ipmc_adm -cmd deltemplist -templist listname -app all\''
            logger.info(f'Node {node_ip}, start to run cmd:{del_temp_cmd}.')
            del_temp_cmd_code, del_temp_cmd_output = subprocess.getstatusoutput(del_temp_cmd)
            logger.info(f'Node {node_ip}, run_cmd exitcode:{del_temp_cmd_code},'
                        f' output:{del_temp_cmd_output}')
        except Exception as e:
            # Report the node as failed instead of letting the thread die
            # without a result.
            logger.error(f'Node {node_ip}, unexpected exception while stopping services: {e}')
            exitcode = exitcode or 1
            output = f'{output} {e}'.strip()
        result_queue.put({"node_ip": node_ip, "stop_cmd_result": exitcode,
                          "stop_cmd_output": output})

    def stop_all_nodes_app(self, productname, task_path):
        """
        Stop the services of a product on all of its nodes in parallel.

        One thread per node runs :meth:`stop_one_node_app`; every node whose
        stop command returned a non-zero exit code is logged and appended to
        the task message.

        :param productname: product whose services are stopped
        :param task_path: task path passed to ``Taskmgrutil``
        :return: ``True`` if every node stopped successfully, else ``False``
        """
        node_ip_list = self.query_service_node_ip(productname)
        if not node_ip_list:
            logger.error("Fail to get node ip list.")
            return False
        result_queue = queue.Queue()
        threads_list = [
            threading.Thread(target=self.stop_one_node_app, args=(node_ip, productname, result_queue))
            for node_ip in node_ip_list
        ]
        for thread in threads_list:
            thread.start()
        for thread in threads_list:
            thread.join()
        # Every worker is expected to have delivered exactly one result.
        if result_queue.qsize() != len(node_ip_list):
            logger.error("Thread execution exception.")
            return False
        stop_result = True
        for _ in range(result_queue.qsize()):
            q_result = result_queue.get()
            if not q_result or q_result.get("stop_cmd_result") == 0:
                continue
            node_ip = q_result.get("node_ip")
            stop_cmd_output = q_result.get("stop_cmd_output")
            logger.error("Failed to stop the service on node %s." % node_ip)
            Taskmgrutil.set_msg_append(
                task_path,
                f"[{self.format_time()}] Failed to stop the service on node {node_ip}:{stop_cmd_output}")
            stop_result = False
        return stop_result

    @classmethod
    def _parse_dbinstance_output(cls, output):
        """
        Classify the text printed by the database instance query.

        The first line is treated as a header and skipped; blank lines are
        ignored. A non-blank row with too few fields cannot be evaluated and
        is reported as ``"query_failed"`` rather than silently passing.

        :param output: stdout of ``dbsvc_adm -cmd query-db-instance``
        :return: ``("normal", None)``, ``("abnormal", (inst_number, rpl_status))``
                 for the first primary instance whose status is not NORMAL, or
                 ``("query_failed", row)`` for the first unparsable row
        """
        for line in output.strip().split('\n')[1:]:
            fields = line.split()
            if not fields:
                continue
            if len(fields) <= cls._RPL_STATUS_COL:
                return "query_failed", line.strip()
            class_id = fields[cls._CLASS_ID_COL]
            rpl_status = fields[cls._RPL_STATUS_COL]
            if class_id.lower() == "primary" and rpl_status.lower() != "normal":
                return "abnormal", (fields[cls._INST_NUMBER_COL], rpl_status)
        return "normal", None

    def query_dbinstance_status(self, productname):
        """
        Query the database instance status once.

        The command is retried once after 5 seconds to tolerate transient
        failures of the platform command.

        :param productname: tenant passed to ``dbsvc_adm``
        :return: ``"normal"``, ``"abnormal"`` or ``"query_failed"``
        """
        cmd = "bash /opt/oss/manager/apps/DBAgent/bin/dbsvc_adm -cmd query-db-instance -type zenith -tenant %s" % productname
        for attempt in range(1, self._QUERY_ATTEMPTS + 1):
            exitcode, output = subprocess.getstatusoutput(cmd)
            logger.info(f"query_bdinstance_status, exitcode:{exitcode}, output:{output}")
            if exitcode == 0:
                break
            logger.error("Fail to query dbinstance status, times {%s}" % attempt)
            # Only wait when another attempt follows.
            if attempt < self._QUERY_ATTEMPTS:
                time.sleep(self._QUERY_RETRY_WAIT)
        else:
            return "query_failed"

        status, detail = self._parse_dbinstance_output(output)
        if status == "abnormal":
            inst_number, rpl_status = detail
            logger.info(f"{inst_number} DbInstance's RplStatus is {rpl_status}, not NORMAL")
        elif status == "query_failed":
            logger.error(f"Fail to parse dbinstance status line: {detail}")
        return status

    def lfn_differ_stop_service(self, productname, task_path):
        """
        Stop the product services and record the outcome in the task.

        :param productname: product whose services are stopped
        :param task_path: task path passed to ``Taskmgrutil``
        :return: ``True`` on success, ``False`` otherwise
        """
        Taskmgrutil.set_msg_append(task_path, "[%s] " % self.format_time() + "Stop product services, please wait.")
        if not self.stop_all_nodes_app(productname, task_path):
            self.taskmgr_set_log_info(task_path, "Fail to stop product services.", "fail", "100")
            return False
        self.taskmgr_set_log_info(task_path, "Success to stop product services.", "success", "100")
        return True

    def lfn_differ_check_db_status(self, productname, task_path):
        """
        Wait until the replication status of all database instances is normal.

        Polls :meth:`query_dbinstance_status` every ``TIME_INTERVAL`` seconds
        for at most ``TIME_OUT`` seconds and records the outcome in the task.

        :param productname: tenant whose database instances are checked
        :param task_path: task path passed to ``Taskmgrutil``
        :return: ``True`` once the status is normal, ``False`` on timeout
        """
        Taskmgrutil.set_msg_append(task_path, "[%s] " % self.format_time()
                                   + "Check the Replication Status of Database Instances, please wait.")
        # After the services are stopped, the upgrade may only continue when
        # every instance reports a normal replication status.
        end_time = int(time.monotonic()) + TIME_OUT
        while int(time.monotonic()) <= end_time:
            result = self.query_dbinstance_status(productname)
            if result == "normal":
                logger.info("All dbinstance is NORMAL, can continue to upgrade the database.")
                self.taskmgr_set_log_info(task_path, "The replication status of the database instance is normal.",
                                          "success", "100")
                return True
            time.sleep(TIME_INTERVAL)
        # Report the timeout that was actually applied.
        logger.error("The replication status of the database instance is still abnormal after "
                     f"{TIME_OUT // 60} minutes timeout.")
        self.taskmgr_set_log_info(task_path,
                                  "The replication status of the database instance is abnormal, please check.",
                                  "fail", "100")
        return False

    def taskmgr_set_log_info(self, task_path, msg, status, progress):
        """
        Record the running state of the task.

        :param task_path: path of the task information
        :param msg: log message, appended with a timestamp prefix
        :param status: task status passed to ``Taskmgrutil.set_e_taskstatus``
        :param progress: task progress passed to ``Taskmgrutil.set_e_taskprogress``
        """
        Taskmgrutil.set_msg_append(task_path, "[%s] " % self.format_time() + msg)
        Taskmgrutil.set_e_taskstatus(task_path, status)
        Taskmgrutil.set_e_taskprogress(task_path, progress)


def main(argv):
    """
    Run one sub-step of the "stop product services" task.

    The step consists of two independent sub-steps selected by the third
    argument: ``"stop"`` stops the product services and ``"check"`` checks
    the database status. Any other value marks the task as failed.

    :param argv: ``[script, productname, e_task_id, task_operation]``
    :return: always 0; the outcome is recorded in the task files
    """
    productname, e_task_id, task_operation = argv[1], argv[2], argv[3]
    e_task_path = os.path.join("/opt/upgrade/easysuite_upgrade/taskmgr", e_task_id)

    # Create the helpers, then initialise the task before doing any work.
    task_mgr = Taskmgrutil()
    stop_service = LfnDifferStopService()
    task_mgr.init_e_taskmgr(e_task_path)

    # Map each supported operation to the method implementing it.
    operations = {
        "stop": stop_service.lfn_differ_stop_service,
        "check": stop_service.lfn_differ_check_db_status,
    }
    handler = operations.get(task_operation)
    if handler is None:
        stop_service.taskmgr_set_log_info(e_task_path, "Wrong task_operation.", "fail", "100")
    else:
        handler(productname, e_task_path)
    return 0


# Script entry point; expected arguments: <productname> <e_task_id> <task_operation: stop|check>.
if __name__ == '__main__':
    main(sys.argv)
