#!/usr/bin/env bash

###############################################################################################
# Print the path of the repair log file, which sits directly under ${c_root_path}.
# Globals:  c_root_path (read)
# Outputs:  the log file path on stdout
# Returns:  0
function fn_get_log_file_path()
{
    local l_log_name="cmd.repair.log"
    printf '%s\n' "${c_root_path}/${l_log_name}"
    return 0
}

###################################################
#
# Execute a command on a remote node
#
###################################################
# Run a command on a remote host by delegating to exec_cmd_remote.
# The password is fed to exec_cmd_remote on stdin (here-document), not on argv.
# Globals:   result (written) - exit status of exec_cmd_remote. It is left
#            global on purpose: callers may read ${result} after the call.
# Arguments: $1 - host ip
#            $2 - remote user
#            $3 - password of the remote user
#            $4 - command line to execute remotely
#            $5 - timeout, forwarded unchanged to exec_cmd_remote
#            $6 - log flag, forwarded unchanged to exec_cmd_remote
# Returns:   0 on success, otherwise the exit status of exec_cmd_remote
function fn_run_command_remote()
{
    fn_info_screen "Start executing remote cmds."
    fn_sys_log_info "Start executing remote cmds."
    # Locals keep the password and the other arguments out of the global scope;
    # they are still visible to exec_cmd_remote through dynamic scoping.
    local host_ip="$1"
    local user="$2"
    local passwd="$3"
    local command="$4"
    local timeout="$5"
    local is_log="$6"
    exec_cmd_remote "${host_ip}" "${user}" "${timeout}" "${command}" "${is_log}" << EOF
${passwd}
EOF
    result=$?
    if [[ ${result} -ne 0 ]]; then
        return "${result}"
    fi
    # Report success on screen AND in the log file (the log call used to be
    # unreachable because the screen call returned first).
    fn_info_screen "Success to execute the remote command."
    fn_sys_log_info "Success to execute the remote command."
    return 0
}

# Copy a local file or directory to a remote host as root by delegating to
# cp_file_to_remote. The password is fed to cp_file_to_remote on stdin.
# Globals:   result (written) - exit status of cp_file_to_remote
# Arguments: $1 - host ip
#            $2 - timeout, forwarded unchanged to cp_file_to_remote
#            $3 - local source path
#            $4 - destination path on the remote host
#            $5 - password of the remote root user
# Returns:   0 on success, 1 when cp_file_to_remote fails
function fn_cp_file_remote()
{
    fn_info_screen "Copy the file/dict to the remote node."
    fn_sys_log_info "Copy the file/dict to the remote node."
    # Locals keep the password and paths out of the global scope; they remain
    # visible to cp_file_to_remote through dynamic scoping.
    local host_ip="$1"
    local timeout="$2"
    local c_src_path="$3"
    local c_path_des="$4"
    local passwd="$5"
    cp_file_to_remote "${host_ip}" "root" "${timeout}" "${c_src_path}" "${c_path_des}" "0" <<EOF
${passwd}
EOF
    result=$?
    if [[ ${result} -ne 0 ]]; then
        # An explicit if-block: a failing logger must not turn a copy failure
        # into a reported success.
        fn_sys_log_error "Failed to copy files to the remote end."
        fn_error_screen "[ERROR]Failed to copy files to the remote end."
        return 1
    fi
    fn_info_screen "Success to copy files to the remote end."
    fn_sys_log_info "Success to copy files to the remote end."
    return 0
}

###############################################################################################
# Initialise the repair run: resolve the script directory, load the shared
# utility library, set up the log file, locate the cluster configuration files
# and choose the Python home used for the bundled .pyc scripts.
# Globals:   c_root_path, c_log_file, l_cluster_ini_file,
#            l_cluster_instance_file, l_zenith_config_file (written, readonly)
#            PYTHONPATH (extended and exported)
#            PYTHONHOME, python_rtsp_path (written)
#            SYS_LOGGING_INFO (read; presumably defined by the utility library)
# Returns:   0 on success, 1 when the utility library is missing or unreadable
function fn_init()
{
    # Directory that contains this script, with symlinks resolved.
    readonly c_root_path="$(dirname "$(readlink -f "$0")")"

    # Every fn_sys_* helper used below comes from this library; without it
    # nothing else can work, so fail early instead of continuing blindly.
    local l_util_loader="${c_root_path}/shscript/utils/load_all_util.sh"
    if [[ ! -r "${l_util_loader}" ]]; then
        echo "[ERROR]Cannot read utility library ${l_util_loader}" >&2
        return 1
    fi
    source "${l_util_loader}"

    fn_sys_set_logging_verbose "$SYS_LOGGING_INFO"
    readonly c_log_file="$(fn_get_log_file_path)"
    # Start every run with a fresh log file.
    rm -f "${c_log_file}"
    fn_sys_create_log_file "$c_log_file"
    fn_sys_set_log_file "$c_log_file"

    # ls resolves the wildcard directory component to the installed path(s).
    readonly l_cluster_ini_file="$(ls /opt/oss/share/*/NdpToolService/nce_inst.cfg)"
    readonly l_cluster_instance_file="$(ls /opt/oss/share/*/NdpToolService/cluster_instance.ini)"
    readonly l_zenith_config_file="$(ls /opt/oss/*/apps/NdpStatusService/etc/sysconf/NdpStatusService*.json)"
    export PYTHONPATH="${c_root_path}/pyscript:${PYTHONPATH}"

    # The python scripts are shipped compiled (.pyc), so PYTHONHOME has to be
    # specified. Default location first; a bundled python-*.zip overrides it.
    PYTHONHOME="/opt/entTools/script/python"
    python_rtsp_path="$(ls "${c_root_path}"/python-*.zip 2>/dev/null)"
    # -f is only true when exactly one archive matched (several matches yield a
    # multi-line string that is not a file name).
    if [[ -f "${python_rtsp_path}" ]]; then
        rm -rf "${c_root_path}/python" >/dev/null 2>&1
        unzip "${python_rtsp_path}" -d "${c_root_path}/python" >/dev/null 2>&1
        PYTHONHOME="${c_root_path}/python"
    fi
    return 0
}

###############################################################################################
# 1 Check whether the executing user is root
###############################################################################################
# Pre-flight check: announce the step, then require the current user to be root.
# Returns: 0 when fn_sys_assert_current_user_matched accepts "root", 1 otherwise
function fn_precheck()
{
    local l_step_msg="Precheck before restore."
    fn_info_screen "${l_step_msg}"
    fn_sys_log_info "${l_step_msg}"
    if ! fn_sys_assert_current_user_matched "root"; then
        return 1
    fi
    return 0
}

###############################################################################################

###############################################################################################
# Prompt for the root password and read it from stdin without echoing it.
# Globals:  remote_password (written) - the line that was read, verbatim
#           IFS (written) - set to a single newline and NOT restored, so the
#           newline-only IFS stays in effect after this function returns.
#           NOTE(review): unquoted expansions elsewhere in this script may
#           depend on that - confirm before restoring IFS here.
# Outputs:  the prompt, a blank separator and a progress line on stdout
function fn_init_node_passwd()
{
    printf '%s' "Enter the password of the root user: "
    IFS=$'\n'
    # -s: silent input, -r: keep backslashes literal
    read -r -s remote_password
    # The silent read leaves the cursor on the prompt line; emit two newlines.
    printf '\n\n'
    printf '%s\n' "Start to repair ... "
}

###############################################################################################
# Print the value of every whitespace-separated "<key>=<value>" field found in
# the lines read from stdin ($1 is the key, e.g. "Base").
function _fn_prepare_field_values()
{
    awk -v key="$1" '{
        for (i = 1; i <= NF; i++) {
            if (index($i, key "=") == 1) {
                split($i, kv, "=")
                print kv[2]
            }
        }
    }'
}

# Print the Base= address of every "Default=" line in the cluster ini file
# whose role list contains the role given in $1.
function _fn_prepare_role_base_ips()
{
    grep -E "^Default=.*role=.*${1}( |,|\$)" "${l_cluster_ini_file}" \
        | _fn_prepare_field_values "Base"
}

# Collect the node addresses needed by the repair from the cluster files.
# Globals:  l_cluster_ini_file, l_cluster_instance_file (read)
#           master_ip, slave_ip, all_data_ip, journal_node_ip,
#           third_ip_instance_id, third_ip (written; newline-separated when
#           several lines match)
# Returns:  0
function fn_prepare()
{
    fn_info_screen "Prepare for pre-installation check."
    fn_sys_log_info "Prepare for pre-installation check."
    master_ip="$(_fn_prepare_role_base_ips "Manager1")"
    slave_ip="$(_fn_prepare_role_base_ips "Manager2")"
    all_data_ip="$(_fn_prepare_role_base_ips "DataA")"
    journal_node_ip="$(grep -E 'cluster=journal_node' "${l_cluster_instance_file}" \
        | _fn_prepare_field_values "base_ip")"

    # The "third" node is the journal node with the third-smallest instance id.
    third_ip_instance_id="$(grep -E 'cluster=journal_node' "${l_cluster_instance_file}" \
        | awk -F 'instance_id=' '{print $2}' | sort -n | sed -n 3p)"
    fn_info_screen "third ip instance id is ${third_ip_instance_id}"

    # Select the line whose instance_id text equals the id exactly. A plain
    # substring/regex match would also hit e.g. "base_ip=10.0.0.3" for id 3,
    # and an empty id would match every journal node line.
    third_ip=""
    if [[ -n "${third_ip_instance_id}" ]]; then
        third_ip="$(grep -E 'cluster=journal_node' "${l_cluster_instance_file}" \
            | awk -F 'instance_id=' -v id="${third_ip_instance_id}" '($2 "") == (id "")' \
            | awk -F 'base_ip=' '{print $2}' | awk '{print $1}')"
    fi

    fn_info_screen "master ip is ${master_ip}"
    fn_info_screen "slave ip is ${slave_ip}"
    fn_info_screen "third ip is ${third_ip}"
    fn_info_screen "journalnode ip is ${journal_node_ip}"
    return 0
}

# Re-create the redis cluster as ossuser. When the first "create" fails, the
# cluster is "reset" and "create" is attempted once more.
# Globals:  product_name (read)
# Returns:  0 when a create attempt succeeded, 1 when the retry failed as well
function fn_repair_redis() {
  fn_info_screen "Start to repair redis..."
  fn_sys_log_info "Start to repair redis..."
  local l_redis_home="/opt/oss/${product_name}/apps/NdpRedisService"
  # Common prefix of all three invocations; only the sub-command differs.
  local l_cluster_cmd="source ${l_redis_home}/script/sh/profile/app_profile.sh && bash ${l_redis_home}/sbin/create_cluster.sh"
  if ! su - ossuser -c "${l_cluster_cmd} create" > /dev/null 2>&1; then
    su - ossuser -c "${l_cluster_cmd} reset" > /dev/null 2>&1
    if ! su - ossuser -c "${l_cluster_cmd} create" > /dev/null 2>&1; then
      # Do not report "done" when the retry failed as well.
      fn_sys_log_error "Failed to repair redis."
      fn_error_screen "[ERROR]Failed to repair redis."
      return 1
    fi
  fi
  fn_info_screen "repair redis done..."
  fn_sys_log_info "repair redis done..."
  return 0
}

# Replace the LDAP randomPass directory on every given manager node with the
# copy found under the source path, then hand ownership to ossuser:ossgroup.
# Globals:   product_name, remote_password (read)
# Arguments: $1  - local directory that contains "randomPass"
#            $2+ - manager node IPs
function fn_repair_ldap() {
  fn_info_screen "Start to cp ldap file..."
  fn_sys_log_info "Start to cp ldap file..."
  local source_path="$1"
  shift 1
  local l_share_dir="/opt/oss/share/${product_name}/NdpLdapService"
  local manager_ip
  # All expansions are quoted so a password containing blanks or glob
  # characters reaches the helpers as exactly one argument.
  for manager_ip in "$@"; do
      fn_run_command_remote "${manager_ip}" "root" "${remote_password}" "rm -rf ${l_share_dir}/randomPass; mkdir -p ${l_share_dir}/randomPass" "5" "0"
      fn_cp_file_remote "${manager_ip}" "1800" "${source_path}/randomPass" "${l_share_dir}/" "${remote_password}"
      fn_run_command_remote "${manager_ip}" "root" "${remote_password}" "chown -R ossuser:ossgroup ${l_share_dir}/randomPass" "5" "0"
  done
  fn_info_screen "Cp ldap file Done..."
  fn_sys_log_info "Cp ldap file Done..."
}

# Replace the Kerberos "randomPass" and "kstash" directories on every given
# manager node with the copies found under the source path, then hand
# ownership to ossuser:ossgroup.
# Globals:   product_name, remote_password (read)
# Arguments: $1  - local directory that contains "randomPass" and "kstash"
#            $2+ - manager node IPs
function fn_repair_kerberos() {
  fn_info_screen "Start to cp kerberos file..."
  fn_sys_log_info "Start to cp kerberos file..."
  local source_path="$1"
  shift 1
  local l_share_dir="/opt/oss/share/${product_name}/NdpKerberosService"
  local manager_ip
  local l_item
  # All expansions are quoted so a password containing blanks or glob
  # characters reaches the helpers as exactly one argument.
  for manager_ip in "$@"; do
      # Same three steps for both directories: wipe/recreate, copy, chown.
      for l_item in "randomPass" "kstash"; do
          fn_run_command_remote "${manager_ip}" "root" "${remote_password}" "rm -rf ${l_share_dir}/${l_item}; mkdir -p ${l_share_dir}/${l_item}" "5" "0"
          fn_cp_file_remote "${manager_ip}" "1800" "${source_path}/${l_item}" "${l_share_dir}/" "${remote_password}"
          fn_run_command_remote "${manager_ip}" "root" "${remote_password}" "chown -R ossuser:ossgroup ${l_share_dir}/${l_item}" "5" "0"
      done
  done
  fn_info_screen "Cp kerberos file Done..."
  fn_sys_log_info "Cp kerberos file Done..."
}

# Rebuild the journal node data directory on the node being restored:
# stop the service, recreate the directory tree, copy this host's VERSION file
# over, fix ownership and start the service again.
# Globals:  restore_ip, remote_password (read)
# Returns:  0 (failures of the individual steps are not propagated)
function fn_repair_journalnode() {
  local l_ipmc_adm="/opt/oss/manager/agent/bin/ipmc_adm"
  local l_jn_dir="/srv/BigData/journalnode"
  fn_sys_log_info "Start to stop journal node on ${restore_ip} ..."
  # All expansions are quoted so a password containing blanks or glob
  # characters reaches the helpers as exactly one argument.
  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "su - ossadm -c '${l_ipmc_adm} -cmd stopapp -app NdpJournalNodeService'" "3600" "0"
  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "rm -rf ${l_jn_dir};mkdir -p ${l_jn_dir}/hacluster/current;chown -R ossuser:ossgroup ${l_jn_dir}" "30" "0"
  fn_cp_file_remote "${restore_ip}" "1800" "${l_jn_dir}/hacluster/current/VERSION" "${l_jn_dir}/hacluster/current" "${remote_password}"
  # The copied VERSION file arrives owned by root; hand it to ossuser again.
  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "chown -R ossuser:ossgroup ${l_jn_dir}" "30" "0"
  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "su - ossadm -c '${l_ipmc_adm} -cmd startapp -app NdpJournalNodeService'" "3600" "0"
  return 0
}

# Rebuild the name node on the node being restored: stop the service, recreate
# its data directory and run the component init script (bootstrapStandby).
# Globals:  restore_ip, remote_password, product_name (read)
# Returns:  0 on success
#           1 when stopping the service fails
#           the exit status of the remote init script when bootstrap fails
function fn_repair_namenode() {
  local l_nn_home="/opt/oss/${product_name}/apps/NdpNameNodeService"
  local l_status=0

  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "su - ossadm -c '/opt/oss/manager/agent/bin/ipmc_adm -cmd stopapp -app NdpNameNodeService'" "3600" "0"
  l_status=$?
  if [[ ${l_status} -ne 0 ]]; then
    fn_sys_log_error "stop namenode on ${restore_ip} failed"
    return 1
  fi

  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "rm -rf /srv/BigData/namenode;mkdir -p /srv/BigData/namenode;chown -R ossuser:ossgroup /srv/BigData/namenode" "30" "0"

  fn_sys_log_info "Start to bootstrapStandby for namenode ..."
  fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "su - ossuser -c 'source ${l_nn_home}/script/sh/profile/app_profile.sh;bash ${l_nn_home}/script/sh/service/init_component_ms.sh'" "3600" "1"
  # Capture the status right away instead of relying on a global variable
  # that another function happens to set.
  l_status=$?
  if [[ ${l_status} -ne 0 ]]; then
    fn_sys_log_error "bootstrapStandby for namenode on ${restore_ip} failed"
    return "${l_status}"
  fi
  fn_sys_log_info "bootstrapStandby for namenode success..."
  return 0
}

# Restart sssd on every given node. Per node: remove the stale pid file and the
# sssd cache, then try "systemctl restart sssd" up to 10 times, 3 s apart.
# Globals:   remote_password (read)
# Arguments: $@ - node IPs
# Returns:   0 when sssd was restarted on ALL nodes,
#            1 as soon as one node still fails after the last retry
function restart_sssd() {
  fn_info_screen "Start to restart sssd..."
  fn_sys_log_info "Start to restart sssd..."
  local -r l_max_retry_round=10
  local l_retry_round
  local l_restarted
  local data_ip
  for data_ip in "$@"; do
    fn_run_command_remote "${data_ip}" "root" "${remote_password}" "rm /run/sssd.pid -f" "10" "0"
    fn_run_command_remote "${data_ip}" "root" "${remote_password}" "rm -rf /var/lib/sss/db/*" "10" "0"
    l_restarted=0
    for ((l_retry_round = 1; l_retry_round <= l_max_retry_round; l_retry_round++)); do
        if fn_run_command_remote "${data_ip}" "root" "${remote_password}" "systemctl restart sssd" "30" "0"; then
            fn_sys_log_info "restart sssd on ${data_ip}  success."
            l_restarted=1
            # Leave only the retry loop; the remaining nodes still need a restart.
            break
        fi
        fn_sys_log_warn "restart sssd on ${data_ip} failed,retrying ${l_retry_round}/${l_max_retry_round}"
        sleep 3
    done
    if [[ ${l_restarted} -eq 0 ]]; then
        fn_sys_log_error "Failed to restart sssd on ${data_ip}."
        fn_error_screen "[ERROR]Failed to restart sssd on ${data_ip}."
        return 1
    fi
  done
  fn_info_screen "End to restart sssd..."
  fn_sys_log_info "End to restart sssd..."
  return 0
}

###############################################################################################
# Copy the bundled sssd/ldap client configuration files to one node ($1).
function _fn_do_copy_ldap_client_conf()
{
    local l_target_ip="$1"
    local l_conf_dir="${c_root_path}/conf/ldap"
    fn_cp_file_remote "${l_target_ip}" "1800" "${l_conf_dir}/sssd.conf" "/etc/sssd" "${remote_password}"
    fn_cp_file_remote "${l_target_ip}" "1800" "${l_conf_dir}/ldap.conf" "/etc" "${remote_password}"
    fn_cp_file_remote "${l_target_ip}" "1800" "${l_conf_dir}/ldap.conf" "/etc/openldap" "${remote_password}"
}

# Remove both repair flag files on the node being restored.
function _fn_do_clear_repair_flags()
{
    fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "rm -f /tmp/flag_restore_repair_node;" "5" "0"
    fn_run_command_remote "${restore_ip}" "root" "${remote_password}" "rm -f /tmp/flag_namenode_restore_repair_node;" "5" "0"
}

# Restart the LDAP, Kerberos and NameNode services on the restored node through
# the bundled python action. The password is written to the action's stdin.
# Sets the global ${result}; returns 0 on success, the action's status otherwise.
function _fn_do_restart_ndp_services()
{
    # printf + quoting: the password reaches stdin byte-for-byte (no globbing,
    # no option parsing as with "echo ${remote_password}").
    printf '%s\n' "${remote_password}" | "${PYTHONHOME}/bin/python" "${c_root_path}/pyscript/actions/repair/repair_stop_start_ndp.pyc" "${c_log_file}" "${restore_ip}" "NdpLdapService,NdpKerberosService,NdpNameNodeService" "restart"
    result=$?
    if [[ ${result} -ne 0 ]]; then
        fn_sys_log_error "Failed to restart Ndp service."
        fn_error_screen "[ERROR]Failed to restart Ndp service."
        return "${result}"
    fi
    return 0
}

# Run the name node full-report script as ossuser, then wait 10 seconds.
function _fn_do_full_report()
{
    local l_nn_home="/opt/oss/${product_name}/apps/NdpNameNodeService"
    su - ossuser -c "source ${l_nn_home}/script/sh/profile/app_profile.sh;bash ${l_nn_home}/script/sh/service/backup_restore_full_report.sh >> /tmp/ndp_node_repair.log 2>&1"
    sleep 10
}

# As ossuser: run "hdfs fsck /", then "hdfs fsck / -delete", then "hdfs fsck /"
# again; all output is appended to /tmp/ndp_node_repair.log.
function _fn_do_fsck_cleanup()
{
    local l_nn_home="/opt/oss/${product_name}/apps/NdpNameNodeService"
    local l_login="source ${l_nn_home}/etc/HDClient/bigdata_env;kinit -kt ${l_nn_home}/etc/hdfs.keytab hdfs/hadoop.hadoop.com"
    local l_fsck_args
    for l_fsck_args in "/" "/ -delete" "/"; do
        su - ossuser -c "${l_login};hdfs fsck ${l_fsck_args} >> /tmp/ndp_node_repair.log 2>&1"
    done
}

# Run the repair that matches the role of the node being restored
# (master, slave or third journal node). Nothing is repaired when
# ${restore_ip} matches none of them.
# Globals:  restore_ip, master_ip, slave_ip, third_ip, all_data_ip,
#           remote_password, product_name, c_root_path, c_log_file,
#           PYTHONHOME (read); result (written)
# Returns:  0 on success
#           1 when the name node repair fails
#           the status of restart_sssd / the service restart when they fail
function fn_do()
{
    fn_info_screen "Start to do repair after..."
    fn_sys_log_info "Start to do repair after..."

    local l_nn_home="/opt/oss/${product_name}/apps/NdpNameNodeService"
    local data_ip
    local manager_ip
    local -a manager_ip_list=()
    local -a ip_list=()

    if [[ "${master_ip}" == "${restore_ip}" ]]; then
        fn_info_screen "Start to repair master..."
        fn_sys_log_info "Start to repair master..."

        # all_data_ip is a separator-delimited list in one string, so it is
        # expanded unquoted on purpose to get one word per node.
        # shellcheck disable=SC2048,SC2086
        for data_ip in ${all_data_ip[*]}; do
            _fn_do_copy_ldap_client_conf "${data_ip}"
        done

        # Unquoted on purpose: an empty address must not become an empty entry.
        # shellcheck disable=SC2206
        manager_ip_list=(${master_ip} ${slave_ip})
        for manager_ip in "${manager_ip_list[@]}"; do
            fn_run_command_remote "${manager_ip}" "root" "${remote_password}" "rm -rf /srv/BigData/ldapData" "5" "0"
            fn_cp_file_remote "${manager_ip}" "1800" "${c_root_path}/conf/data/ldapData" "/srv/BigData" "${remote_password}"
            fn_run_command_remote "${manager_ip}" "root" "${remote_password}" "chown -R ossuser:ossgroup /srv/BigData/ldapData" "5" "0"
        done

        fn_repair_ldap "${c_root_path}/conf/data/ldap" "${manager_ip_list[@]}"
        fn_repair_kerberos "${c_root_path}/conf/data/kerberos" "${manager_ip_list[@]}"

        # sssd is only restarted when /proc/version does not mention SUSE.
        if ! grep -q "SUSE" /proc/version; then
            # shellcheck disable=SC2048,SC2086
            restart_sssd ${all_data_ip[*]}
            result=$?
            if [[ ${result} -ne 0 ]]; then
                return "${result}"
            fi
        fi

        fn_repair_journalnode

        fn_repair_namenode || return 1

        fn_repair_redis

        fn_sys_log_info "start to clear repair flag."
        _fn_do_clear_repair_flags

        fn_info_screen "Wait fo ${restore_ip} start microservice."
        _fn_do_restart_ndp_services || return "${result}"

        _fn_do_full_report
        fn_sys_log_info "Start to fsck / corrupt blocks ..."
        _fn_do_fsck_cleanup

    elif [[ "${slave_ip}" == "${restore_ip}" ]]; then
        fn_info_screen "Start to repair slave..."
        fn_sys_log_info "Start to repair slave..."

        _fn_do_copy_ldap_client_conf "${restore_ip}"

        # shellcheck disable=SC2206
        ip_list=(${restore_ip})
        fn_repair_ldap "/opt/oss/share/${product_name}/NdpLdapService/" "${ip_list[@]}"

        fn_repair_kerberos "/opt/oss/share/${product_name}/NdpKerberosService/" "${ip_list[@]}"

        fn_repair_journalnode

        fn_repair_namenode || return 1

        fn_repair_redis

        su - ossuser -c "source ${l_nn_home}/etc/HDClient/bigdata_env;hdfs dfsadmin -safemode forceExit >> /tmp/ndp_node_repair.log 2>&1"

        fn_sys_log_info "Start to clear repair flag."
        _fn_do_clear_repair_flags
        fn_info_screen "Wait fo ${restore_ip} start microservice."
        sleep 60
        _fn_do_restart_ndp_services || return "${result}"

        _fn_do_full_report
        _fn_do_fsck_cleanup
    elif [[ "${third_ip}" == "${restore_ip}" ]]; then
        fn_info_screen "Start to repair data node..."
        fn_sys_log_info "Start to repair data node..."
        _fn_do_copy_ldap_client_conf "${restore_ip}"

        fn_repair_redis
        fn_repair_journalnode
        fn_sys_log_info "start to clear repair flag."
        _fn_do_clear_repair_flags
    fi

    fn_info_screen "repair ${restore_ip} done."
    fn_sys_log_info "repair ${restore_ip} done."
    return 0
}

###############################################################################################
# Entry point of the repair flow: ask for the root password, initialise,
# precheck, collect node addresses, run the repair and report the result.
# Globals:   c_start_time (written, readonly), restore_ip, product_name (written)
# Arguments: $1 - IP of the node being restored
#            $2 - product name (used as a path component under /opt/oss)
# Returns:   0 on success, 1 on bad arguments or when any stage fails
function fn_main()
{
    # Both arguments end up in remote commands and paths; refuse to run
    # with either of them missing.
    if [[ -z "$1" || -z "$2" ]]; then
        echo "[ERROR]Usage: $0 <restore_ip> <product_name>" >&2
        return 1
    fi

    readonly c_start_time="$(date +%s)"
    restore_ip=$1
    product_name=$2
    fn_init_node_passwd
    fn_init || return 1
    fn_sys_log_enter
    fn_precheck || return 1
    fn_prepare || return 1

    # Record the outcome instead of returning right away, so the summary and
    # the closing log entry are also written when the repair fails.
    local l_result=0
    fn_do || l_result=1

    local l_elapsed_seconds
    l_elapsed_seconds="$(fn_sys_get_elapsed_time "$c_start_time")"
    fn_info_screen "Result [${l_result}]. It takes [${l_elapsed_seconds}] seconds"
    fn_sys_log_leave "Result [${l_result}]. It takes [${l_elapsed_seconds}] seconds"
    return "${l_result}"
}

###############################################################################################

# Script entry point: run the repair flow with the command-line arguments and
# exit with its status.
fn_main "$@"
result=$?
exit "${result}"
