/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <NdbRestarter.hpp>
#include <NdbOut.hpp>
#include <NdbSleep.h>
#include <NdbTick.h>
#include <LocalConfig.hpp>
#include <mgmapi_debug.h>
#include <NDBT_Output.hpp>
#include <random.h>
#include <kernel/ndb_limits.h>
#include <ndb_version.h>

#define MGMERR(h) \
  ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
	 << ", line="<<ndb_mgm_get_latest_error_line(h) \
	 << endl;


NdbRestarter::NdbRestarter(const char* _addr): 
  connected(false),
  addr(_addr), 
  host(NULL),
  port(-1),
  handle(NULL)
{
  if (addr == NULL){
    LocalConfig lcfg;
    if(!lcfg.init()){
      lcfg.printError();
      lcfg.printUsage();
      g_err  << "NdbRestarter - Error parsing local config file" << endl;
      return;
    }

    if (lcfg.items == 0){
      g_err << "NdbRestarter - No management servers configured in local config file" << endl;
      return;
    }
  
    for (int i = 0; i<lcfg.items; i++){
      MgmtSrvrId * m = lcfg.ids[i];
      
      switch(m->type){
      case MgmId_TCP:
	char buf[255];
	snprintf(buf, 255, "%s:%d", m->data.tcp.remoteHost, m->data.tcp.port);
	addr = strdup(buf);
	host = strdup(m->data.tcp.remoteHost);
	port = m->data.tcp.port;
	break;
      case MgmId_File:
	break;
      default:
	break;
      }
      if (addr != NULL)
	break;
    }
  }

}

NdbRestarter::~NdbRestarter(){
  disconnect();
}

int NdbRestarter::getDbNodeId(int _i){
  if (!isConnected())
    return -1;

  if (getStatus() != 0)
    return -1;

  for(size_t i = 0; i < ndbNodes.size(); i++){     
    if (i == (unsigned)_i){
      return ndbNodes[i].node_id;
    }
  }
  return -1;
}


int
NdbRestarter::restartOneDbNode(int _nodeId,
			       bool inital,
			       bool nostart,
			       bool abort){
  if (!isConnected())
    return -1;

  int ret = 0;
  
  if ((ret = ndb_mgm_restart2(handle, 1, &_nodeId,
			      inital, nostart, abort)) <= 0) {
    /**
     * ndb_mgm_restart2 returned error, one reason could
     * be that the node have not stopped fast enough!
     * Check status of the node to see if it's on the 
     * way down. If that's the case ignore the error
     */ 

    if (getStatus() != 0)
      return -1;

    g_info << "ndb_mgm_restart2 returned with error, checking node state" << endl;

    for(size_t i = 0; i < ndbNodes.size(); i++){
      if(ndbNodes[i].node_id == _nodeId){
	g_info <<_nodeId<<": status="<<ndbNodes[i].node_status<<endl;
	/* Node found check state */
	switch(ndbNodes[i].node_status){
	case NDB_MGM_NODE_STATUS_RESTARTING:
	case NDB_MGM_NODE_STATUS_SHUTTING_DOWN:
	  return 0;
	default:
	  break;
	}
      }
    }
    
    MGMERR(handle);
    g_err  << "Could not stop node with id = "<< _nodeId << endl;
    return -1;
  }

  return 0;
}

int
NdbRestarter::getMasterNodeId(){
  if (!isConnected())
    return -1;
  
  if (getStatus() != 0)
    return -1;
  
  int min = 0;
  int node = -1;
  for(size_t i = 0; i < ndbNodes.size(); i++){
    if(min == 0 || ndbNodes[i].dynamic_id < min){
      min = ndbNodes[i].dynamic_id;
      node = ndbNodes[i].node_id;
    }
  }

  return node;
}

int
NdbRestarter::getRandomNotMasterNodeId(int rand){
  int master = getMasterNodeId();
  if(master == -1)
    return -1;

  Uint32 counter = 0;
  rand = rand % ndbNodes.size();
  while(counter++ < ndbNodes.size() && ndbNodes[rand].node_id == master)
    rand = (rand + 1) % ndbNodes.size();
  
  if(ndbNodes[rand].node_id != master)
    return ndbNodes[rand].node_id;
  return -1;
}

int
NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){
  if (!isConnected())
    return -1;
  
  if (getStatus() != 0)
    return -1;
  
  int node_group = -1;
  for(size_t i = 0; i < ndbNodes.size(); i++){
    if(ndbNodes[i].node_id == nodeId){
      node_group = ndbNodes[i].node_group;
      break;
    }
  }
  if(node_group == -1){
    return -1;
  }

  Uint32 counter = 0;
  rand = rand % ndbNodes.size();
  while(counter++ < ndbNodes.size() && ndbNodes[rand].node_group == node_group)
    rand = (rand + 1) % ndbNodes.size();
  
  if(ndbNodes[rand].node_group != node_group)
    return ndbNodes[rand].node_id;

  return -1;
}

int 
NdbRestarter::waitClusterStarted(unsigned int _timeout){
  return waitClusterState(NDB_MGM_NODE_STATUS_STARTED, _timeout);
}

int 
NdbRestarter::waitClusterStartPhase(int _startphase, unsigned int _timeout){
  return waitClusterState(NDB_MGM_NODE_STATUS_STARTING, _timeout, _startphase);
}

int 
NdbRestarter::waitClusterSingleUser(unsigned int _timeout){
  return waitClusterState(NDB_MGM_NODE_STATUS_SINGLEUSER, _timeout);
}

int 
NdbRestarter::waitClusterNoStart(unsigned int _timeout){
  return waitClusterState(NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);
}

int 
NdbRestarter::waitClusterState(ndb_mgm_node_status _status,
			       unsigned int _timeout,
			       int _startphase){

  int nodes[MAX_NDB_NODES];
  int numNodes = 0;

  if (getStatus() != 0)
    return -1;
  
  // Collect all nodes into nodes
  for (size_t i = 0; i < ndbNodes.size(); i++){
    nodes[i] = ndbNodes[i].node_id;
    numNodes++;
  }

  return waitNodesState(nodes, numNodes, _status, _timeout, _startphase);
}
 

int 
NdbRestarter::waitNodesState(int * _nodes, int _num_nodes,
			     ndb_mgm_node_status _status,
			     unsigned int _timeout,
			     int _startphase){
  
  if (!isConnected()){
    g_err << "!isConnected"<<endl;
    return -1;
  }

  unsigned int attempts = 0;
  unsigned int resetAttempts = 0;
  const unsigned int MAX_RESET_ATTEMPTS = 10;
  bool allInState = false;    
  while (allInState == false){
    if (_timeout > 0 && attempts > _timeout){
      /**
       * Timeout has expired waiting for the nodes to enter
       * the state we want
       */
      bool waitMore = false;
      /** 
       * Make special check if we are waiting for 
       * cluster to become started
       */
      if(_status == NDB_MGM_NODE_STATUS_STARTED){
	waitMore = true;
	/**
	 * First check if any node is not starting
	 * then it's no idea to wait anymore
	 */
	for (size_t n = 0; n < ndbNodes.size(); n++){
	  if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
	      ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
	    waitMore = false;

	}
      } 

      if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
	g_err << "waitNodeState("
	      << ndb_mgm_get_node_status_string(_status)
	      <<", "<<_startphase<<")"
	      << " timeout after " << attempts <<" attemps" << endl;
	return -1;
      } 

      g_err << "waitNodeState("
	    << ndb_mgm_get_node_status_string(_status)
	    <<", "<<_startphase<<")"
	    << " resetting number of attempts "
	    << resetAttempts << endl;
      attempts = 0;
      resetAttempts++;
      
    }

    allInState = true;
    if (getStatus() != 0){
      g_err << "getStatus != 0" << endl;
      return -1;
    }

    // ndbout << "waitNodeState; _num_nodes = " << _num_nodes << endl;
    // for (int i = 0; i < _num_nodes; i++)
    //   ndbout << " node["<<i<<"] =" <<_nodes[i] << endl;

    for (int i = 0; i < _num_nodes; i++){
      ndb_mgm_node_state* ndbNode = NULL;
      for (size_t n = 0; n < ndbNodes.size(); n++){
	if (ndbNodes[n].node_id == _nodes[i])
	  ndbNode = &ndbNodes[n];
      }

      if(ndbNode == NULL){
	allInState = false;
	continue;
      }

      g_info << "State node " << ndbNode->node_id << " "
	     << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;

      assert(ndbNode != NULL);

      if(_status == NDB_MGM_NODE_STATUS_STARTING && 
	 ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING && 
	   ndbNode->start_phase >= _startphase) ||
	  (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED)))
	continue;

      if (_status == NDB_MGM_NODE_STATUS_STARTING){
	g_info << "status = "  
	       << ndb_mgm_get_node_status_string(ndbNode->node_status)
	       <<", start_phase="<<ndbNode->start_phase<<endl;
	if (ndbNode->node_status !=  _status) {
	  if (ndbNode->node_status < _status)
	    allInState = false;
	  else 
	    g_info << "node_status(" << ndbNode->node_status
		   <<") != _status("<<_status<<")"<<endl;
	} else if (ndbNode->start_phase < _startphase)
	  allInState = false;
      } else {
	if (ndbNode->node_status !=  _status) 
	  allInState = false;
      }
    }
    g_info << "Waiting for cluster enter state" 
	    << ndb_mgm_get_node_status_string(_status)<< endl;
    NdbSleep_SecSleep(1);
    attempts++;
  }
  return 0;
}

int NdbRestarter::waitNodesStarted(int * _nodes, int _num_nodes,
		     unsigned int _timeout){
  return waitNodesState(_nodes, _num_nodes, 
			  NDB_MGM_NODE_STATUS_STARTED, _timeout);  
}

int NdbRestarter::waitNodesStartPhase(int * _nodes, int _num_nodes, 
			int _startphase, unsigned int _timeout){
  return waitNodesState(_nodes, _num_nodes, 
			  NDB_MGM_NODE_STATUS_STARTING, _timeout,
			  _startphase);  
}

int NdbRestarter::waitNodesNoStart(int * _nodes, int _num_nodes,
		     unsigned int _timeout){
  return waitNodesState(_nodes, _num_nodes, 
			  NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);  
}

bool 
NdbRestarter::isConnected(){
  if (connected == true)
    return true;
  return connect() == 0;
}

int 
NdbRestarter::connect(){
  handle = ndb_mgm_create_handle();   
  if (handle == NULL){
    g_err << "handle == NULL" << endl;
    return -1;
  }
  g_info << "Connecting to mgmsrv at " << addr << endl;
  if (ndb_mgm_connect(handle, addr) == -1) {
    MGMERR(handle);
    g_err  << "Connection to " << addr << " failed" << endl;
    return -1;
  }

  connected = true;
  return 0;
}

void 
NdbRestarter::disconnect(){
  if (handle != NULL){
    ndb_mgm_disconnect(handle);
    ndb_mgm_destroy_handle(&handle);
  }
  connected = false;
}

int 
NdbRestarter::getStatus(){
  int retries = 0;
  struct ndb_mgm_cluster_state * status;
  struct ndb_mgm_node_state * node;
  
  ndbNodes.clear();
  mgmNodes.clear();
  apiNodes.clear();

  if (!isConnected())
    return -1;
  
  while(retries < 10){
    status = ndb_mgm_get_status(handle);
    if (status == NULL){
      ndbout << "status==NULL, retries="<<retries<<endl;
      MGMERR(handle);
      retries++;
      continue;
    }
    for (int i = 0; i < status->no_of_nodes; i++){
      node = &status->node_states[i];      
      switch(node->node_type){
      case NDB_MGM_NODE_TYPE_NDB:
	ndbNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_MGM:
	mgmNodes.push_back(*node);
	break;
      case NDB_MGM_NODE_TYPE_API:
	apiNodes.push_back(*node);
	break;
      default:
	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
	  retries++;
	  ndbNodes.clear();
	  mgmNodes.clear();
	  apiNodes.clear();
	  free(status); 
	  status = NULL;
	  i = status->no_of_nodes;

	  ndbout << "kalle"<< endl;
	  break;
	}
	abort();
	break;
      }
    }
    if(status == 0){
      ndbout << "status == 0" << endl;
      continue;
    }
    free(status);
    return 0;
  }
   
  g_err  << "getStatus failed" << endl;
  return -1;
}


int NdbRestarter::getNumDbNodes(){
  if (!isConnected())
    return -1;

  if (getStatus() != 0)
    return -1;

  return ndbNodes.size();
}

int NdbRestarter::restartAll(bool initial,
			     bool nostart,
			     bool abort){
  
  if (!isConnected())
    return -1;

  if (ndb_mgm_restart2(handle, 0, NULL, initial, 1, abort) == -1) {
    MGMERR(handle);
    g_err  << "Could not restart(stop) all nodes " << endl;
    // return -1; Continue anyway - Magnus
  }
  
  if (waitClusterNoStart(60) != 0){
    g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl;
    return -1;
  }
  
  if(nostart){
    g_debug << "restartAll: nostart == true" << endl;
    return 0;
  }

  if (ndb_mgm_start(handle, 0, NULL) == -1) {
    MGMERR(handle);
    g_err  << "Could not restart(start) all nodes " << endl;
    return -1;
  }
  
  return 0;
}

int NdbRestarter::startAll(){
  if (!isConnected())
    return -1;

  if (ndb_mgm_start(handle, 0, NULL) == -1) {
    MGMERR(handle);
    g_err  << "Could not start all nodes " << endl;
    return -1;
  }
  
  return 0;
  
}

int NdbRestarter::startNodes(int * nodes, int num_nodes){
  if (!isConnected())
    return -1;
  
  if (ndb_mgm_start(handle, num_nodes, nodes) != num_nodes) {
    MGMERR(handle);
    g_err  << "Could not start all nodes " << endl;
    return -1;
  }
  
  return 0;
}

int NdbRestarter::insertErrorInNode(int _nodeId, int _error){
  if (!isConnected())
    return -1;

  ndb_mgm_reply reply;
  reply.return_code = 0;

  if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){
    MGMERR(handle);
    g_err << "Could not insert error in node with id = "<< _nodeId << endl;
  }
  if(reply.return_code != 0){
    g_err << "Error: " << reply.message << endl;
  }
  return 0;
}

int NdbRestarter::insertErrorInAllNodes(int _error){
  if (!isConnected())
    return -1;

  if (getStatus() != 0)
    return -1;

  int result = 0;
 
  for(size_t i = 0; i < ndbNodes.size(); i++){     
    g_debug << "inserting error in node " << ndbNodes[i].node_id << endl;
    if (insertErrorInNode(ndbNodes[i].node_id, _error) == -1)
      result = -1;
  }
  return result;

}



int NdbRestarter::dumpStateOneNode(int _nodeId, int * _args, int _num_args){
 if (!isConnected())
    return -1;

  ndb_mgm_reply reply;
  reply.return_code = 0;

  if (ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply) == -1){
    MGMERR(handle);
    g_err << "Could not dump state in node with id = "<< _nodeId << endl;
  }

  if(reply.return_code != 0){
    g_err << "Error: " << reply.message << endl;
  }
  return reply.return_code;  
}

int NdbRestarter::dumpStateAllNodes(int * _args, int _num_args){
 if (!isConnected())
    return -1;

 if (getStatus() != 0)
   return -1;

 int result = 0;
 
 for(size_t i = 0; i < ndbNodes.size(); i++){     
   g_debug << "dumping state in node " << ndbNodes[i].node_id << endl;
   if (dumpStateOneNode(ndbNodes[i].node_id, _args, _num_args) == -1)
     result = -1;
 }
 return result;

}


int NdbRestarter::enterSingleUserMode(int _nodeId){
  if (!isConnected())
    return -1;

  ndb_mgm_reply reply;
  reply.return_code = 0;

  if (ndb_mgm_enter_single_user(handle, _nodeId, &reply) == -1){
    MGMERR(handle);
    g_err << "Could not enter single user mode api node = "<< _nodeId << endl;
  }
  
  if(reply.return_code != 0){
    g_err << "Error: " << reply.message << endl;
  }
  
  return reply.return_code;  
}


int NdbRestarter::exitSingleUserMode(){
  if (!isConnected())
    return -1;

  ndb_mgm_reply reply;
  reply.return_code = 0;

  if (ndb_mgm_exit_single_user(handle, &reply) == -1){
    MGMERR(handle);
    g_err << "Could not exit single user mode " << endl;
  }

  if(reply.return_code != 0){
    g_err << "Error: " << reply.message << endl;
  }
  return reply.return_code;  
}