Commit 3cea3705 authored by unknown's avatar unknown

BUG#13985 ndb_mgm "status" command can return incorrect data node status

Second half of the fix for this bug.

This patch forces a heartbeat to be sent and will wait (a little while)
for replies. This way we can get

> all status
X starting
Y started
X started
>

which is okay as the new status comes after the old status, always.
There remains a very small window in which output like the above can be
produced, where only part of the cluster appears started.

This is about the best we can do with a command line interactive program.


ndb/src/mgmsrv/MgmtSrvr.cpp:
  Add updateStatus method to MgmtSrvr.
  
  Used to force an update of node status for the nodes.
ndb/src/mgmsrv/MgmtSrvr.hpp:
  add prototype for updateStatus(NodeBitmask) method
ndb/src/mgmsrv/Services.cpp:
  When status is queried, force an update of the status in the mgm server. (i.e. send heartbeats)
ndb/src/ndbapi/ClusterMgr.cpp:
  new DEBUG_REG define for debugging registration and HB code.
  
  Add ClusterMgr::forceHB(NodeBitmask) which sends a HB signal to each node in
  the bitmask and then waits for a REGCONF from them.
  Will only wait for a total of 1 second, not blocking an end client for too long.
  
  On receipt of a heartbeat reply (API_REGCONF or API_REGREF), clear the
  nodeId in the "waiting for" bitmask and, once the bitmask is empty, signal
  any waiting threads.
ndb/src/ndbapi/ClusterMgr.hpp:
  Add ::forceHB(NodeBitmask) and associated variables
parent 746fc2f4
......@@ -1412,6 +1412,12 @@ MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
#include <ClusterMgr.hpp>
/**
 * Force a refresh of node status for the nodes set in `nodes`.
 *
 * Forwards the bitmask to ClusterMgr::forceHB() on the cluster manager owned
 * by the transporter facade. The bitmask is passed by value.
 */
void
MgmtSrvr::updateStatus(NodeBitmask nodes)
{
theFacade->theClusterMgr->forceHB(nodes);
}
int
MgmtSrvr::status(int nodeId,
ndb_mgm_node_status * _status,
......
......@@ -487,6 +487,8 @@ public:
void get_connected_nodes(NodeBitmask &connected_nodes) const;
SocketServer *get_socket_server() { return m_socket_server; }
/**
 * Force an update of node status for the nodes set in `nodes`; the
 * implementation forwards the bitmask to ClusterMgr::forceHB().
 */
void updateStatus(NodeBitmask nodes);
//**************************************************************************
private:
//**************************************************************************
......
......@@ -951,6 +951,9 @@ printNodeStatus(OutputStream *output,
MgmtSrvr &mgmsrv,
enum ndb_mgm_node_type type) {
NodeId nodeId = 0;
NodeBitmask hbnodes;
mgmsrv.get_connected_nodes(hbnodes);
mgmsrv.updateStatus(hbnodes);
while(mgmsrv.getNextNodeId(&nodeId, type)) {
enum ndb_mgm_node_status status;
Uint32 startPhase = 0,
......
......@@ -39,6 +39,8 @@
int global_flag_send_heartbeat_now= 0;
//#define DEBUG_REG
// Just a C wrapper for threadMain
extern "C"
void*
......@@ -67,6 +69,8 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
DBUG_ENTER("ClusterMgr::ClusterMgr");
ndbSetOwnVersion();
clusterMgrThreadMutex = NdbMutex_Create();
waitForHBMutex= NdbMutex_Create();
waitForHBCond= NdbCondition_Create();
noOfAliveNodes= 0;
noOfConnectedNodes= 0;
theClusterMgrThread= 0;
......@@ -78,6 +82,8 @@ ClusterMgr::~ClusterMgr()
{
DBUG_ENTER("ClusterMgr::~ClusterMgr");
doStop();
NdbCondition_Destroy(waitForHBCond);
NdbMutex_Destroy(waitForHBMutex);
NdbMutex_Destroy(clusterMgrThreadMutex);
DBUG_VOID_RETURN;
}
......@@ -163,6 +169,49 @@ ClusterMgr::doStop( ){
DBUG_VOID_RETURN;
}
/**
 * Force a heartbeat round and wait briefly for the replies.
 *
 * Sends an API_REGREQ to QMGR on every node set in `waitFor`, records those
 * nodes in waitForHBFromNodes, and then blocks for at most 1000 ms on
 * waitForHBCond. execAPI_REGCONF() / execAPI_REGREF() clear a node from
 * waitForHBFromNodes when its reply arrives and signal waitForHBCond once the
 * bitmask is empty.
 *
 * The wait is best effort: on timeout the function simply returns, so the
 * caller may still observe stale status for nodes that did not answer in time.
 *
 * @param waitFor  nodes to send the forced heartbeat to and to wait for.
 *                 If empty, nothing is sent and the function returns without
 *                 waiting.
 *
 * NOTE(review): waitForHBFromNodes is a single shared member, so concurrent
 * callers overwrite each other's wait set; this is not addressed here.
 */
void
ClusterMgr::forceHB(NodeBitmask waitFor)
{
  theFacade.lock_mutex();
  global_flag_send_heartbeat_now= 1;

  waitForHBFromNodes= waitFor;
#ifdef DEBUG_REG
  char buf[128];
  ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
#endif

  NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
  signal.theVerId_signalNumber = GSN_API_REGREQ;
  signal.theReceiversBlockNumber = QMGR;
  signal.theTrace = 0;
  signal.theLength = ApiRegReq::SignalLength;

  ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend());
  req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId());
  req->version = NDB_VERSION;

  int nodeId= 0;
  for(int i=0;
      NodeBitmask::NotFound!=(nodeId= waitForHBFromNodes.find(i));
      i= nodeId+1)
  {
#ifdef DEBUG_REG
    ndbout << "FORCE HB to " << nodeId << endl;
#endif
    theFacade.sendSignalUnCond(&signal, nodeId);
  }

  /*
   * Decide whether to wait while the facade mutex is still held: with an
   * empty wait set no reply is expected, and waiting would only stall the
   * caller until the timeout (or an unrelated reply) fires.
   */
  const bool mustWait= !waitForHBFromNodes.isclear();

  /*
   * Take waitForHBMutex BEFORE releasing the facade mutex. The reply handlers
   * must acquire waitForHBMutex to signal, so they cannot signal until
   * NdbCondition_WaitTimeout() has released it, i.e. until we are actually
   * waiting. Releasing the facade mutex first (as before) left a window in
   * which the last reply could signal with no waiter, costing a full timeout.
   * NOTE(review): assumes the reply handlers run with the facade mutex held,
   * so none can run between the send loop and this point -- confirm.
   * Lock order is facade mutex -> waitForHBMutex; the handlers never take the
   * facade mutex while holding waitForHBMutex.
   */
  if (mustWait)
    NdbMutex_Lock(waitForHBMutex);

  theFacade.unlock_mutex();

  if (mustWait)
  {
    // Bounded wait so an interactive client is never blocked for long.
    NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000);
    NdbMutex_Unlock(waitForHBMutex);
  }
#ifdef DEBUG_REG
  ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
#endif
}
void
ClusterMgr::threadMain( ){
NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
......@@ -226,7 +275,7 @@ ClusterMgr::threadMain( ){
if (theNode.m_info.m_type == NodeInfo::REP) {
signal.theReceiversBlockNumber = API_CLUSTERMGR;
}
#if 0
#ifdef DEBUG_REG
ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId);
#endif
theFacade.sendSignalUnCond(&signal, nodeId);
......@@ -278,7 +327,7 @@ ClusterMgr::execAPI_REGREQ(const Uint32 * theData){
const ApiRegReq * const apiRegReq = (ApiRegReq *)&theData[0];
const NodeId nodeId = refToNode(apiRegReq->ref);
#if 0
#ifdef DEBUG_REG
ndbout_c("ClusterMgr: Recd API_REGREQ from node %d", nodeId);
#endif
......@@ -319,7 +368,7 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
const ApiRegConf * const apiRegConf = (ApiRegConf *)&theData[0];
const NodeId nodeId = refToNode(apiRegConf->qmgrRef);
#if 0
#ifdef DEBUG_REG
ndbout_c("ClusterMgr: Recd API_REGCONF from node %d", nodeId);
#endif
......@@ -351,6 +400,13 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
if (node.m_info.m_type != NodeInfo::REP) {
node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
}
waitForHBFromNodes.clear(nodeId);
if(waitForHBFromNodes.isclear())
{
NdbMutex_Lock(waitForHBMutex);
NdbCondition_Signal(waitForHBCond);
NdbMutex_Unlock(waitForHBMutex);
}
}
void
......@@ -379,6 +435,13 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
default:
break;
}
waitForHBFromNodes.clear(nodeId);
if(waitForHBFromNodes.isclear())
{
NdbMutex_Lock(waitForHBMutex);
NdbCondition_Signal(waitForHBCond);
NdbMutex_Unlock(waitForHBMutex);
}
}
void
......
......@@ -50,6 +50,8 @@ public:
void doStop();
void startThread();
/**
 * Send an API_REGREQ to every node set in `waitFor`, then wait (at most
 * 1000 ms) on waitForHBCond for the replies to arrive.
 */
void forceHB(NodeBitmask waitFor);
private:
void threadMain();
......@@ -86,6 +88,10 @@ private:
Node theNodes[MAX_NODES];
NdbThread* theClusterMgrThread;
/**
 * State used by forceHB() to wait for heartbeat replies.
 */
// Nodes a forced heartbeat is still outstanding for: set by forceHB(),
// cleared per node in execAPI_REGCONF() / execAPI_REGREF().
NodeBitmask waitForHBFromNodes; // used in forcing HBs
// Mutex paired with waitForHBCond; created in the constructor and destroyed
// in the destructor.
NdbMutex* waitForHBMutex;
// Signalled by the reply handlers once waitForHBFromNodes becomes empty;
// forceHB() does a timed wait on it.
NdbCondition* waitForHBCond;
/**
* Used for controlling start/stop of the thread
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment