diff --git a/ndb/include/kernel/signaldata/ArbitSignalData.hpp b/ndb/include/kernel/signaldata/ArbitSignalData.hpp index f255b8dcbbe6db960470d21cee4341312941d081..34b73644a13eaf013e82f2dd38aa8ce05d506a7d 100644 --- a/ndb/include/kernel/signaldata/ArbitSignalData.hpp +++ b/ndb/include/kernel/signaldata/ArbitSignalData.hpp @@ -94,13 +94,14 @@ public: // arbitration result LoseNodes = 41, // lose on ndb node count - WinGroups = 42, // we win, no need for arbitration - LoseGroups = 43, // we lose, missing node group - Partitioning = 44, // possible network partitioning - WinChoose = 45, // positive reply - LoseChoose = 46, // negative reply - LoseNorun = 47, // arbitrator required but not running - LoseNocfg = 48, // arbitrator required but none configured + WinNodes = 42, // win on ndb node count + WinGroups = 43, // we win, no need for arbitration + LoseGroups = 44, // we lose, missing node group + Partitioning = 45, // possible network partitioning + WinChoose = 46, // positive reply + LoseChoose = 47, // negative reply + LoseNorun = 48, // arbitrator required but not running + LoseNocfg = 49, // arbitrator required but none configured // general error codes ErrTicket = 91, // invalid arbitrator-ticket diff --git a/ndb/src/common/debugger/EventLogger.cpp b/ndb/src/common/debugger/EventLogger.cpp index 8a09be9a0a7399c0b3e8ec218511039749f98c5c..59be0affcb4c9196944886e9415bb3fc131681c0 100644 --- a/ndb/src/common/debugger/EventLogger.cpp +++ b/ndb/src/common/debugger/EventLogger.cpp @@ -421,6 +421,11 @@ EventLogger::getText(char * m_text, size_t m_text_len, "%sArbitration check lost - less than 1/2 nodes left", theNodeId); break; + case ArbitCode::WinNodes: + BaseString::snprintf(m_text, m_text_len, + "%sArbitration check won - all node groups and more than 1/2 nodes left", + theNodeId); + break; case ArbitCode::WinGroups: BaseString::snprintf(m_text, m_text_len, "%sArbitration check won - node group majority", diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index a433d72744e4a1233d9ee902377b5240ab8184d1..da8596076ec2263519b38938ecfcf4ad7ac86129 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -2946,6 +2946,12 @@ void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode) * the "handle" routines. */ +/** + * Should < 1/2 nodes die unconditionally. Affects only >= 3-way + * replication. + */ +static const bool g_ndb_arbit_one_half_rule = false; + /** * Config signals are logically part of CM_INIT. */ @@ -3157,7 +3163,8 @@ Qmgr::handleArbitCheck(Signal* signal) ndbrequire(cpresident == getOwnNodeId()); NodeBitmask ndbMask; computeArbitNdbMask(ndbMask); - if (2 * ndbMask.count() < cnoOfNodes) { + if (g_ndb_arbit_one_half_rule && + 2 * ndbMask.count() < cnoOfNodes) { jam(); arbitRec.code = ArbitCode::LoseNodes; } else { @@ -3181,6 +3188,11 @@ Qmgr::handleArbitCheck(Signal* signal) case CheckNodeGroups::Partitioning: jam(); arbitRec.code = ArbitCode::Partitioning; + if (g_ndb_arbit_one_half_rule && + 2 * ndbMask.count() > cnoOfNodes) { + jam(); + arbitRec.code = ArbitCode::WinNodes; + } break; default: ndbrequire(false); @@ -3190,8 +3202,12 @@ Qmgr::handleArbitCheck(Signal* signal) switch (arbitRec.code) { case ArbitCode::LoseNodes: jam(); + case ArbitCode::LoseGroups: + jam(); goto crashme; - case ArbitCode::WinGroups: + case ArbitCode::WinNodes: + jam(); + case ArbitCode::WinGroups: jam(); if (arbitRec.state == ARBIT_RUN) { jam(); @@ -3200,9 +3216,6 @@ Qmgr::handleArbitCheck(Signal* signal) arbitRec.state = ARBIT_INIT; arbitRec.newstate = true; break; - case ArbitCode::LoseGroups: - jam(); - goto crashme; case ArbitCode::Partitioning: if (arbitRec.state == ARBIT_RUN) { jam(); @@ -3762,8 +3775,7 @@ Qmgr::execARBIT_CHOOSEREF(Signal* signal) } /** - * Handle CRASH state. We must crash immediately. But it - * would be nice to wait until event reports have been sent. + * Handle CRASH state. We must crash immediately. * XXX tell other nodes in our party to crash too. */ void @@ -3773,12 +3785,11 @@ Qmgr::stateArbitCrash(Signal* signal) if (arbitRec.newstate) { jam(); CRASH_INSERTION((Uint32)910 + arbitRec.state); - arbitRec.setTimestamp(); arbitRec.code = 0; arbitRec.newstate = false; } -#if 0 +#ifdef ndb_arbit_crash_wait_for_event_report_to_get_out if (! (arbitRec.getTimediff() > getArbitTimeout())) return; #endif