From 327cd42a7f0cf18b8a6b13ac7cf4b04733df07e3 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Wed, 7 Nov 2007 20:57:21 +0100
Subject: [PATCH] ndb - bug#32160 (recommit to 5.0)   fix lcp master take over
 bug

ndb/src/kernel/blocks/ERROR_codes.txt:
  new error codes
ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  add debug code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  fix master lcp bug
  add 2 new error codes
ndb/test/ndbapi/testNodeRestart.cpp:
  testcase
ndb/test/run-test/daily-basic-tests.txt:
  testcase
---
 ndb/src/kernel/blocks/ERROR_codes.txt     |  7 ++-
 ndb/src/kernel/blocks/dbdih/Dbdih.hpp     | 10 ++++
 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 60 ++++++++++++++++++++---
 ndb/test/ndbapi/testNodeRestart.cpp       | 48 ++++++++++++++++++
 ndb/test/run-test/daily-basic-tests.txt   |  4 ++
 5 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index e45c608b60..2599bf4098 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -5,7 +5,7 @@ Next DBACC 3002
 Next DBTUP 4014
 Next DBLQH 5043
 Next DBDICT 6007
-Next DBDIH 7183
+Next DBDIH 7195
 Next DBTC 8052
 Next CMVMI 9000
 Next BACKUP 10022
@@ -73,6 +73,11 @@ Delay GCP_SAVEREQ by 10 secs
 
 7180: Crash master during master-take-over in execMASTER_LCPCONF
 
+7193: Don't send LCP_FRAG_ORD to self, and crash when sending first
+      LCP_FRAG_ORD(last)
+
+7194: Force removeNodeFromStored to complete in the middle of MASTER_LCPCONF
+
 ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------
 
diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
index ca91f56909..e471a95339 100644
--- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
@@ -1291,7 +1291,17 @@ private:
     LcpStatus lcpStatus;
     Uint32 lcpStatusUpdatedPlace;
 
+    struct Save {              // one remembered (status, place) pair
+      LcpStatus m_status;      // a value lcpStatus held before it was replaced
+      Uint32 m_place;          // the lcpStatusUpdatedPlace that went with it
+    } m_saveState[10];         // history written by setLcpStatus(), newest at index 0
+
     void setLcpStatus(LcpStatus status, Uint32 line){
+      for (Uint32 i = 9; i > 0; i--)   // age the history: entry i-1 moves to i, old entry 9 is dropped
+        m_saveState[i] = m_saveState[i-1];
+      m_saveState[0].m_status = lcpStatus;             // slot 0 keeps the status being replaced...
+      m_saveState[0].m_place = lcpStatusUpdatedPlace;  // ...and the place that had set it
+      // The new status is stored below; it enters the history on the next call.
       lcpStatus = status;
       lcpStatusUpdatedPlace = line;
     }
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 9191bb3fb9..88d167f098 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -4764,11 +4764,19 @@ void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr)
   }
   
   jam();
-  signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
-  signal->theData[1] = failedNodePtr.i;
-  signal->theData[2] = 0; // Tab id
-  sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
-  
+
+  if (!ERROR_INSERTED(7194))
+  {
+    signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
+    signal->theData[1] = failedNodePtr.i;
+    signal->theData[2] = 0; // Tab id
+    sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
+  }    
+  else
+  {
+    ndbout_c("7194 Not starting ZREMOVE_NODE_FROM_TABLE");
+  }
+
   setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE);
 }//Dbdih::startRemoveFailedNode()
 
@@ -5676,12 +5684,22 @@ Dbdih::checkEmptyLcpComplete(Signal *signal){
     
     signal->theData[0] = 7012;
     execDUMP_STATE_ORD(signal);
+
+    if (ERROR_INSERTED(7194))
+    {
+      ndbout_c("7194 starting ZREMOVE_NODE_FROM_TABLE");
+      signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
+      signal->theData[1] = c_lcpMasterTakeOverState.failedNodeId;
+      signal->theData[2] = 0; // Tab id
+      sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
+    }
     
     c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__);
     MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
     req->masterRef = reference();
     req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId;
     sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ);
+
   } else {
     sendMASTER_LCPCONF(signal);
   }
@@ -5998,6 +6016,15 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
 {
   const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
   jamEntry();
+
+  if (ERROR_INSERTED(7194))
+  {
+    ndbout_c("delaying MASTER_LCPCONF due to error 7194");
+    sendSignalWithDelay(reference(), GSN_MASTER_LCPCONF, signal, 
+                        300, signal->getLength());
+    return;
+  }
+
   Uint32 senderNodeId = conf->senderNodeId;
   MasterLCPConf::State lcpState = (MasterLCPConf::State)conf->lcpState;
   const Uint32 failedNodeId = conf->failedNodeId;
@@ -6132,7 +6159,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
 #endif
     
       c_lcpState.keepGci = SYSFILE->keepGCI;
-      c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__);
       startLcpRoundLoopLab(signal, 0, 0);
       break;
     }
@@ -9924,6 +9950,8 @@ void Dbdih::sendLastLCP_FRAG_ORD(Signal* signal)
       if(ERROR_INSERTED(7075)){
 	continue;
       }
+
+      CRASH_INSERTION(7193);
       BlockReference ref = calcLqhBlockRef(nodePtr.i);
       sendSignal(ref, GSN_LCP_FRAG_ORD, signal,LcpFragOrd::SignalLength, JBB);
     }
@@ -10121,6 +10149,13 @@ Dbdih::checkLcpAllTablesDoneInLqh(){
   CRASH_INSERTION2(7017, !isMaster());
   
   c_lcpState.setLcpStatus(LCP_TAB_COMPLETED, __LINE__);
+
+  if (ERROR_INSERTED(7194))
+  {
+    ndbout_c("CLEARING 7194");
+    CLEAR_ERROR_INSERT_VALUE;
+  }
+  
   return true;
 }
 
@@ -10276,6 +10311,11 @@ Dbdih::sendLCP_FRAG_ORD(Signal* signal,
   
   BlockReference ref = calcLqhBlockRef(replicaPtr.p->procNode);
   
+  if (ERROR_INSERTED(7193) && replicaPtr.p->procNode == getOwnNodeId())
+  {
+    return;
+  }
+  
   LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
   lcpFragOrd->tableId    = info.tableId;
   lcpFragOrd->fragmentId = info.fragId;
@@ -13686,6 +13726,14 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
       ("immediateLcpStart = %d masterLcpNodeId = %d",
        c_lcpState.immediateLcpStart,
        refToNode(c_lcpState.m_masterLcpDihRef));
+
+    for (Uint32 i = 0; i<10; i++)
+    {
+      infoEvent("%u : status: %u place: %u", i, 
+                c_lcpState.m_saveState[i].m_status,
+                c_lcpState.m_saveState[i].m_place);
+    }
+    
     infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
   }
 
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index 03a60b1b52..12b0187b71 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -1347,6 +1347,51 @@ runBug28717(NDBT_Context* ctx, NDBT_Step* step)
   return NDBT_OK;
 }
 
+int
+runBug32160(NDBT_Context* ctx, NDBT_Step* step)
+{
+  // Regression test for bug#32160 (LCP master take-over).  Arms error 7194
+  // on the node returned by getNextMasterNodeId(), crashes the current
+  // master via error 7193, then restarts it and waits for the cluster.
+  // ctx and step are unused: the test only drives nodes through NdbRestarter.
+  NdbRestarter res;
+
+  if (res.getNumDbNodes() < 2)
+  {
+    return NDBT_OK; // needs a second data node; treated as a pass
+  }
+
+  int master = res.getMasterNodeId();
+  int next = res.getNextMasterNodeId(master);
+
+  if (res.insertErrorInNode(next, 7194))
+  {
+    return NDBT_FAILED;
+  }
+
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+  if (res.dumpStateOneNode(master, val2, 2))
+    return NDBT_FAILED;
+
+  if (res.insertErrorInNode(master, 7193))
+    return NDBT_FAILED;
+
+  int val3[] = { 7099 }; // NOTE(review): presumably triggers an LCP -- confirm
+  if (res.dumpStateOneNode(master, val3, 1))
+    return NDBT_FAILED;
+
+  if (res.waitNodesNoStart(&master, 1))
+    return NDBT_FAILED;
+
+  if (res.startNodes(&master, 1))
+    return NDBT_FAILED;
+
+  if (res.waitClusterStarted())
+    return NDBT_FAILED;
+
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -1686,6 +1731,9 @@ TESTCASE("Bug28717", ""){
 TESTCASE("Bug29364", ""){
   INITIALIZER(runBug29364);
 }
+TESTCASE("Bug32160", ""){ // regression test for bug#32160 (LCP master take-over)
+  INITIALIZER(runBug32160);
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 4f7ba26bf2..7b4a4ca0e2 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -497,6 +497,10 @@ max-time: 1000
 cmd: testNodeRestart
 args: -n Bug26481 T1
 
+max-time: 300
+cmd: testNodeRestart
+args: -n Bug32160 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench
-- 
2.30.9