Commit d36dbbbb authored by unknown's avatar unknown

ndb - bug#21271

  Make each fragment use its own LCP file, so that system/node restart ((s/n)r) can use a different LCP number for different fragments


storage/ndb/include/kernel/signaldata/FsOpenReq.hpp:
  Add fragment id to LCP filename
storage/ndb/src/kernel/blocks/ERROR_codes.txt:
  Add new error code
storage/ndb/src/kernel/blocks/backup/Backup.cpp:
  put each fragment in own LCP file
storage/ndb/src/kernel/blocks/backup/Backup.hpp:
  put each fragment in own LCP file
storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp:
  Use fifo lists
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  1) use fifo lists
  2) restore each fragment separately
  3) add error codes
storage/ndb/src/kernel/blocks/restore.cpp:
  Add fragment id to LCP filename
storage/ndb/src/kernel/blocks/ndbfs/Filename.cpp:
  Add fragment id to LCP filename
storage/ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase
storage/ndb/test/run-test/daily-basic-tests.txt:
  add testcase
parent 137c3f95
......@@ -136,9 +136,11 @@ private:
*/
static Uint32 v5_getLcpNo(const Uint32 fileNumber[]);
static Uint32 v5_getTableId(const Uint32 fileNumber[]);
static Uint32 v5_getFragmentId(const Uint32 fileNumber[]);
static void v5_setLcpNo(Uint32 fileNumber[], Uint32 no);
static void v5_setTableId(Uint32 fileNumber[], Uint32 no);
static void v5_setFragmentId(Uint32 fileNumber[], Uint32 no);
};
/**
......@@ -318,5 +320,15 @@ void FsOpenReq::v5_setLcpNo(Uint32 fileNumber[], Uint32 val){
fileNumber[1] = val;
}
/**
 * Read the fragment id from a v5 file number.
 * The fragment id is kept in word 2 of the fileNumber array.
 */
inline
Uint32 FsOpenReq::v5_getFragmentId(const Uint32 fileNumber[]){
  const Uint32 fragmentId = fileNumber[2];
  return fragmentId;
}
/**
 * Store the fragment id in a v5 file number.
 * The fragment id is kept in word 2 of the fileNumber array.
 */
inline
void FsOpenReq::v5_setFragmentId(Uint32 fileNumber[], Uint32 val){
  Uint32* const fragmentIdWord = fileNumber + 2;
  *fragmentIdWord = val;
}
#endif
......@@ -3,7 +3,7 @@ Next NDBCNTR 1000
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4024
Next DBLQH 5043
Next DBLQH 5045
Next DBDICT 6007
Next DBDIH 7177
Next DBTC 8037
......
......@@ -3796,20 +3796,28 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend();
conf->backupId = ptr.p->backupId;
conf->backupPtr = ptr.i;
conf->tableId = filePtr.p->tableId;
conf->fragmentNo = filePtr.p->fragmentNo;
conf->noOfRecordsLow = (Uint32)(op.noOfRecords & 0xFFFFFFFF);
conf->noOfRecordsHigh = (Uint32)(op.noOfRecords >> 32);
conf->noOfBytesLow = (Uint32)(op.noOfBytes & 0xFFFFFFFF);
conf->noOfBytesHigh = (Uint32)(op.noOfBytes >> 32);
sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
BackupFragmentConf::SignalLength, JBB);
ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF;
ptr.p->slaveState.setState(STARTED);
if (ptr.p->is_lcp())
{
ptr.p->slaveState.setState(STOPPING);
filePtr.p->operation.dataBuffer.eof();
}
else
{
BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend();
conf->backupId = ptr.p->backupId;
conf->backupPtr = ptr.i;
conf->tableId = filePtr.p->tableId;
conf->fragmentNo = filePtr.p->fragmentNo;
conf->noOfRecordsLow = (Uint32)(op.noOfRecords & 0xFFFFFFFF);
conf->noOfRecordsHigh = (Uint32)(op.noOfRecords >> 32);
conf->noOfBytesLow = (Uint32)(op.noOfBytes & 0xFFFFFFFF);
conf->noOfBytesHigh = (Uint32)(op.noOfBytes >> 32);
sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
BackupFragmentConf::SignalLength, JBB);
ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF;
ptr.p->slaveState.setState(STARTED);
}
return;
}
......@@ -4719,29 +4727,8 @@ Backup::execLCP_PREPARE_REQ(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, req.backupPtr);
bool first= true;
TablePtr tabPtr;
if(ptr.p->tables.first(tabPtr) && tabPtr.p->tableId != req.tableId)
{
jam();
first= false;
tabPtr.p->attributes.release();
tabPtr.p->fragments.release();
ptr.p->tables.release();
ptr.p->errorCode = 0;
}
if(ptr.p->tables.first(tabPtr) && ptr.p->errorCode == 0)
{
jam();
FragmentPtr fragPtr;
tabPtr.p->fragments.getPtr(fragPtr, 0);
fragPtr.p->fragmentId = req.fragmentId;
lcp_open_file_done(signal, ptr);
return;
}
else if(ptr.p->errorCode == 0)
if(ptr.p->errorCode == 0)
{
jam();
FragmentPtr fragPtr;
......@@ -4759,6 +4746,9 @@ Backup::execLCP_PREPARE_REQ(Signal* signal)
fragPtr.p->scanned = 0;
fragPtr.p->scanning = 0;
fragPtr.p->tableId = req.tableId;
ptr.p->backupId= req.backupId;
lcp_open_file(signal, ptr);
}
else
{
......@@ -4767,43 +4757,38 @@ Backup::execLCP_PREPARE_REQ(Signal* signal)
tabPtr.p->fragments.getPtr(fragPtr, 0);
fragPtr.p->fragmentId = req.fragmentId;
defineBackupRef(signal, ptr, ptr.p->errorCode);
return;
}
if(first)
{
jam();
// start file thread
ptr.p->backupId= req.backupId;
lcp_open_file(signal, ptr);
return;
}
else
{
jam();
ndbrequire(ptr.p->backupId == req.backupId);
}
/**
* Close previous file
*/
jam();
BackupFilePtr filePtr;
c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
filePtr.p->operation.dataBuffer.eof();
}
void
Backup::lcp_close_file_conf(Signal* signal, BackupRecordPtr ptr)
{
if(!ptr.p->tables.isEmpty())
{
jam();
lcp_open_file(signal, ptr);
return;
}
jam();
TablePtr tabPtr;
ndbrequire(ptr.p->tables.first(tabPtr));
Uint32 tableId = tabPtr.p->tableId;
FragmentPtr fragPtr;
tabPtr.p->fragments.getPtr(fragPtr, 0);
Uint32 fragmentId = fragPtr.p->fragmentId;
tabPtr.p->attributes.release();
tabPtr.p->fragments.release();
ptr.p->tables.release();
ptr.p->errorCode = 0;
lcp_send_end_lcp_conf(signal, ptr);
BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend();
conf->backupId = ptr.p->backupId;
conf->backupPtr = ptr.i;
conf->tableId = tableId;
conf->fragmentNo = fragmentId;
conf->noOfRecordsLow = 0;
conf->noOfRecordsHigh = 0;
conf->noOfBytesLow = 0;
conf->noOfBytesHigh = 0;
sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
BackupFragmentConf::SignalLength, JBB);
}
void
......@@ -4840,6 +4825,7 @@ Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr)
FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
FsOpenReq::v5_setLcpNo(req->fileNumber, fragPtr.p->lcp_no);
FsOpenReq::v5_setTableId(req->fileNumber, tabPtr.p->tableId);
FsOpenReq::v5_setFragmentId(req->fileNumber, fragPtr.p->fragmentId);
sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
}
......@@ -4872,38 +4858,15 @@ Backup::execEND_LCPREQ(Signal* signal)
c_backupPool.getPtr(ptr, req->backupPtr);
ndbrequire(ptr.p->backupId == req->backupId);
ptr.p->slaveState.setState(STOPPING);
TablePtr tabPtr;
if(ptr.p->tables.first(tabPtr))
{
tabPtr.p->attributes.release();
tabPtr.p->fragments.release();
ptr.p->tables.release();
BackupFilePtr filePtr;
c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
filePtr.p->operation.dataBuffer.eof();
return;
}
lcp_send_end_lcp_conf(signal, ptr);
}
void
Backup::lcp_send_end_lcp_conf(Signal* signal, BackupRecordPtr ptr)
{
EndLcpConf* conf= (EndLcpConf*)signal->getDataPtr();
conf->senderData = ptr.p->clientData;
conf->senderRef = reference();
ptr.p->errorCode = 0;
ptr.p->slaveState.setState(CLEANING);
ptr.p->slaveState.setState(INITIAL);
ptr.p->slaveState.setState(DEFINING);
ptr.p->slaveState.setState(DEFINED);
EndLcpConf* conf= (EndLcpConf*)signal->getDataPtr();
conf->senderData = ptr.p->clientData;
conf->senderRef = reference();
sendSignal(ptr.p->masterRef, GSN_END_LCPCONF,
signal, EndLcpConf::SignalLength, JBB);
}
......@@ -645,7 +645,6 @@ public:
void lcp_open_file(Signal* signal, BackupRecordPtr ptr);
void lcp_open_file_done(Signal*, BackupRecordPtr);
void lcp_close_file_conf(Signal* signal, BackupRecordPtr);
void lcp_send_end_lcp_conf(Signal* signal, BackupRecordPtr);
bool ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP);
};
......
......@@ -2794,10 +2794,10 @@ private:
/*THIS VARIABLE IS THE HEAD OF A LINKED LIST OF FRAGMENTS WAITING TO BE */
/*RESTORED FROM DISK. */
/* ------------------------------------------------------------------------- */
DLList<Fragrecord> c_lcp_waiting_fragments; // StartFragReq'ed
DLList<Fragrecord> c_lcp_restoring_fragments; // Restoring as we speek
DLList<Fragrecord> c_lcp_complete_fragments; // Restored
DLList<Fragrecord> c_redo_complete_fragments; // Redo'ed
DLFifoList<Fragrecord> c_lcp_waiting_fragments; // StartFragReq'ed
DLFifoList<Fragrecord> c_lcp_restoring_fragments; // Restoring as we speek
DLFifoList<Fragrecord> c_lcp_complete_fragments; // Restored
DLFifoList<Fragrecord> c_redo_complete_fragments; // Redo'ed
/* ------------------------------------------------------------------------- */
/*USED DURING SYSTEM RESTART, INDICATES THE OLDEST GCI THAT CAN BE RESTARTED */
......
......@@ -427,17 +427,20 @@ void Dblqh::execCONTINUEB(Signal* signal)
signal->theData[0] = fragptr.p->tabRef;
signal->theData[1] = fragptr.p->fragId;
sendSignal(DBACC_REF, GSN_EXPANDCHECK2, signal, 2, JBB);
Ptr<Fragrecord> save = fragptr;
c_redo_complete_fragments.next(fragptr);
signal->theData[0] = ZENABLE_EXPAND_CHECK;
signal->theData[1] = fragptr.i;
sendSignal(DBLQH_REF, GSN_CONTINUEB, signal, 2, JBB);
c_redo_complete_fragments.remove(save);
return;
}
else
{
jam();
c_redo_complete_fragments.remove();
ndbrequire(c_redo_complete_fragments.isEmpty());
StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
conf->startingNodeId = getOwnNodeId();
sendSignal(cmasterDihBlockref, GSN_START_RECCONF, signal,
......@@ -11269,8 +11272,22 @@ void Dblqh::execLCP_PREPARE_CONF(Signal* signal)
else
#endif
{
sendSignal(BACKUP_REF, GSN_BACKUP_FRAGMENT_REQ, signal,
BackupFragmentReq::SignalLength, JBB);
if (ERROR_INSERTED(5044) &&
(fragptr.p->tabRef == c_error_insert_table_id) &&
fragptr.p->fragId) // Not first frag
{
/**
* Force CRASH_INSERTION in 10s
*/
ndbout_c("table: %d frag: %d", fragptr.p->tabRef, fragptr.p->fragId);
SET_ERROR_INSERT_VALUE(5027);
sendSignalWithDelay(reference(), GSN_START_RECREQ, signal, 10000, 1);
}
else
{
sendSignal(BACKUP_REF, GSN_BACKUP_FRAGMENT_REQ, signal,
BackupFragmentReq::SignalLength, JBB);
}
}
}
}
......@@ -13745,7 +13762,7 @@ void Dblqh::execSTART_FRAGREQ(Signal* signal)
fragptr.p->newestGci = cnewestGci;
}//if
if (lcpNo == ZNIL || fragptr.i != tabptr.p->fragrec[0])
if (lcpNo == ZNIL)
{
jam();
/**
......@@ -18470,10 +18487,17 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && (signal->getLength() >= 2))
{
c_error_insert_table_id = dumpState->args[1];
SET_ERROR_INSERT_VALUE(5042);
if (signal->getLength() == 2)
{
SET_ERROR_INSERT_VALUE(5042);
}
else
{
SET_ERROR_INSERT_VALUE(dumpState->args[2]);
}
}
TcConnectionrec *regTcConnectionrec = tcConnectionrec;
......
......@@ -149,7 +149,8 @@ Filename::set(Filename::NameSpec& spec,
{
Uint32 tableId = FsOpenReq::v5_getTableId(filenumber);
Uint32 lcpNo = FsOpenReq::v5_getLcpNo(filenumber);
BaseString::snprintf(buf, sizeof(buf), "LCP/%d/T%d", lcpNo, tableId);
Uint32 fragId = FsOpenReq::v5_getFragmentId(filenumber);
BaseString::snprintf(buf, sizeof(buf), "LCP/%d/T%dF%d", lcpNo, tableId, fragId);
strcat(theName, buf);
break;
}
......
......@@ -264,7 +264,7 @@ Restore::init_file(const RestoreLcpReq* req, FilePtr file_ptr)
file_ptr.p->m_status = File::FIRST_READ;
file_ptr.p->m_table_id = req->tableId;
file_ptr.p->m_fragment_id = RNIL;
file_ptr.p->m_fragment_id = req->fragmentId;
file_ptr.p->m_table_version = RNIL;
file_ptr.p->m_bytes_left = 0; // Bytes read from FS
......@@ -361,6 +361,7 @@ Restore::open_file(Signal* signal, FilePtr file_ptr, Uint32 lcpNo)
FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
FsOpenReq::v5_setLcpNo(req->fileNumber, lcpNo);
FsOpenReq::v5_setTableId(req->fileNumber, file_ptr.p->m_table_id);
FsOpenReq::v5_setFragmentId(req->fileNumber, file_ptr.p->m_fragment_id);
sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
}
......@@ -475,6 +476,11 @@ Restore::restore_next(Signal* signal, FilePtr file_ptr)
*/
ndbout_c("records: %d len: %x left: %d",
status & File::READING_RECORDS, 4*len, left);
if (unlikely((status & File:: FILE_THREAD_RUNNING) == 0))
{
ndbrequire(false);
}
len= 0;
break;
}
......
......@@ -931,6 +931,41 @@ int runBug20185(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
/**
 * Test step for bug#21271.
 *
 * Picks a random node in the same node group as the master, arms an
 * error insert for the test table on that node, waits for the node with
 * waitNodesNoStart(), stops the test, starts the node again and finally
 * waits for the cluster to be started.
 *
 * @param ctx   test context; supplies the table and is used to stop the test
 * @param step  current test step (not used by this step)
 * @return NDBT_OK on success, NDBT_FAILED if any restarter call fails
 */
int
runBug21271(NDBT_Context* ctx, NDBT_Step* step){
  (void)step;

  NdbRestarter restarter;

  const int masterNode = restarter.getMasterNodeId();
  const int nodeId = restarter.getRandomNodeSameNodeGroup(masterNode, rand());

  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  if (restarter.dumpStateOneNode(nodeId, val2, 2))
    return NDBT_FAILED;

  // Three-word dump: dump code, table id, error insert value (5044).
  Uint32 tableId = ctx->getTab()->getTableId();
  int dump[] = { DumpStateOrd::LqhErrorInsert5042, 0, 5044 };
  dump[1] = tableId;

  if (restarter.dumpStateOneNode(nodeId, dump, 3))
    return NDBT_FAILED;

  // Stop the test right after the wait, whatever its outcome, exactly as
  // before; only then report a failed wait instead of ignoring it.
  const int waitResult = restarter.waitNodesNoStart(&nodeId, 1);
  ctx->stopTest();
  if (waitResult != 0)
    return NDBT_FAILED;

  if (restarter.startNodes(&nodeId, 1) != 0)
    return NDBT_FAILED;

  if (restarter.waitClusterStarted() != 0)
    return NDBT_FAILED;

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
......@@ -1244,6 +1279,13 @@ TESTCASE("Bug20185",
STEP(runBug20185);
FINALIZER(runClearTable);
}
/**
 * Test case Bug21271 (registered with an empty description string).
 * Initializer: runLoadTable. Steps: runBug21271 and runPkUpdateUntilStopped.
 * Finalizer: runClearTable.
 * NOTE(review): presumably the two STEPs run in parallel and the update
 * step ends once runBug21271 calls ctx->stopTest() -- confirm against the
 * NDBT framework.
 */
TESTCASE("Bug21271",
""){
INITIALIZER(runLoadTable);
STEP(runBug21271);
STEP(runPkUpdateUntilStopped);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -489,6 +489,10 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug20185 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug21271 T6
#
# DICT TESTS
max-time: 1500
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment