TransporterFacade.cpp 41.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

17
#include <ndb_global.h>
18
#include <my_pthread.h>
19 20 21 22 23 24 25 26 27 28 29 30 31
#include <ndb_limits.h>
#include "TransporterFacade.hpp"
#include "ClusterMgr.hpp"
#include <IPCConfig.hpp>
#include <TransporterCallback.hpp>
#include <TransporterRegistry.hpp>
#include "NdbApiSignal.hpp"
#include <NdbOut.hpp>
#include <NdbEnv.h>
#include <NdbSleep.h>

#include "API.hpp"
#include <ConfigRetriever.hpp>
32 33
#include <mgmapi_config_parameters.h>
#include <mgmapi_configuration.hpp>
34 35 36
#include <NdbConfig.h>
#include <ndb_version.h>
#include <SignalLoggerManager.hpp>
37
#include <kernel/ndb_limits.h>
38
#include <signaldata/AlterTable.hpp>
unknown's avatar
unknown committed
39
#include <signaldata/SumaImpl.hpp>
40 41

//#define REPORT_TRANSPORTER
unknown's avatar
unknown committed
42
//#define API_TRACE;
43

44 45 46 47 48 49 50 51 52 53
/**
 * Convert an API block number into a zero-based index by subtracting
 * MIN_API_BLOCK_NO.
 */
static int numberToIndex(int number)
{
  const int slot = number - MIN_API_BLOCK_NO;
  return slot;
}

/**
 * Convert a zero-based index back into an API block number by adding
 * MIN_API_BLOCK_NO (inverse of numberToIndex).
 */
static int indexToNumber(int index)
{
  const int blockNo = MIN_API_BLOCK_NO + index;
  return blockNo;
}

54 55 56 57 58 59 60 61 62 63 64
/*
 * TRP_DEBUG(t): debug trace helper.
 * When DEBUG_TRANSPORTER is defined it writes "<file>:<line>:<t>" to ndbout;
 * otherwise it expands to nothing. The expansion already ends with ';'.
 */
#if defined DEBUG_TRANSPORTER
#define TRP_DEBUG(t) ndbout << __FILE__ << ":" << __LINE__ << ":" << t << endl;
#else
#define TRP_DEBUG(t)
#endif

/*****************************************************************************
 * Call back functions
 *****************************************************************************/

void
65 66 67
reportError(void * callbackObj, NodeId nodeId,
	    TransporterError errorCode, const char *info)
{
68
#ifdef REPORT_TRANSPORTER
69 70
  ndbout_c("REPORT_TRANSP: reportError (nodeId=%d, errorCode=%d) %s", 
	   (int)nodeId, (int)errorCode, info ? info : "");
71
#endif
72 73 74
  if(errorCode & TE_DO_DISCONNECT) {
    ndbout_c("reportError (%d, %d) %s", (int)nodeId, (int)errorCode,
	     info ? info : "");
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
    ((TransporterFacade*)(callbackObj))->doDisconnect(nodeId);
  }
}

/**
 * Report average send length in bytes (4096 last sends)
 *
 * Only traces in REPORT_TRANSPORTER builds; otherwise a no-op.
 *
 * @param callbackObj  unused
 * @param nodeId       node the statistics belong to
 * @param count        number of sends in the sample
 * @param bytes        total bytes sent in the sample
 */
void
reportSendLen(void * callbackObj, NodeId nodeId, Uint32 count, Uint64 bytes){
#ifdef REPORT_TRANSPORTER
  // An empty sample must not divide by zero; report an average of 0.
  const Uint32 average = (count != 0) ? (Uint32)(bytes/count) : 0;
  ndbout_c("REPORT_TRANSP: reportSendLen (nodeId=%d, bytes/count=%d)", 
	   (int)nodeId, average);
#endif
  (void)callbackObj;
  (void)nodeId;
  (void)count;
  (void)bytes;
}

/** 
 * Report average receive length in bytes (4096 last receives)
 *
 * Only traces in REPORT_TRANSPORTER builds; otherwise a no-op.
 *
 * @param callbackObj  unused
 * @param nodeId       node the statistics belong to
 * @param count        number of receives in the sample
 * @param bytes        total bytes received in the sample
 */
void
reportReceiveLen(void * callbackObj, 
		 NodeId nodeId, Uint32 count, Uint64 bytes){
#ifdef REPORT_TRANSPORTER
  // An empty sample must not divide by zero; report an average of 0.
  const Uint32 average = (count != 0) ? (Uint32)(bytes/count) : 0;
  ndbout_c("REPORT_TRANSP: reportReceiveLen (nodeId=%d, bytes/count=%d)", 
	   (int)nodeId, average);
#endif
  (void)callbackObj;
  (void)nodeId;
  (void)count;
  (void)bytes;
}

/**
 * Report connection established
 */
void
reportConnect(void * callbackObj, NodeId nodeId){
#ifdef REPORT_TRANSPORTER
  ndbout_c("REPORT_TRANSP: API reportConnect (nodeId=%d)", (int)nodeId);
#endif
  ((TransporterFacade*)(callbackObj))->reportConnected(nodeId);
}

/**
 * Report connection broken
 */
void
reportDisconnect(void * callbackObj, NodeId nodeId, Uint32 error){
#ifdef REPORT_TRANSPORTER
  ndbout_c("REPORT_TRANSP: API reportDisconnect (nodeId=%d)", (int)nodeId);
#endif
  ((TransporterFacade*)(callbackObj))->reportDisconnected(nodeId);
}

130 131 132 133
void
transporter_recv_from(void * callbackObj, NodeId nodeId){
  ((TransporterFacade*)(callbackObj))->hb_received(nodeId);
}
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167

/****************************************************************************
 * 
 *****************************************************************************/

/**
 * Always returns 0.
 * NOTE(review): presumably the transporter callback that reports whether
 * the receiver's job buffer is congested, which never applies in the API;
 * confirm against TransporterCallback.hpp.
 */
int checkJobBuffer() {
  return 0;
}

#ifdef API_TRACE
/** Name of the environment variable that selects the signal log target. */
static const char * API_SIGNAL_LOG = "API_SIGNAL_LOG";
/** Value of that variable as seen by the previous setSignalLog() call. */
static const char * apiSignalLog   = 0;
static SignalLoggerManager signalLogger;

/**
 * Flush the signal log, re-read the API_SIGNAL_LOG environment variable
 * and redirect the signal logger when its value has changed.
 *
 *  - variable unset           : output stream is cleared (if one was set)
 *  - unchanged since last call: nothing is reconfigured
 *  - "-"                      : log to stdout
 *  - "+" (DBUG builds only)   : log to DBUG_FILE
 *  - anything else            : treated as a file name opened with "w"
 *
 * NOTE(review): the result of fopen() is passed on unchecked and a
 * previously opened file is not closed here; confirm how
 * SignalLoggerManager::setOutputStream treats NULL / old streams.
 *
 * @return true when a log target is configured, false when logging is off
 */
static
inline
bool
setSignalLog(){
  signalLogger.flushSignalLog();

  const char * tmp = NdbEnv_GetEnv(API_SIGNAL_LOG, (char *)0, 0);

  if (tmp == 0) {
    // Logging not requested; switch it off if it was on before.
    if (apiSignalLog != 0) {
      signalLogger.setOutputStream(0);
      apiSignalLog = tmp;
    }
    return false;
  }

  // Same target as last time: keep the current stream.
  if (apiSignalLog != 0 && strcmp(tmp, apiSignalLog) == 0)
    return true;

  if (strcmp(tmp, "-") == 0)
    signalLogger.setOutputStream(stdout);
#ifndef DBUG_OFF
  else if (strcmp(tmp, "+") == 0)
    signalLogger.setOutputStream(DBUG_FILE);
#endif
  else
    signalLogger.setOutputStream(fopen(tmp, "w"));
  apiSignalLog = tmp;
  return true;
}

/*
 * TRACE_GSN(gsn): should a signal with this number be logged?
 * API_REGREQ / API_REGCONF are filtered out unless TRACE_APIREGREQ
 * is defined.
 */
#ifdef TRACE_APIREGREQ
#define TRACE_GSN(gsn) true
#else
#define TRACE_GSN(gsn) (gsn != GSN_API_REGREQ && gsn != GSN_API_REGCONF)
#endif
#endif

/**
 * The execute function : Handle received signal
 */
void
execute(void * callbackObj, SignalHeader * const header, 
	Uint8 prio, Uint32 * const theData,
	LinearSectionPtr ptr[3]){

  TransporterFacade * theFacade = (TransporterFacade*)callbackObj;
  TransporterFacade::ThreadData::Object_Execute oe; 
  Uint32 tRecBlockNo = header->theReceiversBlockNumber;
  
#ifdef API_TRACE
unknown's avatar
unknown committed
199
  if(setSignalLog() && TRACE_GSN(header->theVerId_signalNumber)){
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
    signalLogger.executeSignal(* header, 
			       prio,
                               theData,
			       theFacade->ownId(), 
                               ptr, header->m_noOfSections);
    signalLogger.flushSignalLog();
  }
#endif  

  if (tRecBlockNo >= MIN_API_BLOCK_NO) {
    oe = theFacade->m_threads.get(tRecBlockNo);
    if (oe.m_object != 0 && oe.m_executeFunction != 0) {
      /**
       * Handle received signal immediately to avoid any unnecessary
       * copying of data, allocation of memory and other things. Copying
       * of data could be interesting to support several priority levels
       * and to support a special memory structure when executing the
       * signals. Neither of those are interesting when receiving data
       * in the NDBAPI. The NDBAPI will thus read signal data directly as
       * it was written by the sender (SCI sender is other node, Shared
       * memory sender is other process and TCP/IP sender is the OS that
       * writes the TCP/IP message into a message buffer).
       */
      NdbApiSignal tmpSignal(*header);
      NdbApiSignal * tSignal = &tmpSignal;
      tSignal->setDataPtr(theData);
      (* oe.m_executeFunction) (oe.m_object, tSignal, ptr);
    }//if
  } else if (tRecBlockNo == API_PACKED) {
    /**
     * Block number == 2047 is used to signal a signal that consists of
     * multiple instances of the same signal. This is an effort to
     * package the signals so as to avoid unnecessary communication
     * overhead since TCP/IP has a great performance impact.
     */
    Uint32 Tlength = header->theLength;
    Uint32 Tsent = 0;
    /**
     * Since it contains at least two data packets we will first
     * copy the signal data to safe place.
     */
    while (Tsent < Tlength) {
      Uint32 Theader = theData[Tsent];
      Tsent++;
      Uint32 TpacketLen = (Theader & 0x1F) + 3;
      tRecBlockNo = Theader >> 16;
      if (TpacketLen <= 25) {
	if ((TpacketLen + Tsent) <= Tlength) {
	  /**
	   * Set the data length of the signal and the receivers block
	   * reference and then call the API.
	   */
	  header->theLength = TpacketLen;
	  header->theReceiversBlockNumber = tRecBlockNo;
	  Uint32* tDataPtr = &theData[Tsent];
	  Tsent += TpacketLen;
	  if (tRecBlockNo >= MIN_API_BLOCK_NO) {
	    oe = theFacade->m_threads.get(tRecBlockNo);
	    if(oe.m_object != 0 && oe.m_executeFunction != 0){
	      NdbApiSignal tmpSignal(*header);
	      NdbApiSignal * tSignal = &tmpSignal;
	      tSignal->setDataPtr(tDataPtr);
	      (*oe.m_executeFunction)(oe.m_object, tSignal, 0);
	    }
	  }
	}
      }
    }
    return;
  } else if (tRecBlockNo == API_CLUSTERMGR) {
     /**
      * The signal was aimed for the Cluster Manager. 
      * We handle it immediately here.
      */     
     ClusterMgr * clusterMgr = theFacade->theClusterMgr;
     const Uint32 gsn = header->theVerId_signalNumber;

     switch (gsn){
     case GSN_API_REGREQ:
       clusterMgr->execAPI_REGREQ(theData);
       break;

     case GSN_API_REGCONF:
       clusterMgr->execAPI_REGCONF(theData);
       break;
     
     case GSN_API_REGREF:
       clusterMgr->execAPI_REGREF(theData);
       break;

     case GSN_NODE_FAILREP:
       clusterMgr->execNODE_FAILREP(theData);
       break;
       
     case GSN_NF_COMPLETEREP:
       clusterMgr->execNF_COMPLETEREP(theData);
       break;

     case GSN_ARBIT_STARTREQ:
       if (theFacade->theArbitMgr != NULL)
	 theFacade->theArbitMgr->doStart(theData);
       break;
       
     case GSN_ARBIT_CHOOSEREQ:
       if (theFacade->theArbitMgr != NULL)
	 theFacade->theArbitMgr->doChoose(theData);
       break;
       
     case GSN_ARBIT_STOPORD:
       if(theFacade->theArbitMgr != NULL)
	 theFacade->theArbitMgr->doStop(theData);
       break;

313 314 315 316 317 318 319 320 321 322
     case GSN_ALTER_TABLE_REP:
     {
       const AlterTableRep* rep = (const AlterTableRep*)theData;
       theFacade->m_globalDictCache.lock();
       theFacade->m_globalDictCache.
	 alter_table_rep((const char*)ptr[0].p, 
			 rep->tableId,
			 rep->tableVersion,
			 rep->changeType == AlterTableRep::CT_ALTERED);
       theFacade->m_globalDictCache.unlock();
unknown's avatar
unknown committed
323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
       break;
     }
     case GSN_SUB_GCP_COMPLETE_REP:
     {
       /**
	* Report
	*/
       NdbApiSignal tSignal(* header);
       tSignal.setDataPtr(theData);
       theFacade->for_each(&tSignal, ptr);

       /**
	* Reply
	*/
       {
	 Uint32* send= tSignal.getDataPtrSend();
	 memcpy(send, theData, tSignal.getLength() << 2);
	 ((SubGcpCompleteAck*)send)->rep.senderRef = 
	   numberToRef(API_CLUSTERMGR, theFacade->theOwnId);
	 Uint32 ref= header->theSendersBlockRef;
	 Uint32 aNodeId= refToNode(ref);
	 tSignal.theReceiversBlockNumber= refToBlock(ref);
	 tSignal.theVerId_signalNumber= GSN_SUB_GCP_COMPLETE_ACK;
unknown's avatar
unknown committed
346
	 theFacade->sendSignalUnCond(&tSignal, aNodeId);
unknown's avatar
unknown committed
347 348
       }
       break;
349
     }
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
     default:
       break;
       
     }
     return;
  } else {
    ; // Ignore all other block numbers.
    if(header->theVerId_signalNumber!=3) {
      TRP_DEBUG( "TransporterFacade received signal to unknown block no." );
      ndbout << "BLOCK NO: "  << tRecBlockNo << " sig " 
	     << header->theVerId_signalNumber  << endl;
      abort();
    }
  }
}

unknown's avatar
unknown committed
366 367 368 369 370 371 372 373
// These symbols are needed, but not used in the API
// Stub definition: calling it in the API terminates the process via abort().
void 
SignalLoggerManager::printSegmentedSection(FILE *, const SignalHeader &,
					   const SegmentedSectionPtr ptr[3],
					   unsigned i){
  abort();
}

374 375 376 377 378 379 380 381 382 383 384 385
// Stub definition of the segmented-section copy routine. None of the
// arguments are used; calling it terminates the process via abort().
void 
copy(Uint32 * & insertPtr, 
     class SectionSegmentPool & thePool, const SegmentedSectionPtr & _ptr){
  abort();
}

/**
 * Note that this function need no locking since its
 * only called from the constructor of Ndb (the NdbObject)
 * 
 * Which is protected by a mutex
 */
386

387
int
388 389 390
TransporterFacade::start_instance(int nodeId, 
				  const ndb_mgm_configuration* props)
{
391
  if (! init(nodeId, props)) {
392
    return -1;
393 394 395 396 397 398 399 400 401 402 403 404
  }
  
  /**
   * Install signal handler for SIGPIPE
   *
   * This due to the fact that a socket connection might have
   * been closed in between a select and a corresponding send
   */
#if !defined NDB_OSE && !defined NDB_SOFTOSE && !defined NDB_WIN32
  signal(SIGPIPE, SIG_IGN);
#endif

405
  return 0;
406 407
}

408 409 410 411 412 413 414 415
/**
 * Note that this function need no locking since its
 * only called from the destructor of Ndb (the NdbObject)
 * 
 * Which is protected by a mutex
 */
void
TransporterFacade::stop_instance(){
416
  DBUG_ENTER("TransporterFacade::stop_instance");
417
  doStop();
418
  DBUG_VOID_RETURN;
419 420 421 422
}

void
TransporterFacade::doStop(){
423
  DBUG_ENTER("TransporterFacade::doStop");
424 425 426 427 428 429 430 431 432 433 434 435
  /**
   * First stop the ClusterMgr because it needs to send one more signal
   * and also uses theFacadeInstance to lock/unlock theMutexPtr
   */
  if (theClusterMgr != NULL) theClusterMgr->doStop();
  if (theArbitMgr != NULL) theArbitMgr->doStop(NULL);
  
  /**
   * Now stop the send and receive threads
   */
  void *status;
  theStopReceive = 1;
436 437 438 439 440 441 442 443
  if (theReceiveThread) {
    NdbThread_WaitFor(theReceiveThread, &status);
    NdbThread_Destroy(&theReceiveThread);
  }
  if (theSendThread) {
    NdbThread_WaitFor(theSendThread, &status);
    NdbThread_Destroy(&theSendThread);
  }
444
  DBUG_VOID_RETURN;
445 446 447 448 449 450 451
}

/**
 * C-linkage entry point of the send thread.
 *
 * @param me  the TransporterFacade whose threadMainSend() is run
 * @return always 0
 */
extern "C" 
void* 
runSendRequest_C(void * me)
{
  ((TransporterFacade*) me)->threadMainSend();
  return 0;
}

void TransporterFacade::threadMainSend(void)
{
  theTransporterRegistry->startSending();
unknown's avatar
unknown committed
458 459 460 461
  if (!theTransporterRegistry->start_clients()){
    ndbout_c("Unable to start theTransporterRegistry->start_clients");
    exit(0);
  }
unknown's avatar
unknown committed
462

unknown's avatar
unknown committed
463
  m_socket_server.startServer();
unknown's avatar
unknown committed
464

465 466 467 468 469 470 471 472 473 474
  while(!theStopReceive) {
    NdbSleep_MilliSleep(10);
    NdbMutex_Lock(theMutexPtr);
    if (sendPerformedLastInterval == 0) {
      theTransporterRegistry->performSend();
    }
    sendPerformedLastInterval = 0;
    NdbMutex_Unlock(theMutexPtr);
  }
  theTransporterRegistry->stopSending();
unknown's avatar
unknown committed
475

unknown's avatar
unknown committed
476
  m_socket_server.stopServer();
unknown's avatar
unknown committed
477
  m_socket_server.stopSessions(true);
unknown's avatar
unknown committed
478 479

  theTransporterRegistry->stop_clients();
480 481 482 483 484 485 486
}

/**
 * C-linkage entry point of the receive thread.
 *
 * @param me  the TransporterFacade whose threadMainReceive() is run
 * @return always 0
 */
extern "C" 
void* 
runReceiveResponse_C(void * me)
{
  ((TransporterFacade*) me)->threadMainReceive();
  return 0;
}

unknown's avatar
unknown committed
490 491 492 493 494 495 496 497
/*
  The receiver thread is changed to only wake up once every 10 milliseconds
  to poll. It will first check that nobody owns the poll "right" before
  polling. This means that methods using the receiveResponse and
  sendRecSignal will have a slightly longer response time if they are
  executed without any parallel key lookups. Currently also scans are
  affected but this is to be fixed.
*/
498 499 500
void TransporterFacade::threadMainReceive(void)
{
  theTransporterRegistry->startReceiving();
unknown's avatar
unknown committed
501
#ifdef NDB_SHM_TRANSPORTER
unknown's avatar
Fixes  
unknown committed
502
  NdbThread_set_shm_sigmask(TRUE);
unknown's avatar
unknown committed
503
#endif
504
  NdbMutex_Lock(theMutexPtr);
unknown's avatar
unknown committed
505
  theTransporterRegistry->update_connections();
506 507 508
  NdbMutex_Unlock(theMutexPtr);
  while(!theStopReceive) {
    for(int i = 0; i<10; i++){
unknown's avatar
unknown committed
509 510 511 512 513 514
      NdbSleep_MilliSleep(10);
      NdbMutex_Lock(theMutexPtr);
      if (poll_owner == NULL) {
        const int res = theTransporterRegistry->pollReceive(0);
        if(res > 0)
          theTransporterRegistry->performReceive();
515
      }
unknown's avatar
unknown committed
516
      NdbMutex_Unlock(theMutexPtr);
517 518
    }
    NdbMutex_Lock(theMutexPtr);
unknown's avatar
unknown committed
519
    theTransporterRegistry->update_connections();
520 521 522 523
    NdbMutex_Unlock(theMutexPtr);
  }//while
  theTransporterRegistry->stopReceiving();
}
unknown's avatar
unknown committed
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
/*
  Poll on behalf of the worker thread that currently owns the poll
  "right". The facade mutex is released for the duration of the poll
  (up to wait_time) and re-acquired before any received data is
  processed, so the method assumes the caller holds theMutexPtr on
  entry and leaves it held on return.
*/
void TransporterFacade::external_poll(Uint32 wait_time)
{
  NdbMutex_Unlock(theMutexPtr);
  const int readyCount = theTransporterRegistry->pollReceive(wait_time);
  NdbMutex_Lock(theMutexPtr);

  // Nothing arrived within the wait time: return to the caller.
  if (readyCount <= 0)
    return;

  theTransporterRegistry->performReceive();
}

/*
  Register a waiter that did not get the poll "right" in the conditional
  wait queue, so it can later be picked to take over polling.
  A slot is taken from the head of the free list, appended at the tail
  of the doubly linked wait list, and bound to the waiter. The slot
  index is stored in the waiter and also returned.
  MAX_NO_THREADS serves as the "no entry" marker for all links.
*/
Uint32 TransporterFacade::put_in_cond_wait_queue(NdbWaiter *aWaiter)
{
  // Pop a slot off the free list.
  const Uint32 slot = first_free_cond_wait;
  assert(slot < MAX_NO_THREADS);
  first_free_cond_wait = cond_wait_array[slot].next_cond_wait;

  // Link the slot in as the new tail of the wait list.
  const Uint32 oldTail = last_in_cond_wait;
  cond_wait_array[slot].next_cond_wait = MAX_NO_THREADS;
  cond_wait_array[slot].prev_cond_wait = oldTail;
  if (oldTail != MAX_NO_THREADS)
    cond_wait_array[oldTail].next_cond_wait = slot;
  else
    first_in_cond_wait = slot;   // list was empty
  last_in_cond_wait = slot;

  // Bind slot and waiter to each other.
  cond_wait_array[slot].cond_wait_object = aWaiter;
  aWaiter->set_cond_wait_index(slot);
  return slot;
}

/*
  Take a waiter out of the conditional wait queue, either because it is
  about to be woken up or because it timed out while still queued.
  Its slot is unlinked from the doubly linked wait list, pushed onto the
  free list and cleared, and the waiter's stored index is reset to
  MAX_NO_THREADS (the "not queued" marker).
*/
void TransporterFacade::remove_from_cond_wait_queue(NdbWaiter *aWaiter)
{
  const Uint32 slot = aWaiter->get_cond_wait_index();
  assert(slot < MAX_NO_THREADS &&
         cond_wait_array[slot].cond_wait_object == aWaiter);

  const Uint32 before = cond_wait_array[slot].prev_cond_wait;
  const Uint32 after = cond_wait_array[slot].next_cond_wait;

  // Unlink: patch the predecessor (or the list head) ...
  if (before == MAX_NO_THREADS)
    first_in_cond_wait = after;
  else
    cond_wait_array[before].next_cond_wait = after;

  // ... and the successor (or the list tail).
  if (after == MAX_NO_THREADS)
    last_in_cond_wait = before;
  else
    cond_wait_array[after].prev_cond_wait = before;

  // Return the cleared slot to the head of the free list.
  cond_wait_array[slot].cond_wait_object = NULL;
  cond_wait_array[slot].prev_cond_wait = MAX_NO_THREADS;
  cond_wait_array[slot].next_cond_wait = first_free_cond_wait;
  first_free_cond_wait = slot;

  aWaiter->set_cond_wait_index(MAX_NO_THREADS);
}

/*
  Remove and return the waiter at the tail of the conditional wait
  queue (the most recently queued one). Returns NULL when the queue
  is empty.
*/
NdbWaiter* TransporterFacade::rem_last_from_cond_wait_queue()
{
  const Uint32 tail = last_in_cond_wait;
  if (tail == MAX_NO_THREADS)
    return NULL;

  NdbWaiter * const waiter = cond_wait_array[tail].cond_wait_object;
  remove_from_cond_wait_queue(waiter);
  return waiter;
}

void TransporterFacade::init_cond_wait_queue()
{
  Uint32 i;
  /*
   Initialise the doubly linked list as empty
  */
  first_in_cond_wait = MAX_NO_THREADS;
  last_in_cond_wait = MAX_NO_THREADS;
  /*
   Initialise free list
  */
  first_free_cond_wait = 0;
  for (i = 0; i < MAX_NO_THREADS; i++) {
    cond_wait_array[i].cond_wait_object = NULL;
    cond_wait_array[i].next_cond_wait = i+1;
    cond_wait_array[i].prev_cond_wait = MAX_NO_THREADS;
  }
}
644

645
/**
 * Construct an idle facade: no transporter registry and no threads yet,
 * an empty conditional wait queue, a freshly created mutex, default
 * batch sizes and a new ClusterMgr. The registry and the threads are
 * created later by init().
 */
TransporterFacade::TransporterFacade() :
  theTransporterRegistry(0),
  theStopReceive(0),
  theSendThread(NULL),
  theReceiveThread(NULL),
  m_fragmented_signal_id(0)
{
  DBUG_ENTER("TransporterFacade::TransporterFacade");
  init_cond_wait_queue();
  poll_owner = NULL;
  theOwnId = 0;
  theMutexPtr = NdbMutex_Create();
  sendPerformedLastInterval = 0;

  checkCounter = 4;
  currentSendLimit = 1;
  theClusterMgr = NULL;
  theArbitMgr = NULL;
  theStartNodeId = 1;
  m_scan_batch_size= MAX_SCAN_BATCH_SIZE;
  m_batch_byte_size= SCAN_BATCH_SIZE;
  m_batch_size= DEF_BATCH_SIZE;
  m_max_trans_id = 0;

  theClusterMgr = new ClusterMgr(* this);

#ifdef API_TRACE
  apiSignalLog = 0;
#endif
  DBUG_VOID_RETURN;
}

bool
678
TransporterFacade::init(Uint32 nodeId, const ndb_mgm_configuration* props)
679
{
unknown's avatar
unknown committed
680 681
  DBUG_ENTER("TransporterFacade::init");

682
  theOwnId = nodeId;
683
  theTransporterRegistry = new TransporterRegistry(this);
684 685 686 687 688

  const int res = IPCConfig::configureTransporters(nodeId, 
						   * props, 
						   * theTransporterRegistry);
  if(res <= 0){
689
    TRP_DEBUG( "configureTransporters returned 0 or less" );
unknown's avatar
unknown committed
690
    DBUG_RETURN(false);
691 692
  }
  
693 694 695
  ndb_mgm_configuration_iterator iter(* props, CFG_SECTION_NODE);
  iter.first();
  theClusterMgr->init(iter);
696
  
unknown's avatar
unknown committed
697 698 699
  iter.first();
  if(iter.find(CFG_NODE_ID, nodeId)){
    TRP_DEBUG( "Node info missing from config." );
unknown's avatar
Merge  
unknown committed
700
    DBUG_RETURN(false);
unknown's avatar
unknown committed
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721
  }
  
  Uint32 rank = 0;
  if(!iter.get(CFG_NODE_ARBIT_RANK, &rank) && rank>0){
    theArbitMgr = new ArbitMgr(* this);
    theArbitMgr->setRank(rank);
    Uint32 delay = 0;
    iter.get(CFG_NODE_ARBIT_DELAY, &delay);
    theArbitMgr->setDelay(delay);
  }
  Uint32 scan_batch_size= 0;
  if (!iter.get(CFG_MAX_SCAN_BATCH_SIZE, &scan_batch_size)) {
    m_scan_batch_size= scan_batch_size;
  }
  Uint32 batch_byte_size= 0;
  if (!iter.get(CFG_BATCH_BYTE_SIZE, &batch_byte_size)) {
    m_batch_byte_size= batch_byte_size;
  }
  Uint32 batch_size= 0;
  if (!iter.get(CFG_BATCH_SIZE, &batch_size)) {
    m_batch_size= batch_size;
722
  }
723
  
unknown's avatar
unknown committed
724 725 726 727 728 729 730 731 732 733 734 735 736
  Uint32 timeout = 120000;
  iter.first();
  for (iter.first(); iter.valid(); iter.next())
  {
    Uint32 tmp1 = 0, tmp2 = 0;
    iter.get(CFG_DB_TRANSACTION_CHECK_INTERVAL, &tmp1);
    iter.get(CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, &tmp2);
    tmp1 += tmp2;
    if (tmp1 > timeout)
      timeout = tmp1;
  }
  m_waitfor_timeout = timeout;
  
unknown's avatar
unknown committed
737 738
  if (!theTransporterRegistry->start_service(m_socket_server)){
    ndbout_c("Unable to start theTransporterRegistry->start_service");
unknown's avatar
unknown committed
739
    DBUG_RETURN(false);
unknown's avatar
unknown committed
740 741
  }

742 743 744 745 746 747 748 749 750 751 752 753 754
  theReceiveThread = NdbThread_Create(runReceiveResponse_C,
                                      (void**)this,
                                      32768,
                                      "ndb_receive",
                                      NDB_THREAD_PRIO_LOW);

  theSendThread = NdbThread_Create(runSendRequest_C,
                                   (void**)this,
                                   32768,
                                   "ndb_send",
                                   NDB_THREAD_PRIO_LOW);
  theClusterMgr->startThread();
  
755 756 757 758
#ifdef API_TRACE
  signalLogger.logOn(true, 0, SignalLoggerManager::LogInOut);
#endif
  
unknown's avatar
unknown committed
759
  DBUG_RETURN(true);
760 761
}

unknown's avatar
unknown committed
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
/**
 * Deliver a signal to every registered object that is currently in use
 * by calling its execute function.
 *
 * @param aSignal  signal to deliver
 * @param ptr      linear sections passed along with the signal
 */
void
TransporterFacade::for_each(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
{
  // Trace under this function's own name, not "connected".
  DBUG_ENTER("TransporterFacade::for_each");
  Uint32 sz = m_threads.m_statusNext.size();
  for (Uint32 i = 0; i < sz ; i ++) 
  {
    if (m_threads.getInUse(i))
    {
      // Only look at the entry once the slot is known to be in use.
      TransporterFacade::ThreadData::Object_Execute oe =
	m_threads.m_objectExecute[i];
      (* oe.m_executeFunction) (oe.m_object, aSignal, ptr);
    }
  }
  DBUG_VOID_RETURN;
}
778

779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
void
TransporterFacade::connected()
{
  DBUG_ENTER("TransporterFacade::connected");
  Uint32 sz = m_threads.m_statusNext.size();
  for (Uint32 i = 0; i < sz ; i ++) {
    if (m_threads.getInUse(i)){
      void * obj = m_threads.m_objectExecute[i].m_object;
      NodeStatusFunction RegPC = m_threads.m_statusFunction[i];
      (*RegPC) (obj, numberToRef(indexToNumber(i), theOwnId), true, true);
    }
  }
  DBUG_VOID_RETURN;
}

794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
/**
 * Tell every in-use registered object that node tNodeId has failed.
 *
 * Each object's status function is called with the flags (false, false).
 * Reporting the failure ensures the Ndb objects do not keep treating
 * their connections as valid after a failure followed by a restart,
 * which they might otherwise not have noticed.
 */
void
TransporterFacade::ReportNodeDead(NodeId tNodeId)
{
  const Uint32 total = m_threads.m_statusNext.size();
  for (Uint32 slot = 0; slot != total; ++slot) {
    if (!m_threads.getInUse(slot))
      continue;
    void * const target = m_threads.m_objectExecute[slot].m_object;
    NodeStatusFunction notify = m_threads.m_statusFunction[slot];
    (*notify) (target, tNodeId, false, false);
  }
}

/**
 * Tell every in-use registered object that failure handling for node
 * tNodeId has completed.
 *
 * Each object's status function is called with the flags (false, true).
 * NOTE(review): the flags presumably mean (alive, failure handling
 * complete) - confirm against the NodeStatusFunction declaration.
 */
void
TransporterFacade::ReportNodeFailureComplete(NodeId tNodeId)
{
  DBUG_ENTER("TransporterFacade::ReportNodeFailureComplete");
  DBUG_PRINT("enter",("nodeid= %d", tNodeId));
  Uint32 sz = m_threads.m_statusNext.size();
  for (Uint32 i = 0; i < sz ; i ++) {
    if (m_threads.getInUse(i)){
      void * obj = m_threads.m_objectExecute[i].m_object;
      NodeStatusFunction RegPC = m_threads.m_statusFunction[i];
      (*RegPC) (obj, tNodeId, false, true);
    }
  }
  DBUG_VOID_RETURN;
}

/**
 * Tell every in-use registered object that node tNodeId is alive.
 *
 * Each object's status function is called with the flags (true, false).
 * NOTE(review): the flags presumably mean (alive, failure handling
 * complete) - confirm against the NodeStatusFunction declaration.
 */
void
TransporterFacade::ReportNodeAlive(NodeId tNodeId)
{
  const Uint32 total = m_threads.m_statusNext.size();
  for (Uint32 slot = 0; slot != total; ++slot) {
    if (!m_threads.getInUse(slot))
      continue;
    void * const target = m_threads.m_objectExecute[slot].m_object;
    NodeStatusFunction notify = m_threads.m_statusFunction[slot];
    (*notify) (target, tNodeId, true, false);
  }
}

int 
863
TransporterFacade::close(BlockNumber blockNumber, Uint64 trans_id)
864 865
{
  NdbMutex_Lock(theMutexPtr);
866 867
  Uint32 low_bits = (Uint32)trans_id;
  m_max_trans_id = m_max_trans_id > low_bits ? m_max_trans_id : low_bits;
868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
  close_local(blockNumber);
  NdbMutex_Unlock(theMutexPtr);
  return 0;
}

/**
 * Close the given block number in m_threads without taking the facade
 * mutex; close() calls this while holding the mutex.
 *
 * @return always 0
 */
int 
TransporterFacade::close_local(BlockNumber blockNumber){
  m_threads.close(blockNumber);
  return 0;
}

int
TransporterFacade::open(void* objRef, 
                        ExecuteFunction fun, 
                        NodeStatusFunction statusFun)
{
884 885 886 887 888 889 890 891 892 893
  DBUG_ENTER("TransporterFacade::open");
  int r= m_threads.open(objRef, fun, statusFun);
  if (r < 0)
    DBUG_RETURN(r);
#if 1
  if (theOwnId > 0) {
    (*statusFun)(objRef, numberToRef(r, theOwnId), true, true);
  }
#endif
  DBUG_RETURN(r);
894 895
}

unknown's avatar
unknown committed
896 897 898 899
/**
 * Destroy the facade: delete the cluster manager, the arbitration
 * manager and the transporter registry while holding the facade mutex,
 * then release and destroy the mutex itself. In API_TRACE builds the
 * signal logger's output stream is cleared as well.
 */
TransporterFacade::~TransporterFacade()
{  
  DBUG_ENTER("TransporterFacade::~TransporterFacade");

  NdbMutex_Lock(theMutexPtr);
  delete theClusterMgr;  
  delete theArbitMgr;
  delete theTransporterRegistry;
  NdbMutex_Unlock(theMutexPtr);
  NdbMutex_Destroy(theMutexPtr);
#ifdef API_TRACE
  signalLogger.setOutputStream(0);
#endif
  DBUG_VOID_RETURN;
}

void 
TransporterFacade::calculateSendLimit()
{
  Uint32 Ti;
  Uint32 TthreadCount = 0;
  
  Uint32 sz = m_threads.m_statusNext.size();
  for (Ti = 0; Ti < sz; Ti++) {
    if (m_threads.m_statusNext[Ti] == (ThreadData::ACTIVE)){
      TthreadCount++;
      m_threads.m_statusNext[Ti] = ThreadData::INACTIVE;
    }
  }
  currentSendLimit = TthreadCount;
  if (currentSendLimit == 0) {
    currentSendLimit = 1;
  }
  checkCounter = currentSendLimit << 2;
}


//-------------------------------------------------
// Force sending but still report the sending to the
// adaptive algorithm.
//
// Marks the calling block ACTIVE, records that a send
// happened in this interval, recalculates the send
// limit once checkCounter drops below zero and then
// calls forceSendCheck with a limit of 0.
//-------------------------------------------------
void TransporterFacade::forceSend(Uint32 block_number) {
  checkCounter--;
  m_threads.m_statusNext[numberToIndex(block_number)] = ThreadData::ACTIVE;
  sendPerformedLastInterval = 1;
  if (checkCounter < 0) {
    calculateSendLimit();
  }
  theTransporterRegistry->forceSendCheck(0);
}

//-------------------------------------------------
// Improving API performance
//
// Adaptive decision on when the API should really
// flush its send buffers. Throughput depends heavily
// on the size of each write to the network, so sends
// are batched as much as possible; at the same time
// response time must not suffer, so the number of
// active users is tracked and the limit is adjusted
// periodically.
//-------------------------------------------------
void
TransporterFacade::checkForceSend(Uint32 block_number) {
  const int slot = numberToIndex(block_number);
  m_threads.m_statusNext[slot] = ThreadData::ACTIVE;

  // Ask the registry to send if the current limit says so, and remember
  // whether a send actually took place in this interval.
  const bool sent =
    (theTransporterRegistry->forceSendCheck(currentSendLimit) == 1);
  if (sent)
    sendPerformedLastInterval = 1;

  // Once the counter is exhausted, recompute the limit.
  checkCounter--;
  if (checkCounter < 0)
    calculateSendLimit();
}


/******************************************************************************
 * SEND SIGNAL METHODS
 *****************************************************************************/
977 978 979 980 981 982 983
/**
 * Send a signal without sections to aNode.
 *
 * @param aSignal  signal to send; its length must be 1..25 and the receiver
 *                 block number must be non-zero
 * @param aNode    destination node
 * @return 0 when prepareSend() reports SEND_OK; -1 when the node is not
 *         sendable, the signal fails validation, or the send did not succeed
 */
int
TransporterFacade::sendSignal(NdbApiSignal * aSignal, NodeId aNode){
  Uint32* const payload = aSignal->getDataPtrSend();
  const Uint32 sigLen = aSignal->theLength;
  const Uint32 recvBlock = aSignal->theReceiversBlockNumber;

  if (getIsNodeSendable(aNode) != true) {
    //const ClusterMgr::Node & node = theClusterMgr->getNodeInfo(aNode);
    //const Uint32 startLevel = node.m_state.startLevel;
    return -1; // Node Dead
  }

#ifdef API_TRACE
  if(setSignalLog() && TRACE_GSN(aSignal->theVerId_signalNumber)){
    // Log with the full sender block reference, then restore the raw value.
    const Uint32 savedRef = aSignal->theSendersBlockRef;
    aSignal->theSendersBlockRef = numberToRef(savedRef, theOwnId);
    LinearSectionPtr ptr[3];
    signalLogger.sendSignal(* aSignal,
                            1,
                            payload,
                            aNode, ptr, 0);
    signalLogger.flushSignalLog();
    aSignal->theSendersBlockRef = savedRef;
  }
#endif

  if ((sigLen == 0) || (sigLen > 25) || (recvBlock == 0)) {
    // Malformed signal: report it, trap in debug builds, and fail otherwise.
    ndbout << "ERR: SigLen = " << sigLen << " BlockRec = " << recvBlock;
    ndbout << " SignalNo = " << aSignal->theVerId_signalNumber << endl;
    assert(0);
    return -1;
  }

  const SendStatus status = theTransporterRegistry->prepareSend(aSignal,
                                                                1, // JBB
                                                                payload,
                                                                aNode,
                                                                0);
  if (status == SEND_OK)
    return 0;
  return -1;
}

/**
 * Send a signal without sections to aNode without first checking whether
 * the node is sendable.
 *
 * @param aSignal  signal to send; asserted to have length 1..25 and a
 *                 non-zero receiver block number
 * @param aNode    destination node
 * @return 0 when prepareSend() reports SEND_OK, -1 otherwise
 */
int
TransporterFacade::sendSignalUnCond(NdbApiSignal * aSignal, NodeId aNode){
  Uint32* const payload = aSignal->getDataPtrSend();

#ifdef API_TRACE
  if(setSignalLog() && TRACE_GSN(aSignal->theVerId_signalNumber)){
    // Log with the full sender block reference, then restore the raw value.
    const Uint32 savedRef = aSignal->theSendersBlockRef;
    aSignal->theSendersBlockRef = numberToRef(savedRef, theOwnId);
    LinearSectionPtr ptr[3];
    signalLogger.sendSignal(* aSignal,
                            0,
                            payload,
                            aNode, ptr, 0);
    signalLogger.flushSignalLog();
    aSignal->theSendersBlockRef = savedRef;
  }
#endif

  assert((aSignal->theLength != 0) &&
         (aSignal->theLength <= 25) &&
         (aSignal->theReceiversBlockNumber != 0));

  const SendStatus status = theTransporterRegistry->prepareSend(aSignal,
                                                                0,
                                                                payload,
                                                                aNode,
                                                                0);
  if (status == SEND_OK)
    return 0;
  return -1;
}

1043
#define CHUNK_SZ NDB_SECTION_SEGMENT_SZ*64 // related to MAX_MESSAGE_SIZE
1044 1045
int
TransporterFacade::sendFragmentedSignal(NdbApiSignal* aSignal, NodeId aNode, 
1046 1047 1048 1049 1050
					LinearSectionPtr ptr[3], Uint32 secs)
{
  if(getIsNodeSendable(aNode) != true)
    return -1;

1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
#ifdef API_TRACE
  if(setSignalLog() && TRACE_GSN(aSignal->theVerId_signalNumber)){
    Uint32 tmp = aSignal->theSendersBlockRef;
    aSignal->theSendersBlockRef = numberToRef(tmp, theOwnId);
    signalLogger.sendSignal(* aSignal,
			    1,
			    aSignal->getDataPtrSend(),
			    aNode,
			    ptr, secs);
    aSignal->theSendersBlockRef = tmp;
  }
#endif

1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
  NdbApiSignal tmp_signal(*(SignalHeader*)aSignal);
  LinearSectionPtr tmp_ptr[3];
  Uint32 unique_id= m_fragmented_signal_id++; // next unique id
  unsigned i;
  for (i= 0; i < secs; i++)
    tmp_ptr[i]= ptr[i];

  unsigned start_i= 0;
  unsigned chunk_sz= 0;
  unsigned fragment_info= 0;
  Uint32 *tmp_data= tmp_signal.getDataPtrSend();
  for (i= 0; i < secs;) {
    unsigned save_sz= tmp_ptr[i].sz;
    tmp_data[i-start_i]= i;
    if (chunk_sz + save_sz > CHUNK_SZ) {
      // truncate
      unsigned send_sz= CHUNK_SZ - chunk_sz;
1081 1082 1083 1084 1085 1086 1087 1088 1089
      if (i != start_i) // first piece of a new section has to be a multiple of NDB_SECTION_SEGMENT_SZ
      {
	send_sz=
	  NDB_SECTION_SEGMENT_SZ
	  *(send_sz+NDB_SECTION_SEGMENT_SZ-1)
	  /NDB_SECTION_SEGMENT_SZ;
	if (send_sz > save_sz)
	  send_sz= save_sz;
      }
1090
      tmp_ptr[i].sz= send_sz;
1091 1092
      
      if (fragment_info < 2) // 1 = first fragment, 2 = middle fragments
1093 1094 1095 1096 1097 1098
	fragment_info++;

      // send tmp_signal
      tmp_data[i-start_i+1]= unique_id;
      tmp_signal.setLength(i-start_i+2);
      tmp_signal.m_fragmentInfo= fragment_info;
1099
      tmp_signal.m_noOfSections= i-start_i+1;
1100 1101
      // do prepare send
      {
1102 1103 1104 1105 1106 1107 1108 1109
	SendStatus ss = theTransporterRegistry->prepareSend
	  (&tmp_signal, 
	   1, /*JBB*/
	   tmp_data,
	   aNode, 
	   &tmp_ptr[start_i]);
	assert(ss != SEND_MESSAGE_TOO_BIG);
	if (ss != SEND_OK) return -1;
1110 1111 1112 1113 1114 1115
      }
      // setup variables for next signal
      start_i= i;
      chunk_sz= 0;
      tmp_ptr[i].sz= save_sz-send_sz;
      tmp_ptr[i].p+= send_sz;
1116 1117
      if (tmp_ptr[i].sz == 0)
	i++;
1118 1119 1120
    }
    else
    {
1121
      chunk_sz+=save_sz;
1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
      i++;
    }
  }

  unsigned a_sz= aSignal->getLength();

  if (fragment_info > 0) {
    // update the original signal to include section info
    Uint32 *a_data= aSignal->getDataPtrSend();
    unsigned tmp_sz= i-start_i;
    memcpy(a_data+a_sz,
	   tmp_data,
	   tmp_sz*sizeof(Uint32));
    a_data[a_sz+tmp_sz]= unique_id;
    aSignal->setLength(a_sz+tmp_sz+1);

    // send last fragment
1139
    aSignal->m_fragmentInfo= 3; // 3 = last fragment
1140 1141 1142 1143 1144 1145 1146
    aSignal->m_noOfSections= i-start_i;
  } else {
    aSignal->m_noOfSections= secs;
  }

  // send aSignal
  int ret;
1147
  {
1148
    SendStatus ss = theTransporterRegistry->prepareSend
1149 1150
      (aSignal,
       1/*JBB*/,
1151
       aSignal->getDataPtrSend(),
1152 1153
       aNode,
       &tmp_ptr[start_i]);
1154 1155
    assert(ss != SEND_MESSAGE_TOO_BIG);
    ret = (ss == SEND_OK ? 0 : -1);
1156
  }
1157 1158 1159 1160 1161 1162
  aSignal->m_noOfSections = 0;
  aSignal->m_fragmentInfo = 0;
  aSignal->setLength(a_sz);
  return ret;
}

1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
/**
 * Send a signal together with up to three linear sections in one message.
 *
 * @param aSignal  signal to send; m_noOfSections is set to secs for the
 *                 duration of the call and reset to 0 before returning
 * @param aNode    destination node
 * @param ptr      the sections to send
 * @param secs     number of valid entries in ptr
 * @return 0 when prepareSend() reports SEND_OK, -1 when the node is not
 *         sendable or the send did not succeed
 */
int
TransporterFacade::sendSignal(NdbApiSignal* aSignal, NodeId aNode,
                              LinearSectionPtr ptr[3], Uint32 secs){
  aSignal->m_noOfSections = secs;

  if (getIsNodeSendable(aNode) != true) {
    aSignal->m_noOfSections = 0;
    return -1;
  }

#ifdef API_TRACE
  if(setSignalLog() && TRACE_GSN(aSignal->theVerId_signalNumber)){
    // Log with the full sender block reference, then restore the raw value.
    const Uint32 savedRef = aSignal->theSendersBlockRef;
    aSignal->theSendersBlockRef = numberToRef(savedRef, theOwnId);
    signalLogger.sendSignal(* aSignal,
                            1,
                            aSignal->getDataPtrSend(),
                            aNode,
                            ptr, secs);
    signalLogger.flushSignalLog();
    aSignal->theSendersBlockRef = savedRef;
  }
#endif

  const SendStatus status = theTransporterRegistry->prepareSend
    (aSignal,
     1, // JBB
     aSignal->getDataPtrSend(),
     aNode,
     ptr);
  assert(status != SEND_MESSAGE_TOO_BIG);
  aSignal->m_noOfSections = 0;
  if (status == SEND_OK)
    return 0;
  return -1;
}

1195 1196 1197 1198 1199 1200 1201
/******************************************************************************
 * CONNECTION METHODS  Etc
 ******************************************************************************/

void
TransporterFacade::doConnect(int aNodeId){
  theTransporterRegistry->setIOState(aNodeId, NoHalt);
unknown's avatar
unknown committed
1202
  theTransporterRegistry->do_connect(aNodeId);
1203 1204 1205 1206 1207
}

void
TransporterFacade::doDisconnect(int aNodeId)
{
unknown's avatar
unknown committed
1208
  theTransporterRegistry->do_disconnect(aNodeId);
1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
}

/**
 * Forward a "node connected" notification for aNodeId to the cluster manager.
 */
void
TransporterFacade::reportConnected(int aNodeId) {
  theClusterMgr->reportConnected(aNodeId);
}

/**
 * Forward a "node disconnected" notification for aNodeId to the cluster
 * manager.
 */
void
TransporterFacade::reportDisconnected(int aNodeId) {
  theClusterMgr->reportDisconnected(aNodeId);
}

/**
 * @return the value of theOwnId
 */
NodeId
TransporterFacade::ownId() const {
  const NodeId self = theOwnId;
  return self;
}

/**
 * @return whatever the transporter registry's is_connected() reports for
 *         aNodeId
 */
bool
TransporterFacade::isConnected(NodeId aNodeId)
{
  const bool connected = theTransporterRegistry->is_connected(aNodeId);
  return connected;
}

/**
 * Pick a node for which get_node_alive() is true.
 *
 * The search starts at theStartNodeId and runs up to MAX_NDB_NODES, then
 * wraps around and covers node ids 1 .. theStartNodeId-1. When a node is
 * found, theStartNodeId is advanced to the id after it (modulo
 * MAX_NDB_NODES) so the next call starts its search there.
 *
 * In VM_TRACE builds a non-empty NDB_ALIVE_NODE_ID environment variable
 * overrides the search, and its numeric value is returned as is.
 *
 * @return the id of an alive node, or 0 when none was found
 */
NodeId
TransporterFacade::get_an_alive_node()
{
  DBUG_ENTER("TransporterFacade::get_an_alive_node");
  DBUG_PRINT("enter", ("theStartNodeId: %d", theStartNodeId));
#ifdef VM_TRACE
  const char* p = NdbEnv_GetEnv("NDB_ALIVE_NODE_ID", (char*)0, 0);
  if (p != 0 && *p != 0)
  {
    // Leave through DBUG_RETURN: a plain return after DBUG_ENTER would leave
    // the DBUG call stack unbalanced.
    DBUG_RETURN((NodeId)atoi(p));
  }
#endif
  NodeId i;
  // First pass: from the remembered start position to the end of the range.
  for (i = theStartNodeId; i < MAX_NDB_NODES; i++) {
    if (get_node_alive(i)){
      DBUG_PRINT("info", ("Node %d is alive", i));
      theStartNodeId = ((i + 1) % MAX_NDB_NODES);
      DBUG_RETURN(i);
    }
  }
  // Second pass: wrap around and cover the ids below the start position.
  for (i = 1; i < theStartNodeId; i++) {
    if (get_node_alive(i)){
      DBUG_PRINT("info", ("Node %d is alive", i));
      theStartNodeId = ((i + 1) % MAX_NDB_NODES);
      DBUG_RETURN(i);
    }
  }
  DBUG_RETURN((NodeId)0);
}

/**
 * Build a thread table with `size` free slots.
 */
TransporterFacade::ThreadData::ThreadData(Uint32 size)
{
  // Start with an empty free list; expand() then links in the new slots.
  m_firstFree = END_OF_LIST;
  expand(size);
}

/**
 * Grow the table by `size` slots and put the new slots at the head of the
 * free list.
 *
 * Each new m_statusNext entry holds the index of the following new slot; the
 * last one points at the previous head of the free list.
 */
void
TransporterFacade::ThreadData::expand(Uint32 size)
{
  const Uint32 oldSize = m_statusNext.size();
  const Uint32 newSize = oldSize + size;

  // Pad the callback tables with empty entries.
  Object_Execute emptyExec = { 0 ,0 };
  NodeStatusFunction noStatusFun = 0;
  m_objectExecute.fill(newSize, emptyExec);
  m_statusFunction.fill(newSize, noStatusFun);

  // Slot k is appended with the value k + 1, i.e. it links to its successor.
  for (Uint32 next = oldSize + 1; next <= newSize; next++) {
    m_statusNext.push_back(next);
  }

  // Splice the old free list behind the new slots.
  m_statusNext.back() = m_firstFree;
  m_firstFree = m_statusNext.size() - size;
}

1285

1286 1287
int
TransporterFacade::ThreadData::open(void* objRef, 
1288 1289 1290
				    ExecuteFunction fun, 
				    NodeStatusFunction fun2)
{
1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309
  Uint32 nextFree = m_firstFree;

  if(m_statusNext.size() >= MAX_NO_THREADS && nextFree == END_OF_LIST){
    return -1;
  }
  
  if(nextFree == END_OF_LIST){
    expand(10);
    nextFree = m_firstFree;
  }
  
  m_firstFree = m_statusNext[nextFree];
  
  Object_Execute oe = { objRef , fun };

  m_statusNext[nextFree] = INACTIVE;
  m_objectExecute[nextFree] = oe;
  m_statusFunction[nextFree] = fun2;

1310
  return indexToNumber(nextFree);
1311 1312 1313 1314
}

int
TransporterFacade::ThreadData::close(int number){
1315
  number= numberToIndex(number);
1316 1317 1318 1319 1320 1321 1322 1323
  assert(getInUse(number));
  m_statusNext[number] = m_firstFree;
  m_firstFree = number;
  Object_Execute oe = { 0, 0 };
  m_objectExecute[number] = oe;
  m_statusFunction[number] = 0;
  return 0;
}
unknown's avatar
unknown committed
1324

unknown's avatar
unknown committed
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448
/**
 * Remember the facade, waiter and block number, then take the facade mutex.
 * m_locked records that the mutex is held.
 */
PollGuard::PollGuard(TransporterFacade *tp, NdbWaiter *aWaiter,
                     Uint32 block_no)
{
  m_block_no= block_no;
  m_waiter= aWaiter;
  m_tp= tp;
  m_locked= true;
  m_tp->lock_mutex();
}

/*
  Common routine for possibly forcing the send of buffered signals and then
  receiving the response the thread is waiting for. It is designed to be
  useful from:
  1) PK, UK lookups using the asynchronous interface
     (that path uses the wait_for_input routine instead, since it has
     special end conditions due to the asynchronous nature of its usage)
  2) Scans
  3) dictSignal
  It uses a NdbWaiter object to wait on the events and this object is
  linked into the conditional wait queue. Thus this object contains
  a reference to its place in the queue.

  It replaces the method receiveResponse previously used on the Ndb object.

  The waiter is set to wait for nodeId in the given state, the wait loop is
  run, and the mutex is released (with poll ownership handed over) before the
  result of the wait loop is returned.
*/
int PollGuard::wait_n_unlock(int wait_time, NodeId nodeId, Uint32 state,
                             bool forceSend)
{
  m_waiter->set_node(nodeId);
  m_waiter->set_state(state);
  const int result= wait_for_input_in_loop(wait_time, forceSend);
  unlock_and_signal();
  return result;
}

/*
  Wait for a scan response from nodeId: the waiter is put in state WAIT_SCAN
  and the wait loop is run. Unlike wait_n_unlock, this routine does not call
  unlock_and_signal.
*/
int PollGuard::wait_scan(int wait_time, NodeId nodeId, bool forceSend)
{
  m_waiter->set_node(nodeId);
  m_waiter->set_state(WAIT_SCAN);
  const int result= wait_for_input_in_loop(wait_time, forceSend);
  return result;
}

/*
  Send (forced or adaptive) and then wait until the waiter reaches NO_WAIT,
  a node failure is reported, or the time-out expires.

  wait_time   time-out in milliseconds; -1 means wait forever, in which case
              the wait is repeated in slices of WAITFOR_RESPONSE_TIMEOUT
  forceSend   true: forceSend(), false: checkForceSend()

  Returns  0  when the waiter state became NO_WAIT
          -1  on time-out (waiter state passes through WST_WAIT_TIMEOUT)
          -2  when the waiter state is WAIT_NODE_FAILURE
  On the two error exits the waiter state is reset to NO_WAIT.
*/
int PollGuard::wait_for_input_in_loop(int wait_time, bool forceSend)
{
  int ret_val, response_time;
  if (forceSend)
    m_tp->forceSend(m_block_no);
  else
    m_tp->checkForceSend(m_block_no);

  const bool wait_forever= (wait_time == -1);
  if (wait_forever)
    response_time= WAITFOR_RESPONSE_TIMEOUT;
  else
    response_time= wait_time;

  // Absolute deadline; only consulted when a finite time-out was given.
  NDB_TICKS curr_time = NdbTick_CurrentMillisecond();
  NDB_TICKS max_time = curr_time + (NDB_TICKS)wait_time;
  do
  {
    wait_for_input(response_time);
    Uint32 state= m_waiter->get_state();
    if (state == NO_WAIT)
    {
      return 0;
    }
    else if (state == WAIT_NODE_FAILURE)
    {
      ret_val= -2;
      break;
    }
    if (wait_forever)
    {
#ifdef VM_TRACE
      ndbout << "Waited WAITFOR_RESPONSE_TIMEOUT, continuing wait" << endl;
#endif
      continue;
    }
    wait_time= max_time - NdbTick_CurrentMillisecond();
    if (wait_time <= 0)
    {
#ifdef VM_TRACE
      ndbout << "Time-out state is " << m_waiter->get_state() << endl;
#endif
      m_waiter->set_state(WST_WAIT_TIMEOUT);
      ret_val= -1;
      break;
    }
    // Woken up before the deadline without the awaited state: wait only for
    // the time that is left, not for the full original time-out again.
    response_time= wait_time;
  } while (1);
#ifdef VM_TRACE
  ndbout << "ERR: receiveResponse - theImpl->theWaiter.m_state = ";
  ndbout << m_waiter->get_state() << endl;
#endif
  m_waiter->set_state(NO_WAIT);
  return ret_val;
}

void PollGuard::wait_for_input(int wait_time)
{
  NdbWaiter *t_poll_owner= m_tp->get_poll_owner();
  if (t_poll_owner != NULL && t_poll_owner != m_waiter)
  {
    /*
      We didn't get hold of the poll "right". We will sleep on a
      conditional mutex until the thread owning the poll "right"
      will wake us up after all data is received. If no data arrives
      we will wake up eventually due to the timeout.
      After receiving all data we take the object out of the cond wait
      queue if it hasn't happened already. It is usually already out of the
      queue but at time-out it could be that the object is still there.
    */
    Uint32 cond_wait_index= m_tp->put_in_cond_wait_queue(m_waiter);
    m_waiter->wait(wait_time);
    if (m_waiter->get_cond_wait_index() != TransporterFacade::MAX_NO_THREADS)
    {
      m_tp->remove_from_cond_wait_queue(m_waiter);
    }
  }
  else
  {
    /*
      We got the poll "right" and we poll until data is received. After
      receiving data we will check if all data is received, if not we
      poll again.
    */
#ifdef NDB_SHM_TRANSPORTER
1449 1450 1451 1452 1453 1454
    /*
      If shared memory transporters are used we need to set our sigmask
      such that we wake up also on interrupts on the shared memory
      interrupt signal.
    */
    NdbThread_set_shm_sigmask(FALSE);
unknown's avatar
unknown committed
1455
#endif
1456 1457
    m_tp->set_poll_owner(m_waiter);
    m_waiter->set_poll_owner(true);
unknown's avatar
unknown committed
1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485
    m_tp->external_poll((Uint32)wait_time);
  }
}

void PollGuard::unlock_and_signal()
{
  NdbWaiter *t_signal_cond_waiter= 0;
  if (!m_locked)
    return;
  /*
   When completing the poll for this thread we must return the poll
   ownership if we own it. We will give it to the last thread that
   came here (the most recent) which is likely to be the one also
   last to complete. We will remove that thread from the conditional
   wait queue and set him as the new owner of the poll "right".
   We will wait however with the signal until we have unlocked the
   mutex for performance reasons.
   See Stevens book on Unix NetworkProgramming: The Sockets Networking
   API Volume 1 Third Edition on page 703-704 for a discussion on this
   subject.
  */
  if (m_tp->get_poll_owner() == m_waiter)
  {
#ifdef NDB_SHM_TRANSPORTER
    /*
      If shared memory transporters are used we need to reset our sigmask
      since we are no longer the thread to receive interrupts.
    */
unknown's avatar
Fixes  
unknown committed
1486
    NdbThread_set_shm_sigmask(TRUE);
unknown's avatar
unknown committed
1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499
#endif
    m_waiter->set_poll_owner(false);
    t_signal_cond_waiter= m_tp->rem_last_from_cond_wait_queue();
    m_tp->set_poll_owner(t_signal_cond_waiter);
    if (t_signal_cond_waiter)
      t_signal_cond_waiter->set_poll_owner(true);
  }
  m_tp->unlock_mutex();
  if (t_signal_cond_waiter)
    t_signal_cond_waiter->cond_signal();
  m_locked=false;
}

unknown's avatar
unknown committed
1500 1501
// Explicit instantiations of the Vector template for the element types
// stored in TransporterFacade::ThreadData (m_statusFunction and
// m_objectExecute).
template class Vector<NodeStatusFunction>;
template class Vector<TransporterFacade::ThreadData::Object_Execute>;