/* Copyright (C) 2003 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef SCI_Transporter_H #define SCI_Transporter_H #include "Transporter.hpp" #include "SHM_Buffer.hpp" #include <sisci_api.h> #include <sisci_error.h> #include <sisci_types.h> #include <ndb_types.h> /** * The SCI Transporter * * The design goal of the SCI transporter is to deliver high performance * data transfers (low latency, high bandwidth) combined with very high * availability (failover support). * High performance is an inherit feature of SCI and the, whereas failover * support is implemented at the application level. * In SCI the programming model is similar to the shared memory paradigm. * A process on one node (A) allocates a memory segment and import the * segment to its virtual address space. Another node (B) can connect to * the segment and map this segment into its virtual address space. * If A writes data to the segment, then B can read it and vice versa, through * ordinary loads and stores. This is also called PIO (programmable IO), and * is one thing that distinguish SCI from other interconnects such as, * ethernet, Gig-e, Myrinet, and Infiniband. By using PIO, lower network * latency is achieved, compared to the interconnects mentioned above. * In order for NDB to utilize SCI, the SCI transporter relies on the * SISCI api. The SISCI api provides a high level abstraction to the low * level SCI driver called PCISCI driver. * The SISCI api provides functions to setup, export, and import * memory segments in a process virtual address space, and also functions to * guarantee the correctness of data transfers between nodes. Basically, the * * In NDB Cluster, each SCI transporter creates a local segment * that is mapped into the virtual address space. After the creation of the * local segment, the SCI transporter connects to a segment created by another * transporter at a remote node, and the maps the remote segment into its * virtual address space. However, since NDB Cluster relies on redundancy * at the network level, by using dual SCI adapters communication can be * maintained even if one of the adapter cards fails (or anything on the * network this adapter card exists in e.g. an SCI switch failure). * */ /** * class SCITransporter * @brief - main class for the SCI transporter. */ class SCI_Transporter : public Transporter { friend class TransporterRegistry; public: /** * Init the transporter. Allocate sendbuffers and open a SCI virtual device * for each adapter. * @return true if successful, otherwize false */ bool initTransporter(); /** * Creates a sequence for error checking. * @param adapterid the adapter on which to create a new sequence. * @return SCI_ERR_OK if ok, otherwize something else. */ sci_error_t createSequence(Uint32 adapterid); /** Initiate Local Segment: create a memory segment, * prepare a memory segment, map the local segment * into memory space and make segment available. * @return SCI_ERR_OK if ok, otherwize something else. */ sci_error_t initLocalSegment(); /** * Calculate the segment id for the remote segment * @param localNodeId - local id (e.g. 1 = mgm , 2 = ndb.2 etc.) * @param remoteNodeId - remote id (e.g. 1 = mgm , 2 = ndb.2 etc.) * @return a segment id */ Uint32 remoteSegmentId(Uint16 localNodeId, Uint16 remoteNodeId); // Get local segment id (inline) Uint32 hostSegmentId(Uint16 localNodeId, Uint16 remoteNodeId); /** * closeSCI closes the SCI virtual device */ void closeSCI(); /** * Check the status of the remote node, * if it is connected or has disconnected * @return true if connected, otherwize false. */ bool checkConnected(); /** * Check if the segment are properly connected to each other (remotely * and locally). * @return True if the both the local segment is mapped and the * remote segment is mapped. Otherwize false. */ bool getConnectionStatus(); virtual Uint32 get_free_buffer() const; private: SCI_Transporter(TransporterRegistry &t_reg, const char *local_host, const char *remote_host, int port, bool isMgmConnection, Uint32 packetSize, Uint32 bufferSize, Uint32 nAdapters, Uint16 remoteSciNodeId0, Uint16 remoteSciNodeId1, NodeId localNodeID, NodeId remoteNodeID, NodeId serverNodeId, bool checksum, bool signalId, Uint32 reportFreq = 4096); /** * Destructor. Disconnects the transporter. */ ~SCI_Transporter(); bool m_mapped; bool m_initLocal; bool m_sciinit; Uint32 m_failCounter; /** * For statistics on transfered packets */ //#ifdef DEBUG_TRANSPORTER #if 1 Uint32 i1024; Uint32 i2048; Uint32 i2049; Uint32 i10242048; Uint32 i20484096; Uint32 i4096; Uint32 i4097; #endif volatile Uint32 * m_localStatusFlag; volatile Uint32 * m_remoteStatusFlag; volatile Uint32 * m_remoteStatusFlag2; struct { Uint32 * m_buffer; // The buffer Uint32 m_dataSize; // No of words in buffer Uint32 m_sendBufferSize; // Buffer size Uint32 m_forceSendLimit; // Send when buffer is this full } m_sendBuffer; SHM_Reader * reader; SHM_Writer * writer; SHM_Writer * writer2; /** * Statistics */ Uint32 m_reportFreq; Uint32 m_adapters; Uint32 m_numberOfRemoteNodes; Uint16 m_remoteNodes[2]; typedef struct SciAdapter { sci_desc_t scidesc; Uint32 localSciNodeId; bool linkStatus; } SciAdapter; SciAdapter* sciAdapters; Uint32 m_ActiveAdapterId; Uint32 m_StandbyAdapterId; typedef struct sourceSegm { sci_local_segment_t localHandle; // Handle to local segment to be mapped struct localHandleMap { sci_map_t map; // Handle to the new mapped segment. // 2 = max adapters in one node } lhm[2]; volatile void *mappedMemory; // Used when reading } sourceSegm; typedef struct targetSegm { struct remoteHandleMap { sci_remote_segment_t remoteHandle; //Handle to local segment to be mapped sci_map_t map; //Handle to the new mapped segment } rhm[2]; sci_sequence_status_t m_SequenceStatus; // Used for error checking sci_sequence_t sequence; volatile void * mappedMemory; // Used when writing SHM_Writer * writer; } targetSegm; sci_sequence_status_t m_SequenceStatus; // Used for error checking // Shared between all SCI users active=(either prim or second) sci_desc_t activeSCIDescriptor; sourceSegm* m_SourceSegm; // Local segment reference targetSegm* m_TargetSegm; // Remote segment reference Uint32 m_LocalAdapterId; // Adapter Id Uint16 m_LocalSciNodeId; // The SCI-node Id of this machine (adapter 0) Uint16 m_LocalSciNodeId1; // The SCI-node Id of this machine (adapter 1) Uint16 m_RemoteSciNodeId; // The SCI-node Id of remote machine (adapter 0) Uint16 m_RemoteSciNodeId1; // The SCI-node Id of remote machine (adapter 1) Uint32 m_PacketSize; // The size of each data packet Uint32 m_BufferSize; // Mapped SCI buffer size Uint32 * getWritePtr(Uint32 lenBytes, Uint32 prio); void updateWritePtr(Uint32 lenBytes, Uint32 prio); /** * doSend. Copies the data from the source (the send buffer) to the * shared mem. segment. * Sequences are used for error checking. * If an error occurs, the transfer is retried. * If the link that we need to swap to is broken, we will disconnect. * @return Returns true if datatransfer ok. If not retriable * then false is returned. */ bool doSend(); /** * @param adapterNo the adapter for which to retrieve the node id. * @return Returns the node id for an adapter. */ Uint32 getLocalNodeId(Uint32 adapterNo); bool hasDataToRead() const { return reader->empty() == false; } bool hasDataToSend() const { return m_sendBuffer.m_dataSize > 0; } /** * Make the local segment unavailable, no new connections will be accepted. * @return Returns true if the segment was successfully disconnected. */ bool disconnectLocal(); /** * Make the local segment unavailable, no new connections will be accepted. * @return Returns true if the segment was successfully disconnected. */ bool disconnectRemote(); void resetToInitialState(); /** * It is always possible to send data with SCI! * @return True (always) */ bool sendIsPossible(struct timeval * timeout); void getReceivePtr(Uint32 ** ptr, Uint32 ** eod){ reader->getReadPtr(* ptr, * eod); } void updateReceivePtr(Uint32 *ptr){ reader->updateReadPtr(ptr); } /** * Corresponds to SHM_Transporter::setupBuffers() * Initiates the start pointer of the buffer and read pointers. * Initiate the localSegment for the SHM reader. */ void setupLocalSegment(); /** * Initiate the remoteSegment for the SHM writer */ void setupRemoteSegment(); /** * Set the connect flag in the remote memory segment (write through) */ void setConnected(); /** * Set the disconnect flag in the remote memory segment (write through) */ void setDisconnect(); /** * Check if there is a link between the adapter and the switch * @param adapterNo the adapter for which to retrieve the link status. * @return Returns true if there is a link between adapter and switch. * Otherwize false is returned and the cables must be checked. */ bool getLinkStatus(Uint32 adapterNo); /** * failoverShmWriter takes the state of the active writer and inserts into * the standby writer. */ void failoverShmWriter(); bool init_local(); bool init_remote(); protected: /** Perform a connection between segment * This is a client node, trying to connect to a remote segment. * @param timeout, the time the connect thread sleeps before * retrying. * @return Returns true on success, otherwize falser */ bool connect_server_impl(NDB_SOCKET_TYPE sockfd); bool connect_client_impl(NDB_SOCKET_TYPE sockfd); /** * We will disconnect if: * -# the other node has disconnected from us * -# unrecoverable error in transmission, on both adapters * -# if we are shutdown properly */ void disconnectImpl(); static bool initSCI(); }; /** The theLocalAdapterId combined with the theRemoteNodeId constructs * (SCI ids)* a unique identifier for the local segment */ inline Uint32 SCI_Transporter::hostSegmentId(Uint16 SciLocalNodeId, Uint16 SciRemoteNodeId) { return (SciLocalNodeId << 16) | SciRemoteNodeId; } /** The theLocalAdapterId combined with the theRemoteNodeId constructs * (SCI ids)* a unique identifier for the remote segment */ inline Uint32 SCI_Transporter::remoteSegmentId(Uint16 SciLocalNodeId, Uint16 SciRemoteNodeId) { return (SciRemoteNodeId << 16) | SciLocalNodeId; } #endif