DbdihMain.cpp
上传用户:romrleung
上传日期:2022-05-23
资源大小:18897k
文件大小:494k
- jam();
- /* --------------------------------------------------------------------- */
- /* ERROR IN FIRST FILE, TRY THE SECOND FILE. */
- /* --------------------------------------------------------------------- */
- filePtr.i = crestartInfoFile[1];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- openFileRw(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::OPENING_GCP;
- return;
- }//if
- /* ----------------------------------------------------------------------- */
- /* WE DISCOVERED A FAILURE WITH THE SECOND FILE AS WELL. THIS IS A */
- /* SERIOUS PROBLEM. REPORT FAILURE TO NDBCNTR. */
- /* ----------------------------------------------------------------------- */
- sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
- }//Dbdih::closingGcpCrashLab()
- /*****************************************************************************/
- /* ------------------------------------------------------------------------- */
- /* THIS IS AN INITIAL RESTART. WE WILL CREATE THE TWO FILES DESCRIBING */
- /* THE GLOBAL CHECKPOINTS THAT ARE RESTORABLE. */
- /* ------------------------------------------------------------------------- */
- /*****************************************************************************/
- void Dbdih::initGciFilesLab(Signal* signal)
- {
- FileRecordPtr filePtr;
- filePtr.i = crestartInfoFile[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- createFileRw(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::CREATING_GCP;
- }//Dbdih::initGciFilesLab()
- /* ------------------------------------------------------------------------- */
- /* GLOBAL CHECKPOINT FILE HAVE BEEN SUCCESSFULLY CREATED. */
- /* ------------------------------------------------------------------------- */
- void Dbdih::creatingGcpLab(Signal* signal, FileRecordPtr filePtr)
- {
- if (filePtr.i == crestartInfoFile[0]) {
- jam();
- /* --------------------------------------------------------------------- */
- /* IF CREATED FIRST THEN ALSO CREATE THE SECOND FILE. */
- /* --------------------------------------------------------------------- */
- filePtr.i = crestartInfoFile[1];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- createFileRw(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::CREATING_GCP;
- } else {
- jam();
- /* --------------------------------------------------------------------- */
- /* BOTH FILES HAVE BEEN CREATED. NOW WRITE THE INITIAL DATA TO BOTH */
- /* OF THE FILES. */
- /* --------------------------------------------------------------------- */
- filePtr.i = crestartInfoFile[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- writeRestorableGci(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
- }//if
- }//Dbdih::creatingGcpLab()
- /* ------------------------------------------------------------------------- */
- /* WE HAVE SUCCESSFULLY WRITTEN A GCI FILE. */
- /* ------------------------------------------------------------------------- */
- void Dbdih::writeInitGcpLab(Signal* signal, FileRecordPtr filePtr)
- {
- filePtr.p->reqStatus = FileRecord::IDLE;
- if (filePtr.i == crestartInfoFile[0]) {
- jam();
- /* --------------------------------------------------------------------- */
- /* WE HAVE WRITTEN THE FIRST FILE NOW ALSO WRITE THE SECOND FILE. */
- /* --------------------------------------------------------------------- */
- filePtr.i = crestartInfoFile[1];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- writeRestorableGci(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
- } else {
- /* --------------------------------------------------------------------- */
- /* WE HAVE WRITTEN BOTH FILES. LEAVE BOTH FILES OPEN AND CONFIRM OUR */
- /* PART OF THE INITIAL START. */
- /* --------------------------------------------------------------------- */
- if (isMaster()) {
- jam();
- /*---------------------------------------------------------------------*/
- // IN MASTER NODES THE START REQUEST IS RECEIVED FROM NDBCNTR AND WE MUST
- // RESPOND WHEN COMPLETED.
- /*---------------------------------------------------------------------*/
- signal->theData[0] = reference();
- sendSignal(cndbStartReqBlockref, GSN_NDB_STARTCONF, signal, 1, JBB);
- } else {
- jam();
- ndbsttorry10Lab(signal, __LINE__);
- return;
- }//if
- }//if
- }//Dbdih::writeInitGcpLab()
- /*****************************************************************************/
- /* ********** NODES DELETION MODULE *************/
- /*****************************************************************************/
- /*---------------------------------------------------------------------------*/
- /* LOGIC FOR NODE FAILURE */
- /*---------------------------------------------------------------------------*/
- void Dbdih::execNODE_FAILREP(Signal* signal)
- {
- Uint32 i;
- Uint32 failedNodes[MAX_NDB_NODES];
- jamEntry();
- NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
- cfailurenr = nodeFail->failNo;
- Uint32 newMasterId = nodeFail->masterNodeId;
- const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
- /*-------------------------------------------------------------------------*/
- // The first step is to convert from a bit mask to an array of failed nodes.
- /*-------------------------------------------------------------------------*/
- Uint32 index = 0;
- for (i = 1; i < MAX_NDB_NODES; i++) {
- jam();
- if(NodeBitmask::get(nodeFail->theNodes, i)){
- jam();
- failedNodes[index] = i;
- index++;
- }//if
- }//for
- ndbrequire(noOfFailedNodes == index);
- ndbrequire(noOfFailedNodes - 1 < MAX_NDB_NODES);
- /*-------------------------------------------------------------------------*/
- // The second step is to update the node status of the failed nodes, remove
- // them from the alive node list and put them into the dead node list. Also
- // update the number of nodes on-line.
- // We also set certain state variables ensuring that the node no longer is
- // used in transactions and also mark that we received this signal.
- /*-------------------------------------------------------------------------*/
- for (i = 0; i < noOfFailedNodes; i++) {
- jam();
- NodeRecordPtr TNodePtr;
- TNodePtr.i = failedNodes[i];
- ptrCheckGuard(TNodePtr, MAX_NDB_NODES, nodeRecord);
- TNodePtr.p->useInTransactions = false;
- TNodePtr.p->m_inclDihLcp = false;
- TNodePtr.p->recNODE_FAILREP = ZTRUE;
- if (TNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
- jam();
- con_lineNodes--;
- TNodePtr.p->nodeStatus = NodeRecord::DIED_NOW;
- removeAlive(TNodePtr);
- insertDeadNode(TNodePtr);
- }//if
- }//for
- /*-------------------------------------------------------------------------*/
- // Verify that we can continue to operate the cluster. If we cannot we will
- // not return from checkEscalation.
- /*-------------------------------------------------------------------------*/
- checkEscalation();
- /*------------------------------------------------------------------------*/
- // Verify that a starting node has also crashed. Reset the node start record.
- /*-------------------------------------------------------------------------*/
- if (c_nodeStartMaster.startNode != RNIL) {
- ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
- }//if
- /*--------------------------------------------------*/
- /* */
- /* WE CHANGE THE REFERENCE TO MASTER DIH */
- /* BLOCK AND POINTER AT THIS PLACE IN THE CODE*/
- /*--------------------------------------------------*/
- Uint32 oldMasterId = cmasterNodeId;
- BlockReference oldMasterRef = cmasterdihref;
- cmasterdihref = calcDihBlockRef(newMasterId);
- cmasterNodeId = newMasterId;
- const bool masterTakeOver = (oldMasterId != newMasterId);
- for(i = 0; i < noOfFailedNodes; i++) {
- NodeRecordPtr failedNodePtr;
- failedNodePtr.i = failedNodes[i];
- ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
- Uint32 activeTakeOverPtr = findTakeOver(failedNodes[i]);
- if (oldMasterRef == reference()) {
- /*-------------------------------------------------------*/
- // Functions that need to be called only for master nodes.
- /*-------------------------------------------------------*/
- checkCopyTab(failedNodePtr);
- checkStopPermMaster(signal, failedNodePtr);
- checkWaitGCPMaster(signal, failedNodes[i]);
- checkTakeOverInMasterAllNodeFailure(signal, failedNodePtr);
- checkTakeOverInMasterCopyNodeFailure(signal, failedNodePtr.i);
- checkTakeOverInMasterStartNodeFailure(signal, activeTakeOverPtr);
- checkGcpOutstanding(signal, failedNodePtr.i);
- } else {
- jam();
- /*-----------------------------------------------------------*/
- // Functions that need to be called only for nodes that were
- // not master before these failures.
- /*-----------------------------------------------------------*/
- checkStopPermProxy(signal, failedNodes[i]);
- checkWaitGCPProxy(signal, failedNodes[i]);
- if (isMaster()) {
- /*-----------------------------------------------------------*/
- // We take over as master since old master has failed
- /*-----------------------------------------------------------*/
- handleTakeOverNewMaster(signal, activeTakeOverPtr);
- } else {
- /*-----------------------------------------------------------*/
- // We are not master and will not become master.
- /*-----------------------------------------------------------*/
- checkTakeOverInNonMasterStartNodeFailure(signal, activeTakeOverPtr);
- }//if
- }//if
- /*--------------------------------------------------*/
- // Functions that need to be called for all nodes.
- /*--------------------------------------------------*/
- checkStopMe(signal, failedNodePtr);
- failedNodeLcpHandling(signal, failedNodePtr);
- checkWaitDropTabFailedLqh(signal, failedNodePtr.i, 0); // 0 = start w/ tab 0
- startRemoveFailedNode(signal, failedNodePtr);
- /**
- * This is the last function called
- * It modifies failedNodePtr.p->nodeStatus
- */
- failedNodeSynchHandling(signal, failedNodePtr);
- }//for
-
- if(masterTakeOver){
- jam();
- startLcpMasterTakeOver(signal, oldMasterId);
- startGcpMasterTakeOver(signal, oldMasterId);
- if(getNodeState().getNodeRestartInProgress()){
- jam();
- progError(__LINE__,
- ERR_SYSTEM_ERROR,
- "Unhandle master failure during node restart");
- }
- }
-
- if (isMaster()) {
- jam();
- setNodeRestartInfoBits();
- }//if
- }//Dbdih::execNODE_FAILREP()
- void Dbdih::checkCopyTab(NodeRecordPtr failedNodePtr)
- {
- jam();
- if(c_nodeStartMaster.startNode != failedNodePtr.i){
- jam();
- return;
- }
-
- switch(c_nodeStartMaster.m_outstandingGsn){
- case GSN_COPY_TABREQ:
- jam();
- ndbrequire(c_COPY_TABREQ_Counter.isWaitingFor(failedNodePtr.i));
- releaseTabPages(failedNodePtr.p->activeTabptr);
- c_COPY_TABREQ_Counter.clearWaitingFor(failedNodePtr.i);
- c_nodeStartMaster.wait = ZFALSE;
- break;
- case GSN_START_INFOREQ:
- case GSN_START_PERMCONF:
- case GSN_DICTSTARTREQ:
- case GSN_START_MECONF:
- jam();
- break;
- default:
- ndbout_c("outstanding gsn: %s(%d)",
- getSignalName(c_nodeStartMaster.m_outstandingGsn),
- c_nodeStartMaster.m_outstandingGsn);
- ndbrequire(false);
- }
-
- nodeResetStart();
- }//Dbdih::checkCopyTab()
- void Dbdih::checkStopMe(Signal* signal, NodeRecordPtr failedNodePtr)
- {
- jam();
- if (c_STOP_ME_REQ_Counter.isWaitingFor(failedNodePtr.i)){
- jam();
- ndbrequire(c_stopMe.clientRef != 0);
- StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
- stopMeConf->senderRef = calcDihBlockRef(failedNodePtr.i);
- stopMeConf->senderData = c_stopMe.clientData;
- sendSignal(reference(), GSN_STOP_ME_CONF, signal,
- StopMeConf::SignalLength, JBB);
- }//if
- }//Dbdih::checkStopMe()
- void Dbdih::checkStopPermMaster(Signal* signal, NodeRecordPtr failedNodePtr)
- {
- DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
- jam();
- if (c_DIH_SWITCH_REPLICA_REQ_Counter.isWaitingFor(failedNodePtr.i)){
- jam();
- ndbrequire(c_stopPermMaster.clientRef != 0);
- ref->senderNode = failedNodePtr.i;
- ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
- sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REF, signal,
- DihSwitchReplicaRef::SignalLength, JBB);
- return;
- }//if
- }//Dbdih::checkStopPermMaster()
- void Dbdih::checkStopPermProxy(Signal* signal, NodeId failedNodeId)
- {
- jam();
- if(c_stopPermProxy.clientRef != 0 &&
- refToNode(c_stopPermProxy.masterRef) == failedNodeId){
-
- /**
- * The master has failed report to proxy-client
- */
- jam();
- StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
-
- ref->senderData = c_stopPermProxy.clientData;
- ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
- sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
- c_stopPermProxy.clientRef = 0;
- }//if
- }//Dbdih::checkStopPermProxy()
- void
- Dbdih::checkTakeOverInMasterAllNodeFailure(Signal* signal,
- NodeRecordPtr failedNodePtr)
- {
- //------------------------------------------------------------------------
- // This code is used to handle the failure of "all" nodes during the
- // take over when "all" nodes are informed about state changes in
- // the take over protocol.
- //--------------------------------------------------------------------------
- if (c_START_TOREQ_Counter.isWaitingFor(failedNodePtr.i)){
- jam();
- StartToConf * const conf = (StartToConf *)&signal->theData[0];
- conf->userPtr = c_startToLock;
- conf->sendingNodeId = failedNodePtr.i;
- conf->startingNodeId = getStartNode(c_startToLock);
- sendSignal(reference(), GSN_START_TOCONF, signal,
- StartToConf::SignalLength, JBB);
- }//if
- if (c_CREATE_FRAGREQ_Counter.isWaitingFor(failedNodePtr.i)){
- jam();
- CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = c_createFragmentLock;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- conf->userPtr = takeOverPtr.i;
- conf->tableId = takeOverPtr.p->toCurrentTabref;
- conf->fragId = takeOverPtr.p->toCurrentFragid;
- conf->sendingNodeId = failedNodePtr.i;
- conf->startingNodeId = takeOverPtr.p->toStartingNode;
- sendSignal(reference(), GSN_CREATE_FRAGCONF, signal,
- CreateFragConf::SignalLength, JBB);
- }//if
- if (c_UPDATE_TOREQ_Counter.isWaitingFor(failedNodePtr.i)){
- jam();
- UpdateToConf * const conf = (UpdateToConf *)&signal->theData[0];
- conf->userPtr = c_updateToLock;
- conf->sendingNodeId = failedNodePtr.i;
- conf->startingNodeId = getStartNode(c_updateToLock);
- sendSignal(reference(), GSN_UPDATE_TOCONF, signal,
- UpdateToConf::SignalLength, JBB);
- }//if
-
- if (c_END_TOREQ_Counter.isWaitingFor(failedNodePtr.i)){
- jam();
- EndToConf * const conf = (EndToConf *)&signal->theData[0];
- conf->userPtr = c_endToLock;
- conf->sendingNodeId = failedNodePtr.i;
- conf->startingNodeId = getStartNode(c_endToLock);
- sendSignal(reference(), GSN_END_TOCONF, signal,
- EndToConf::SignalLength, JBB);
- }//if
- }//Dbdih::checkTakeOverInMasterAllNodeFailure()
- void Dbdih::checkTakeOverInMasterCopyNodeFailure(Signal* signal,
- Uint32 failedNodeId)
- {
- //---------------------------------------------------------------------------
- // This code is used to handle failure of the copying node during a take over
- //---------------------------------------------------------------------------
- TakeOverRecordPtr takeOverPtr;
- for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
- jam();
- takeOverPtr.i = i;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- if ((takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG) &&
- (takeOverPtr.p->toCopyNode == failedNodeId)) {
- jam();
- /**
- * The copying node failed but the system is still operational.
- * We restart the copy process by selecting a new copy node.
- * We do not need to add a fragment however since it is already added.
- * We start again from the prepare create fragment phase.
- */
- prepareSendCreateFragReq(signal, takeOverPtr.i);
- }//if
- }//for
- }//Dbdih::checkTakeOverInMasterCopyNodeFailure()
- void Dbdih::checkTakeOverInMasterStartNodeFailure(Signal* signal,
- Uint32 takeOverPtrI)
- {
- jam();
- if (takeOverPtrI == RNIL) {
- jam();
- return;
- }
- //-----------------------------------------------------------------------
- // We are the master and the starting node has failed during a take over.
- // We need to handle this failure in different ways depending on the state.
- //-----------------------------------------------------------------------
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = takeOverPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- bool ok = false;
- switch (takeOverPtr.p->toMasterStatus) {
- case TakeOverRecord::IDLE:
- //-----------------------------------------------------------------------
- // The state cannot be idle when it has a starting node.
- //-----------------------------------------------------------------------
- ndbrequire(false);
- break;
- case TakeOverRecord::TO_WAIT_START_TAKE_OVER:
- jam();
- case TakeOverRecord::TO_START_COPY:
- jam();
- case TakeOverRecord::TO_START_COPY_ONGOING:
- jam();
- case TakeOverRecord::TO_WAIT_START:
- jam();
- case TakeOverRecord::TO_WAIT_PREPARE_CREATE:
- jam();
- case TakeOverRecord::TO_WAIT_UPDATE_TO:
- jam();
- case TakeOverRecord::TO_WAIT_COMMIT_CREATE:
- jam();
- case TakeOverRecord::TO_END_COPY:
- jam();
- case TakeOverRecord::TO_END_COPY_ONGOING:
- jam();
- case TakeOverRecord::TO_WAIT_ENDING:
- jam();
- //-----------------------------------------------------------------------
- // We will not do anything since an internal signal process is outstanding.
- // When the signal arrives the take over will be released.
- //-----------------------------------------------------------------------
- ok = true;
- break;
- case TakeOverRecord::STARTING:
- jam();
- ok = true;
- c_startToLock = RNIL;
- c_START_TOREQ_Counter.clearWaitingFor();
- endTakeOver(takeOverPtr.i);
- break;
- case TakeOverRecord::TO_UPDATE_TO:
- jam();
- ok = true;
- c_updateToLock = RNIL;
- c_UPDATE_TOREQ_Counter.clearWaitingFor();
- endTakeOver(takeOverPtr.i);
- break;
- case TakeOverRecord::ENDING:
- jam();
- ok = true;
- c_endToLock = RNIL;
- c_END_TOREQ_Counter.clearWaitingFor();
- endTakeOver(takeOverPtr.i);
- break;
- case TakeOverRecord::COMMIT_CREATE:
- ok = true;
- jam();
- {// We have mutex
- Mutex m(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
- m.unlock(); // Ignore result
- }
- // Fall through
- case TakeOverRecord::PREPARE_CREATE:
- ok = true;
- jam();
- c_createFragmentLock = RNIL;
- c_CREATE_FRAGREQ_Counter.clearWaitingFor();
- endTakeOver(takeOverPtr.i);
- break;
- case TakeOverRecord::LOCK_MUTEX:
- ok = true;
- jam();
- // Lock mutex will return and do endTakeOver
- break;
-
- //-----------------------------------------------------------------------
- // Signals are outstanding to external nodes. These signals carry the node
- // id of the starting node and will not use the take over record if the
- // starting node has failed.
- //-----------------------------------------------------------------------
- case TakeOverRecord::COPY_FRAG:
- ok = true;
- jam();
- //-----------------------------------------------------------------------
- // The starting node will discover the problem. We will receive either
- // COPY_FRAGREQ or COPY_FRAGCONF and then we can release the take over
- // record and end the process. If the copying node should also die then
- // we will try to send prepare create fragment and will then discover
- // that the starting node has failed.
- //-----------------------------------------------------------------------
- break;
- case TakeOverRecord::COPY_ACTIVE:
- ok = true;
- jam();
- //-----------------------------------------------------------------------
- // In this we are waiting for a signal from the starting node. Thus we
- // can release the take over record and end the process.
- //-----------------------------------------------------------------------
- endTakeOver(takeOverPtr.i);
- break;
- case TakeOverRecord::WAIT_LCP:
- ok = true;
- jam();
- //-----------------------------------------------------------------------
- //-----------------------------------------------------------------------
- endTakeOver(takeOverPtr.i);
- break;
- /**
- * The following are states that it should not be possible to "be" in
- */
- case TakeOverRecord::SELECTING_NEXT:
- jam();
- case TakeOverRecord::TO_COPY_COMPLETED:
- jam();
- ndbrequire(false);
- }
- if(!ok){
- jamLine(takeOverPtr.p->toSlaveStatus);
- ndbrequire(ok);
- }
- }//Dbdih::checkTakeOverInMasterStartNodeFailure()
- void Dbdih::checkTakeOverInNonMasterStartNodeFailure(Signal* signal,
- Uint32 takeOverPtrI)
- {
- jam();
- if (takeOverPtrI == RNIL) {
- jam();
- return;
- }
- //-----------------------------------------------------------------------
- // We are not master and not taking over as master. A take over was ongoing
- // but the starting node has now failed. Handle it according to the state
- // of the take over.
- //-----------------------------------------------------------------------
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = takeOverPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- bool ok = false;
- switch (takeOverPtr.p->toSlaveStatus) {
- case TakeOverRecord::TO_SLAVE_IDLE:
- ndbrequire(false);
- break;
- case TakeOverRecord::TO_SLAVE_STARTED:
- jam();
- case TakeOverRecord::TO_SLAVE_CREATE_PREPARE:
- jam();
- case TakeOverRecord::TO_SLAVE_COPY_FRAG_COMPLETED:
- jam();
- case TakeOverRecord::TO_SLAVE_CREATE_COMMIT:
- jam();
- case TakeOverRecord::TO_SLAVE_COPY_COMPLETED:
- jam();
- ok = true;
- endTakeOver(takeOverPtr.i);
- break;
- }//switch
- if(!ok){
- jamLine(takeOverPtr.p->toSlaveStatus);
- ndbrequire(ok);
- }
- }//Dbdih::checkTakeOverInNonMasterStartNodeFailure()
- void Dbdih::failedNodeSynchHandling(Signal* signal,
- NodeRecordPtr failedNodePtr)
- {
- jam();
- /*----------------------------------------------------*/
- /* INITIALISE THE VARIABLES THAT KEEP TRACK OF */
- /* WHEN A NODE FAILURE IS COMPLETED. */
- /*----------------------------------------------------*/
- failedNodePtr.p->dbdictFailCompleted = ZFALSE;
- failedNodePtr.p->dbtcFailCompleted = ZFALSE;
- failedNodePtr.p->dbdihFailCompleted = ZFALSE;
- failedNodePtr.p->dblqhFailCompleted = ZFALSE;
-
- failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor();
- NodeRecordPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- ptrAss(nodePtr, nodeRecord);
- if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
- jam();
- /**
- * We'r waiting for nodePtr.i to complete
- * handling of failedNodePtr.i's death
- */
- failedNodePtr.p->m_NF_COMPLETE_REP.setWaitingFor(nodePtr.i);
- } else {
- jam();
- if ((nodePtr.p->nodeStatus == NodeRecord::DYING) &&
- (nodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(failedNodePtr.i))){
- jam();
- /*----------------------------------------------------*/
- /* THE NODE FAILED BEFORE REPORTING THE FAILURE */
- /* HANDLING COMPLETED ON THIS FAILED NODE. */
- /* REPORT THAT NODE FAILURE HANDLING WAS */
- /* COMPLETED ON THE NEW FAILED NODE FOR THIS */
- /* PARTICULAR OLD FAILED NODE. */
- /*----------------------------------------------------*/
- NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
- nf->blockNo = 0;
- nf->nodeId = failedNodePtr.i;
- nf->failedNodeId = nodePtr.i;
- nf->from = __LINE__;
- sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- }//if
- }//if
- }//for
- if (failedNodePtr.p->nodeStatus == NodeRecord::DIED_NOW) {
- jam();
- failedNodePtr.p->nodeStatus = NodeRecord::DYING;
- } else {
- jam();
- /*----------------------------------------------------*/
- // No more processing needed when node not even started
- // yet. We give the node status to DEAD since we do not
- // care whether all nodes complete the node failure
- // handling. The node have not been included in the
- // node failure protocols.
- /*----------------------------------------------------*/
- failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
- /**-----------------------------------------------------------------------
- * WE HAVE COMPLETED HANDLING THE NODE FAILURE IN DIH. WE CAN REPORT THIS
- * TO DIH THAT WAIT FOR THE OTHER BLOCKS TO BE CONCLUDED AS WELL.
- *-----------------------------------------------------------------------*/
- NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
- nf->blockNo = DBDIH;
- nf->nodeId = cownNodeId;
- nf->failedNodeId = failedNodePtr.i;
- nf->from = __LINE__;
- sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- }//if
- }//Dbdih::failedNodeSynchHandling()
- Uint32 Dbdih::findTakeOver(Uint32 failedNodeId)
- {
- for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
- jam();
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = i;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- if (takeOverPtr.p->toStartingNode == failedNodeId) {
- jam();
- return i;
- }//if
- }//for
- return RNIL;
- }//Dbdih::findTakeOver()
- Uint32 Dbdih::getStartNode(Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = takeOverPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- return takeOverPtr.p->toStartingNode;
- }//Dbdih::getStartNode()
- void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
- {
- jam();
- const Uint32 nodeId = failedNodePtr.i;
- if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){
- /*----------------------------------------------------*/
- /* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */
- /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */
- /* THE NODE HAVE MISSED A LOCAL CHECKPOINT. */
- /*----------------------------------------------------*/
- switch (failedNodePtr.p->activeStatus) {
- case Sysfile::NS_Active:
- jam();
- failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
- break;
- case Sysfile::NS_ActiveMissed_1:
- jam();
- failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
- break;
- case Sysfile::NS_ActiveMissed_2:
- jam();
- failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
- break;
- case Sysfile::NS_TakeOver:
- jam();
- failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
- break;
- default:
- ndbout << "activeStatus = " << (Uint32) failedNodePtr.p->activeStatus;
- ndbout << " at failure after NODE_FAILREP of node = ";
- ndbout << failedNodePtr.i << endl;
- ndbrequire(false);
- break;
- }//switch
- }//if
- c_lcpState.m_participatingDIH.clear(failedNodePtr.i);
- c_lcpState.m_participatingLQH.clear(failedNodePtr.i);
- if(c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.isWaitingFor(failedNodePtr.i)){
- jam();
- LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
- rep->nodeId = failedNodePtr.i;
- rep->lcpId = SYSFILE->latestLCP_ID;
- rep->blockNo = DBDIH;
- sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
- LcpCompleteRep::SignalLength, JBB);
- }
- /**
- * Check if we'r waiting for the failed node's LQH to complete
- *
- * Note that this is ran "before" LCP master take over
- */
- if(c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId)){
- jam();
- LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
- rep->nodeId = nodeId;
- rep->lcpId = SYSFILE->latestLCP_ID;
- rep->blockNo = DBLQH;
- sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
- LcpCompleteRep::SignalLength, JBB);
- if(c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId)){
- jam();
- /**
- * Make sure we're ready to accept it
- */
- c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodeId);
- }
- }
-
- if (c_TCGETOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
- jam();
- signal->theData[0] = failedNodePtr.i;
- signal->theData[1] = 0;
- sendSignal(reference(), GSN_TCGETOPSIZECONF, signal, 2, JBB);
- }//if
-
- if (c_TC_CLOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
- jam();
- signal->theData[0] = failedNodePtr.i;
- sendSignal(reference(), GSN_TC_CLOPSIZECONF, signal, 1, JBB);
- }//if
- if (c_START_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i)) {
- jam();
- StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
- conf->senderRef = numberToRef(DBLQH, failedNodePtr.i);
- conf->lcpId = SYSFILE->latestLCP_ID;
- sendSignal(reference(), GSN_START_LCP_CONF, signal,
- StartLcpConf::SignalLength, JBB);
- }//if
-
- if (c_EMPTY_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i)) {
- jam();
- EmptyLcpConf * const rep = (EmptyLcpConf *)&signal->theData[0];
- rep->senderNodeId = failedNodePtr.i;
- rep->tableId = ~0;
- rep->fragmentId = ~0;
- rep->lcpNo = 0;
- rep->lcpId = SYSFILE->latestLCP_ID;
- rep->idle = true;
- sendSignal(reference(), GSN_EMPTY_LCP_CONF, signal,
- EmptyLcpConf::SignalLength, JBB);
- }//if
- if (c_MASTER_LCPREQ_Counter.isWaitingFor(failedNodePtr.i)) {
- jam();
- MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
- ref->senderNodeId = failedNodePtr.i;
- ref->failedNodeId = cmasterTakeOverNode;
- sendSignal(reference(), GSN_MASTER_LCPREF, signal,
- MasterLCPRef::SignalLength, JBB);
- }//if
-
- }//Dbdih::failedNodeLcpHandling()
- void Dbdih::checkGcpOutstanding(Signal* signal, Uint32 failedNodeId){
- if (c_GCP_PREPARE_Counter.isWaitingFor(failedNodeId)){
- jam();
- signal->theData[0] = failedNodeId;
- signal->theData[1] = cnewgcp;
- sendSignal(reference(), GSN_GCP_PREPARECONF, signal, 2, JBB);
- }//if
- if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId)) {
- jam();
- signal->theData[0] = failedNodeId;
- signal->theData[1] = coldgcp;
- signal->theData[2] = cfailurenr;
- sendSignal(reference(), GSN_GCP_NODEFINISH, signal, 3, JBB);
- }//if
- if (c_GCP_SAVEREQ_Counter.isWaitingFor(failedNodeId)) {
- jam();
- GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
- saveRef->dihPtr = failedNodeId;
- saveRef->nodeId = failedNodeId;
- saveRef->gci = coldgcp;
- saveRef->errorCode = GCPSaveRef::FakedSignalDueToNodeFailure;
- sendSignal(reference(), GSN_GCP_SAVEREF, signal,
- GCPSaveRef::SignalLength, JBB);
- }//if
- if (c_COPY_GCIREQ_Counter.isWaitingFor(failedNodeId)) {
- jam();
- signal->theData[0] = failedNodeId;
- sendSignal(reference(), GSN_COPY_GCICONF, signal, 1, JBB);
- }//if
-
- if (c_MASTER_GCPREQ_Counter.isWaitingFor(failedNodeId)){
- jam();
- MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
- ref->senderNodeId = failedNodeId;
- ref->failedNodeId = cmasterTakeOverNode;
- sendSignal(reference(), GSN_MASTER_GCPREF, signal,
- MasterGCPRef::SignalLength, JBB);
- }//if
- }//Dbdih::handleGcpStateInMaster()
-
-
- void
- Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
- jam();
- c_lcpMasterTakeOverState.minTableId = ~0;
- c_lcpMasterTakeOverState.minFragId = ~0;
- c_lcpMasterTakeOverState.failedNodeId = nodeId;
-
- c_lcpMasterTakeOverState.set(LMTOS_WAIT_EMPTY_LCP, __LINE__);
-
- if(c_EMPTY_LCP_REQ_Counter.done()){
- jam();
- c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor();
- EmptyLcpReq* req = (EmptyLcpReq*)signal->getDataPtrSend();
- req->senderRef = reference();
- sendLoopMacro(EMPTY_LCP_REQ, sendEMPTY_LCP_REQ);
- ndbrequire(!c_EMPTY_LCP_REQ_Counter.done());
- } else {
- /**
- * Node failure during master take over...
- */
- ndbout_c("Nodefail during master take over");
- }
-
- setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
- }
- void Dbdih::startGcpMasterTakeOver(Signal* signal, Uint32 oldMasterId){
- jam();
- /*--------------------------------------------------*/
- /* */
- /* THE MASTER HAVE FAILED AND WE WERE ELECTED */
- /* TO BE THE NEW MASTER NODE. WE NEED TO QUERY*/
- /* ALL THE OTHER NODES ABOUT THEIR STATUS IN */
- /* ORDER TO BE ABLE TO TAKE OVER CONTROL OF */
- /* THE GLOBAL CHECKPOINT PROTOCOL AND THE */
- /* LOCAL CHECKPOINT PROTOCOL. */
- /*--------------------------------------------------*/
- if(!isMaster()){
- jam();
- return;
- }
- cmasterState = MASTER_TAKE_OVER_GCP;
- cmasterTakeOverNode = oldMasterId;
- MasterGCPReq * const req = (MasterGCPReq *)&signal->theData[0];
- req->masterRef = reference();
- req->failedNodeId = oldMasterId;
- sendLoopMacro(MASTER_GCPREQ, sendMASTER_GCPREQ);
- cgcpMasterTakeOverState = GMTOS_INITIAL;
-
- signal->theData[0] = EventReport::GCP_TakeoverStarted;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
- setLocalNodefailHandling(signal, oldMasterId, NF_GCP_TAKE_OVER);
- }//Dbdih::handleNewMaster()
- void Dbdih::handleTakeOverNewMaster(Signal* signal, Uint32 takeOverPtrI)
- {
- jam();
- if (takeOverPtrI != RNIL) {
- jam();
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = takeOverPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- bool ok = false;
- switch (takeOverPtr.p->toSlaveStatus) {
- case TakeOverRecord::TO_SLAVE_IDLE:
- ndbrequire(false);
- break;
- case TakeOverRecord::TO_SLAVE_STARTED:
- jam();
- case TakeOverRecord::TO_SLAVE_CREATE_PREPARE:
- jam();
- case TakeOverRecord::TO_SLAVE_COPY_FRAG_COMPLETED:
- jam();
- case TakeOverRecord::TO_SLAVE_CREATE_COMMIT:
- jam();
- ok = true;
- infoEvent("Unhandled MasterTO of TO slaveStatus=%d killing node %d",
- takeOverPtr.p->toSlaveStatus,
- takeOverPtr.p->toStartingNode);
- takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_ACTIVE;
-
- {
- BlockReference cntrRef = calcNdbCntrBlockRef(takeOverPtr.p->toStartingNode);
- SystemError * const sysErr = (SystemError*)&signal->theData[0];
- sysErr->errorCode = SystemError::CopyFragRefError;
- sysErr->errorRef = reference();
- sysErr->data1= 0;
- sysErr->data2= __LINE__;
- sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
- SystemError::SignalLength, JBB);
- }
- break;
- case TakeOverRecord::TO_SLAVE_COPY_COMPLETED:
- ok = true;
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
- break;
- }
- ndbrequire(ok);
- }//if
- }//Dbdih::handleTakeOverNewMaster()
- void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr)
- {
- Uint32 nodeId = failedNodePtr.i;
- if(failedNodePtr.p->nodeStatus != NodeRecord::DIED_NOW){
- jam();
- /**
- * Is node isn't alive. It can't be part of LCP
- */
- ndbrequire(!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId));
-
- /**
- * And there is no point in removing any replicas
- * It's dead...
- */
- return;
- }
-
- jam();
- signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
- signal->theData[1] = failedNodePtr.i;
- signal->theData[2] = 0; // Tab id
- sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
-
- setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE);
- }//Dbdih::startRemoveFailedNode()
- /*--------------------------------------------------*/
- /* THE MASTER HAS FAILED AND THE NEW MASTER IS*/
- /* QUERYING THIS NODE ABOUT THE STATE OF THE */
- /* GLOBAL CHECKPOINT PROTOCOL */
- /*--------------------------------------------------*/
- void Dbdih::execMASTER_GCPREQ(Signal* signal)
- {
- NodeRecordPtr failedNodePtr;
- MasterGCPReq * const masterGCPReq = (MasterGCPReq *)&signal->theData[0];
- jamEntry();
- const BlockReference newMasterBlockref = masterGCPReq->masterRef;
- const Uint32 failedNodeId = masterGCPReq->failedNodeId;
- if (c_copyGCISlave.m_copyReason != CopyGCIReq::IDLE) {
- jam();
- /*--------------------------------------------------*/
- /* WE ARE CURRENTLY WRITING THE RESTART INFO */
- /* IN THIS NODE. SINCE ONLY ONE PROCESS IS */
- /* ALLOWED TO DO THIS AT A TIME WE MUST ENSURE*/
- /* THAT THIS IS NOT ONGOING WHEN THE NEW */
- /* MASTER TAKES OVER CONTROL. IF NOT ALL NODES*/
- /* RECEIVE THE SAME RESTART INFO DUE TO THE */
- /* FAILURE OF THE MASTER IT IS TAKEN CARE OF */
- /* BY THE NEW MASTER. */
- /*--------------------------------------------------*/
- sendSignalWithDelay(reference(), GSN_MASTER_GCPREQ,
- signal, 10, MasterGCPReq::SignalLength);
- return;
- }//if
- failedNodePtr.i = failedNodeId;
- ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
- if (failedNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
- jam();
- /*--------------------------------------------------*/
- /* ENSURE THAT WE HAVE PROCESSED THE SIGNAL */
- /* NODE_FAILURE BEFORE WE PROCESS THIS REQUEST*/
- /* FROM THE NEW MASTER. THIS ENSURES THAT WE */
- /* HAVE REMOVED THE FAILED NODE FROM THE LIST */
- /* OF ACTIVE NODES AND SO FORTH. */
- /*--------------------------------------------------*/
- sendSignalWithDelay(reference(), GSN_MASTER_GCPREQ,
- signal, 10, MasterGCPReq::SignalLength);
- return;
- } else {
- ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING);
- }//if
- MasterGCPConf::State gcpState;
- switch (cgcpParticipantState) {
- case GCP_PARTICIPANT_READY:
- jam();
- /*--------------------------------------------------*/
- /* THE GLOBAL CHECKPOINT IS NOT ACTIVE SINCE */
- /* THE PREVIOUS GLOBAL CHECKPOINT IS COMPLETED*/
- /* AND THE NEW HAVE NOT STARTED YET. */
- /*--------------------------------------------------*/
- gcpState = MasterGCPConf::GCP_READY;
- break;
- case GCP_PARTICIPANT_PREPARE_RECEIVED:
- jam();
- /*--------------------------------------------------*/
- /* GCP_PREPARE HAVE BEEN RECEIVED AND RESPONSE*/
- /* HAVE BEEN SENT. */
- /*--------------------------------------------------*/
- gcpState = MasterGCPConf::GCP_PREPARE_RECEIVED;
- break;
- case GCP_PARTICIPANT_COMMIT_RECEIVED:
- jam();
- /*------------------------------------------------*/
- /* GCP_COMMIT HAVE BEEN RECEIVED BUT NOT YET*/
- /* GCP_TCFINISHED FROM LOCAL TC. */
- /*------------------------------------------------*/
- gcpState = MasterGCPConf::GCP_COMMIT_RECEIVED;
- break;
- case GCP_PARTICIPANT_TC_FINISHED:
- jam();
- /*------------------------------------------------*/
- /* GCP_COMMIT HAS BEEN RECEIVED AND ALSO */
- /* GCP_TCFINISHED HAVE BEEN RECEIVED. */
- /*------------------------------------------------*/
- gcpState = MasterGCPConf::GCP_TC_FINISHED;
- break;
- case GCP_PARTICIPANT_COPY_GCI_RECEIVED:
- /*--------------------------------------------------*/
- /* COPY RESTART INFORMATION HAS BEEN RECEIVED */
- /* BUT NOT YET COMPLETED. */
- /*--------------------------------------------------*/
- ndbrequire(false);
- gcpState= MasterGCPConf::GCP_READY; // remove warning
- break;
- default:
- /*------------------------------------------------*/
- /* */
- /* THIS SHOULD NOT OCCUR SINCE THE ABOVE */
- /* STATES ARE THE ONLY POSSIBLE STATES AT A */
- /* NODE WHICH WAS NOT A MASTER NODE. */
- /*------------------------------------------------*/
- ndbrequire(false);
- gcpState= MasterGCPConf::GCP_READY; // remove warning
- break;
- }//switch
- MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
- masterGCPConf->gcpState = gcpState;
- masterGCPConf->senderNodeId = cownNodeId;
- masterGCPConf->failedNodeId = failedNodeId;
- masterGCPConf->newGCP = cnewgcp;
- masterGCPConf->latestLCP = SYSFILE->latestLCP_ID;
- masterGCPConf->oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
- masterGCPConf->keepGCI = SYSFILE->keepGCI;
- for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
- masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i];
- sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
- MasterGCPConf::SignalLength, JBB);
- }//Dbdih::execMASTER_GCPREQ()
- void Dbdih::execMASTER_GCPCONF(Signal* signal)
- {
- NodeRecordPtr senderNodePtr;
- MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
- jamEntry();
- senderNodePtr.i = masterGCPConf->senderNodeId;
- ptrCheckGuard(senderNodePtr, MAX_NDB_NODES, nodeRecord);
-
- MasterGCPConf::State gcpState = (MasterGCPConf::State)masterGCPConf->gcpState;
- const Uint32 failedNodeId = masterGCPConf->failedNodeId;
- const Uint32 newGcp = masterGCPConf->newGCP;
- const Uint32 latestLcpId = masterGCPConf->latestLCP;
- const Uint32 oldestRestorableGci = masterGCPConf->oldestRestorableGCI;
- const Uint32 oldestKeepGci = masterGCPConf->keepGCI;
- if (latestLcpId > SYSFILE->latestLCP_ID) {
- jam();
- #if 0
- ndbout_c("Dbdih: Setting SYSFILE->latestLCP_ID to %d", latestLcpId);
- SYSFILE->latestLCP_ID = latestLcpId;
- #endif
- SYSFILE->keepGCI = oldestKeepGci;
- SYSFILE->oldestRestorableGCI = oldestRestorableGci;
- for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
- SYSFILE->lcpActive[i] = masterGCPConf->lcpActive[i];
- }//if
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- jam();
- senderNodePtr.p->gcpstate = NodeRecord::READY;
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- senderNodePtr.p->gcpstate = NodeRecord::PREPARE_RECEIVED;
- cnewgcp = newGcp;
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- jam();
- senderNodePtr.p->gcpstate = NodeRecord::COMMIT_SENT;
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- senderNodePtr.p->gcpstate = NodeRecord::NODE_FINISHED;
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- switch (cgcpMasterTakeOverState) {
- case GMTOS_INITIAL:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- jam();
- cgcpMasterTakeOverState = ALL_READY;
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- cgcpMasterTakeOverState = ALL_PREPARED;
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- jam();
- cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- cgcpMasterTakeOverState = COMMIT_COMPLETED;
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- case ALL_READY:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- jam();
- /*empty*/;
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- cgcpMasterTakeOverState = PREPARE_STARTED_NOT_COMMITTED;
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- ndbrequire(false);
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- cgcpMasterTakeOverState = SAVE_STARTED_NOT_COMPLETED;
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- case PREPARE_STARTED_NOT_COMMITTED:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- jam();
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- ndbrequire(false);
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- ndbrequire(false);
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- case ALL_PREPARED:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- jam();
- cgcpMasterTakeOverState = PREPARE_STARTED_NOT_COMMITTED;
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- jam();
- cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- case COMMIT_STARTED_NOT_COMPLETED:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- ndbrequire(false);
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- jam();
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- case COMMIT_COMPLETED:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- cgcpMasterTakeOverState = SAVE_STARTED_NOT_COMPLETED;
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- jam();
- cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- jam();
- cgcpMasterTakeOverState = COMMIT_STARTED_NOT_COMPLETED;
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- case SAVE_STARTED_NOT_COMPLETED:
- switch (gcpState) {
- case MasterGCPConf::GCP_READY:
- jam();
- break;
- case MasterGCPConf::GCP_PREPARE_RECEIVED:
- ndbrequire(false);
- break;
- case MasterGCPConf::GCP_COMMIT_RECEIVED:
- ndbrequire(false);
- break;
- case MasterGCPConf::GCP_TC_FINISHED:
- jam();
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- receiveLoopMacro(MASTER_GCPREQ, senderNodePtr.i);
- /*-------------------------------------------------------------------------*/
- // We have now received all responses and are ready to take over the GCP
- // protocol as master.
- /*-------------------------------------------------------------------------*/
- MASTER_GCPhandling(signal, failedNodeId);
- return;
- }//Dbdih::execMASTER_GCPCONF()
- void Dbdih::execMASTER_GCPREF(Signal* signal)
- {
- const MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
- jamEntry();
- receiveLoopMacro(MASTER_GCPREQ, ref->senderNodeId);
- /*-------------------------------------------------------------------------*/
- // We have now received all responses and are ready to take over the GCP
- // protocol as master.
- /*-------------------------------------------------------------------------*/
- MASTER_GCPhandling(signal, ref->failedNodeId);
- }//Dbdih::execMASTER_GCPREF()
- void Dbdih::MASTER_GCPhandling(Signal* signal, Uint32 failedNodeId)
- {
- NodeRecordPtr failedNodePtr;
- cmasterState = MASTER_ACTIVE;
- /*----------------------------------------------------------*/
- /* REMOVE ALL ACTIVE STATUS ON ALREADY FAILED NODES */
- /* THIS IS PERFORMED HERE SINCE WE GET THE LCP ACTIVE */
- /* STATUS AS PART OF THE COPY RESTART INFO AND THIS IS*/
- /* HANDLED BY THE MASTER GCP TAKE OVER PROTOCOL. */
- /*----------------------------------------------------------*/
-
- failedNodePtr.i = failedNodeId;
- ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
- switch (cgcpMasterTakeOverState) {
- case ALL_READY:
- jam();
- startGcp(signal);
- break;
- case PREPARE_STARTED_NOT_COMMITTED:
- {
- NodeRecordPtr nodePtr;
- jam();
- c_GCP_PREPARE_Counter.clearWaitingFor();
- nodePtr.i = cfirstAliveNode;
- do {
- jam();
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- if (nodePtr.p->gcpstate == NodeRecord::READY) {
- jam();
- c_GCP_PREPARE_Counter.setWaitingFor(nodePtr.i);
- sendGCP_PREPARE(signal, nodePtr.i);
- }//if
- nodePtr.i = nodePtr.p->nextNode;
- } while(nodePtr.i != RNIL);
- if (c_GCP_PREPARE_Counter.done()) {
- jam();
- gcpcommitreqLab(signal);
- }//if
- break;
- }
- case ALL_PREPARED:
- jam();
- gcpcommitreqLab(signal);
- break;
- case COMMIT_STARTED_NOT_COMPLETED:
- {
- NodeRecordPtr nodePtr;
- jam();
- c_GCP_COMMIT_Counter.clearWaitingFor();
- nodePtr.i = cfirstAliveNode;
- do {
- jam();
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- if (nodePtr.p->gcpstate == NodeRecord::PREPARE_RECEIVED) {
- jam();
- sendGCP_COMMIT(signal, nodePtr.i);
- c_GCP_COMMIT_Counter.setWaitingFor(nodePtr.i);
- } else {
- ndbrequire((nodePtr.p->gcpstate == NodeRecord::NODE_FINISHED) ||
- (nodePtr.p->gcpstate == NodeRecord::COMMIT_SENT));
- }//if
- nodePtr.i = nodePtr.p->nextNode;
- } while(nodePtr.i != RNIL);
- if (c_GCP_COMMIT_Counter.done()){
- jam();
- gcpsavereqLab(signal);
- }//if
- break;
- }
- case COMMIT_COMPLETED:
- jam();
- gcpsavereqLab(signal);
- break;
- case SAVE_STARTED_NOT_COMPLETED:
- {
- NodeRecordPtr nodePtr;
- jam();
- SYSFILE->newestRestorableGCI = coldgcp;
- nodePtr.i = cfirstAliveNode;
- do {
- jam();
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- SYSFILE->lastCompletedGCI[nodePtr.i] = coldgcp;
- nodePtr.i = nodePtr.p->nextNode;
- } while (nodePtr.i != RNIL);
- /**-------------------------------------------------------------------
- * THE FAILED NODE DID ALSO PARTICIPATE IN THIS GLOBAL CHECKPOINT
- * WHICH IS RECORDED.
- *-------------------------------------------------------------------*/
- SYSFILE->lastCompletedGCI[failedNodeId] = coldgcp;
- copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
- break;
- }
- default:
- ndbrequire(false);
- break;
- }//switch
- signal->theData[0] = EventReport::GCP_TakeoverCompleted;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
- /*--------------------------------------------------*/
- /* WE SEPARATE HANDLING OF GLOBAL CHECKPOINTS */
- /* AND LOCAL CHECKPOINTS HERE. LCP'S HAVE TO */
- /* REMOVE ALL FAILED FRAGMENTS BEFORE WE CAN */
- /* HANDLE THE LCP PROTOCOL. */
- /*--------------------------------------------------*/
- checkLocalNodefailComplete(signal, failedNodeId, NF_GCP_TAKE_OVER);
-
- return;
- }//Dbdih::masterGcpConfFromFailedLab()
- void
- Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, Uint32 tableId)
- {
- jamEntry();
- TabRecordPtr tabPtr;
- tabPtr.i = tableId;
- const Uint32 RT_BREAK = 64;
- if (ERROR_INSERTED(7125)) {
- return;
- }//if
- for (Uint32 i = 0; i<RT_BREAK; i++) {
- jam();
- if (tabPtr.i >= ctabFileSize){
- jam();
- /**
- * Ready with entire loop
- * Return to master
- */
- setAllowNodeStart(nodeId, true);
- if (getNodeStatus(nodeId) == NodeRecord::STARTING) {
- jam();
- StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
- conf->sendingNodeId = cownNodeId;
- conf->startingNodeId = nodeId;
- sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
- StartInfoConf::SignalLength, JBB);
- }//if
- return;
- }//if
- ptrAss(tabPtr, tabRecord);
- if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
- jam();
- invalidateNodeLCP(signal, nodeId, tabPtr);
- return;
- }//if
- tabPtr.i++;
- }//for
- signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
- signal->theData[1] = nodeId;
- signal->theData[2] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
- }//Dbdih::invalidateNodeLCP()
- void
- Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, TabRecordPtr tabPtr)
- {
- /**
- * Check so that no one else is using the tab descriptior
- */
- if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
- jam();
- signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
- signal->theData[1] = nodeId;
- signal->theData[2] = tabPtr.i;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
- return;
- }//if
- /**
- * For each fragment
- */
- bool modified = false;
- FragmentstorePtr fragPtr;
- for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
- jam();
- getFragstore(tabPtr.p, fragNo, fragPtr);
- /**
- * For each of replica record
- */
- ReplicaRecordPtr replicaPtr;
- for(replicaPtr.i = fragPtr.p->oldStoredReplicas; replicaPtr.i != RNIL;
- replicaPtr.i = replicaPtr.p->nextReplica) {
- jam();
- ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
- if(replicaPtr.p->procNode == nodeId){
- jam();
- /**
- * Found one with correct node id
- */
- /**
- * Invalidate all LCP's
- */
- modified = true;
- for(int i = 0; i < MAX_LCP_STORED; i++) {
- replicaPtr.p->lcpStatus[i] = ZINVALID;
- }//if
- /**
- * And reset nextLcp
- */
- replicaPtr.p->nextLcp = 0;
- replicaPtr.p->noCrashedReplicas = 0;
- }//if
- }//for
- }//for
- if (modified) {
- jam();
- /**
- * Save table description to disk
- */
- tabPtr.p->tabCopyStatus = TabRecord::CS_INVALIDATE_NODE_LCP;
- tabPtr.p->tabUpdateState = TabRecord::US_INVALIDATE_NODE_LCP;
- tabPtr.p->tabRemoveNode = nodeId;
- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
- signal->theData[1] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
- return;
- }
-
- jam();
- /**
- * Move to next table
- */
- tabPtr.i++;
- signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
- signal->theData[1] = nodeId;
- signal->theData[2] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
- return;
- }//Dbdih::invalidateNodeLCP()
- /*------------------------------------------------*/
- /* INPUT: TABPTR */
- /* TNODEID */
- /*------------------------------------------------*/
- void Dbdih::removeNodeFromTables(Signal* signal,
- Uint32 nodeId, Uint32 tableId)
- {
- jamEntry();
- TabRecordPtr tabPtr;
- tabPtr.i = tableId;
- const Uint32 RT_BREAK = 64;
- for (Uint32 i = 0; i<RT_BREAK; i++) {
- jam();
- if (tabPtr.i >= ctabFileSize){
- jam();
- removeNodeFromTablesComplete(signal, nodeId);
- return;
- }//if
- ptrAss(tabPtr, tabRecord);
- if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
- jam();
- removeNodeFromTable(signal, nodeId, tabPtr);
- return;
- }//if
- tabPtr.i++;
- }//for
- signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
- signal->theData[1] = nodeId;
- signal->theData[2] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
- }
- void Dbdih::removeNodeFromTable(Signal* signal,
- Uint32 nodeId, TabRecordPtr tabPtr){
-
- /**
- * Check so that no one else is using the tab descriptior
- */
- if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
- jam();
- signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
- signal->theData[1] = nodeId;
- signal->theData[2] = tabPtr.i;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
- return;
- }//if
- /**
- * For each fragment
- */
- Uint32 noOfRemovedReplicas = 0; // No of replicas removed
- Uint32 noOfRemovedLcpReplicas = 0; // No of replicas in LCP removed
- Uint32 noOfRemainingLcpReplicas = 0;// No of replicas in LCP remaining
- //const Uint32 lcpId = SYSFILE->latestLCP_ID;
- const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
-
- FragmentstorePtr fragPtr;
- for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
- jam();
- getFragstore(tabPtr.p, fragNo, fragPtr);
-
- /**
- * For each of replica record
- */
- Uint32 replicaNo = 0;
- ReplicaRecordPtr replicaPtr;
- for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
- replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) {
- jam();
- ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
- if(replicaPtr.p->procNode == nodeId){
- jam();
- noOfRemovedReplicas++;
- removeNodeFromStored(nodeId, fragPtr, replicaPtr);
- if(replicaPtr.p->lcpOngoingFlag){
- jam();
- /**
- * This replica is currently LCP:ed
- */
- ndbrequire(fragPtr.p->noLcpReplicas > 0);
- fragPtr.p->noLcpReplicas --;
-
- noOfRemovedLcpReplicas ++;
- replicaPtr.p->lcpOngoingFlag = false;
- }
- }
- }
- noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
- }
-
- if(noOfRemovedReplicas == 0){
- jam();
- /**
- * The table had no replica on the failed node
- * continue with next table
- */
- tabPtr.i++;
- signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
- signal->theData[1] = nodeId;
- signal->theData[2] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
- return;
- }
-
- /**
- * We did remove at least one replica
- */
- bool ok = false;
- switch(tabPtr.p->tabLcpStatus){
- case TabRecord::TLS_COMPLETED:
- ok = true;
- jam();
- /**
- * WE WILL WRITE THE TABLE DESCRIPTION TO DISK AT THIS TIME
- * INDEPENDENT OF WHAT THE LOCAL CHECKPOINT NEEDED.
- * THIS IS TO ENSURE THAT THE FAILED NODES ARE ALSO UPDATED ON DISK
- * IN THE DIH DATA STRUCTURES BEFORE WE COMPLETE HANDLING OF THE
- * NODE FAILURE.
- */
- ndbrequire(noOfRemovedLcpReplicas == 0);
-
- tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
- tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
- tabPtr.p->tabRemoveNode = nodeId;
- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
- signal->theData[1] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
- return;
- break;
- case TabRecord::TLS_ACTIVE:
- ok = true;
- jam();
- /**
- * The table is participating in an LCP currently
- */
- // Fall through
- break;
- case TabRecord::TLS_WRITING_TO_FILE:
- ok = true;
- jam();
- /**
- * This should never happen since we in the beginning of this function
- * checks the tabCopyStatus
- */
- ndbrequire(lcpOngoingFlag);
- ndbrequire(false);
- break;
- }
- ndbrequire(ok);
- /**
- * The table is participating in an LCP currently
- * and we removed some replicas that should have been checkpointed
- */
- ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
- ndbrequire(tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE);
-
- /**
- * Save the table
- */
- tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
- tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
- tabPtr.p->tabRemoveNode = nodeId;
- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
- signal->theData[1] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
-
- if(noOfRemainingLcpReplicas == 0){
- jam();
- /**
- * The removal on the failed node made the LCP complete
- */
- tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
- checkLcpAllTablesDoneInLqh();
- }
- }
-
- void
- Dbdih::removeNodeFromTablesComplete(Signal* signal, Uint32 nodeId){
- jam();
- /**
- * Check if we "accidently" completed a LCP
- */
- checkLcpCompletedLab(signal);
-
- /**
- * Check if we (DIH) are finished with node fail handling
- */
- checkLocalNodefailComplete(signal, nodeId, NF_REMOVE_NODE_FROM_TABLE);
- }
- void
- Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
- NodefailHandlingStep step){
- jam();
- NodeRecordPtr nodePtr;
- nodePtr.i = failedNodeId;
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
-
- ndbrequire(nodePtr.p->m_nodefailSteps.get(step));
- nodePtr.p->m_nodefailSteps.clear(step);
- if(nodePtr.p->m_nodefailSteps.count() > 0){
- jam();
- return;
- }
- NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
- nf->blockNo = DBDIH;
- nf->nodeId = cownNodeId;
- nf->failedNodeId = failedNodeId;
- nf->from = __LINE__;
- sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- }
- void
- Dbdih::setLocalNodefailHandling(Signal* signal, Uint32 failedNodeId,
- NodefailHandlingStep step){
- jam();
-
- NodeRecordPtr nodePtr;
- nodePtr.i = failedNodeId;
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
-
- ndbrequire(!nodePtr.p->m_nodefailSteps.get(step));
- nodePtr.p->m_nodefailSteps.set(step);
- }
- void Dbdih::startLcpTakeOverLab(Signal* signal, Uint32 failedNodeId)
- {
- /*--------------------------------------------------------------------*/
- // Start LCP master take over process. Consists of the following steps.
- // 1) Ensure that all LQH's have reported all fragments they have been
- // told to checkpoint. Can be a fairly long step time-wise.
- // 2) Query all nodes about their LCP status.
- // During the query process we do not want our own state to change.
- // This can change due to delayed reception of LCP_REPORT, completed
- // save of table on disk or reception of DIH_LCPCOMPLETE from other
- // node.
- /*--------------------------------------------------------------------*/
- }//Dbdih::startLcpTakeOver()
- void Dbdih::execEMPTY_LCP_CONF(Signal* signal)
- {
- jamEntry();
-
- ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_EMPTY_LCP);
-
- const EmptyLcpConf * const conf = (EmptyLcpConf *)&signal->theData[0];
- Uint32 nodeId = conf->senderNodeId;
- if(!conf->idle){
- jam();
- if (conf->tableId < c_lcpMasterTakeOverState.minTableId) {
- jam();
- c_lcpMasterTakeOverState.minTableId = conf->tableId;
- c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
- } else if (conf->tableId == c_lcpMasterTakeOverState.minTableId &&
- conf->fragmentId < c_lcpMasterTakeOverState.minFragId) {
- jam();
- c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
- }//if
- if(isMaster()){
- jam();
- c_lcpState.m_LAST_LCP_FRAG_ORD.setWaitingFor(nodeId);
- }
- }
-
- receiveLoopMacro(EMPTY_LCP_REQ, nodeId);
- /*--------------------------------------------------------------------*/
- // Received all EMPTY_LCPCONF. We can continue with next phase of the
- // take over LCP master process.
- /*--------------------------------------------------------------------*/
- c_lcpMasterTakeOverState.set(LMTOS_WAIT_LCP_FRAG_REP, __LINE__);
- checkEmptyLcpComplete(signal);
- return;
- }//Dbdih::execEMPTY_LCPCONF()
- void
- Dbdih::checkEmptyLcpComplete(Signal *signal){
-
- ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_LCP_FRAG_REP);
-
- if(c_lcpState.noOfLcpFragRepOutstanding > 0){
- jam();
- return;
- }
-
- if(isMaster()){
- jam();
- signal->theData[0] = EventReport::LCP_TakeoverStarted;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
-
- signal->theData[0] = 7012;
- execDUMP_STATE_ORD(signal);
-
- c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__);
- MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
- req->masterRef = reference();
- req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId;
- sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ);
- } else {
- sendMASTER_LCPCONF(signal);
- }
- }
- /*--------------------------------------------------*/
- /* THE MASTER HAS FAILED AND THE NEW MASTER IS*/
- /* QUERYING THIS NODE ABOUT THE STATE OF THE */
- /* LOCAL CHECKPOINT PROTOCOL. */
- /*--------------------------------------------------*/
- void Dbdih::execMASTER_LCPREQ(Signal* signal)
- {
- const MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
- jamEntry();
- const BlockReference newMasterBlockref = req->masterRef;
- Uint32 failedNodeId = req->failedNodeId;
- /**
- * There can be no take over with the same master
- */
- ndbrequire(c_lcpState.m_masterLcpDihRef != newMasterBlockref);
- c_lcpState.m_masterLcpDihRef = newMasterBlockref;
- c_lcpState.m_MASTER_LCPREQ_Received = true;
- c_lcpState.m_MASTER_LCPREQ_FailedNodeId = failedNodeId;
-
- if(newMasterBlockref != cmasterdihref){
- jam();
- ndbrequire(0);
- }
-
- sendMASTER_LCPCONF(signal);
- }//Dbdih::execMASTER_LCPREQ()
- void
- Dbdih::sendMASTER_LCPCONF(Signal * signal){
- if(!c_EMPTY_LCP_REQ_Counter.done()){
- /**
- * Have not received all EMPTY_LCP_REP
- * dare not answer MASTER_LCP_CONF yet
- */
- jam();
- return;
- }
- if(!c_lcpState.m_MASTER_LCPREQ_Received){
- jam();
- /**
- * Has not received MASTER_LCPREQ yet
- */
- return;
- }
-
- if(c_lcpState.lcpStatus == LCP_INIT_TABLES){
- jam();
- /**
- * Still aborting old initLcpLab
- */
- return;
- }
- if(c_lcpState.lcpStatus == LCP_COPY_GCI){
- jam();
- /**
- * Restart it
- */
- //Uint32 lcpId = SYSFILE->latestLCP_ID;
- SYSFILE->latestLCP_ID--;
- c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
- #if 0
- if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
- ndbout_c("Dbdih: Also resetting c_copyGCISlave");
- c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
- c_copyGCISlave.m_expectedNextWord = 0;
- }
- #endif
- }
- bool ok = false;
- MasterLCPConf::State lcpState;
- switch (c_lcpState.lcpStatus) {
- case LCP_STATUS_IDLE:
- ok = true;
- jam();
- /*------------------------------------------------*/
- /* LOCAL CHECKPOINT IS CURRENTLY NOT ACTIVE */
- /* SINCE NO COPY OF RESTART INFORMATION HAVE*/
- /* BEEN RECEIVED YET. ALSO THE PREVIOUS */
- /* CHECKPOINT HAVE BEEN FULLY COMPLETED. */
- /*------------------------------------------------*/
- lcpState = MasterLCPConf::LCP_STATUS_IDLE;
- break;
- case LCP_STATUS_ACTIVE:
- ok = true;
- jam();
- /*--------------------------------------------------*/
- /* COPY OF RESTART INFORMATION HAS BEEN */
- /* PERFORMED AND ALSO RESPONSE HAVE BEEN SENT.*/
- /*--------------------------------------------------*/
- lcpState = MasterLCPConf::LCP_STATUS_ACTIVE;
- break;
- case LCP_TAB_COMPLETED:
- ok = true;
- jam();
- /*--------------------------------------------------------*/
- /* ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR */
- /* ALL TABLES. SAVE OF AT LEAST ONE TABLE IS */
- /* ONGOING YET. */
- /*--------------------------------------------------------*/
- lcpState = MasterLCPConf::LCP_TAB_COMPLETED;
- break;
- case LCP_TAB_SAVED:
- ok = true;
- jam();
- /*--------------------------------------------------------*/
- /* ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR */
- /* ALL TABLES. ALL TABLES HAVE ALSO BEEN SAVED */
- /* ALL OTHER NODES ARE NOT YET FINISHED WITH */
- /* THE LOCAL CHECKPOINT. */
- /*--------------------------------------------------------*/
- lcpState = MasterLCPConf::LCP_TAB_SAVED;
- break;
- case LCP_TCGET:
- case LCP_CALCULATE_KEEP_GCI:
- case LCP_TC_CLOPSIZE:
- case LCP_START_LCP_ROUND:
- /**
- * These should only exists on the master
- * but since this is master take over
- * it not allowed
- */
- ndbrequire(false);
- lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
- break;
- case LCP_COPY_GCI:
- case LCP_INIT_TABLES:
- ok = true;
- /**
- * These two states are handled by if statements above
- */
- ndbrequire(false);
- lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
- break;
- }//switch
- ndbrequire(ok);
- Uint32 failedNodeId = c_lcpState.m_MASTER_LCPREQ_FailedNodeId;
- MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
- conf->senderNodeId = cownNodeId;
- conf->lcpState = lcpState;
- conf->failedNodeId = failedNodeId;
- sendSignal(c_lcpState.m_masterLcpDihRef, GSN_MASTER_LCPCONF,
- signal, MasterLCPConf::SignalLength, JBB);
- // Answer to MASTER_LCPREQ sent, reset flag so
- // that it's not sent again before another request comes in
- c_lcpState.m_MASTER_LCPREQ_Received = false;
- if(c_lcpState.lcpStatus == LCP_TAB_SAVED){
- #ifdef VM_TRACE
- ndbout_c("Sending extra GSN_LCP_COMPLETE_REP to new master");
- #endif
- sendLCP_COMPLETE_REP(signal);
- }
- if(!isMaster()){
- c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
- checkLocalNodefailComplete(signal, failedNodeId, NF_LCP_TAKE_OVER);
- }
-
- return;
- }
- /**
-  * Write the symbolic name of an LCP master take-over state to an NdbOut.
-  *
-  * @param out    Stream that receives the name.
-  * @param state  Take-over state to print.
-  * @return       out, so that insertions can be chained.
-  *
-  * A value matching none of the enumerators listed below writes nothing.
-  */
- NdbOut&
- operator<<(NdbOut& out, const Dbdih::LcpMasterTakeOverState state){
-   switch(state){
-   case Dbdih::LMTOS_IDLE:
-     out << "LMTOS_IDLE";
-     break;
-   case Dbdih::LMTOS_WAIT_EMPTY_LCP:
-     out << "LMTOS_WAIT_EMPTY_LCP";
-     break;
-   case Dbdih::LMTOS_WAIT_LCP_FRAG_REP:
-     // This case used to print "LMTOS_WAIT_EMPTY_LCP", which made the two
-     // waiting states impossible to tell apart in trace output.
-     out << "LMTOS_WAIT_LCP_FRAG_REP";
-     break;
-   case Dbdih::LMTOS_INITIAL:
-     out << "LMTOS_INITIAL";
-     break;
-   case Dbdih::LMTOS_ALL_IDLE:
-     out << "LMTOS_ALL_IDLE";
-     break;
-   case Dbdih::LMTOS_ALL_ACTIVE:
-     out << "LMTOS_ALL_ACTIVE";
-     break;
-   case Dbdih::LMTOS_LCP_CONCLUDING:
-     out << "LMTOS_LCP_CONCLUDING";
-     break;
-   case Dbdih::LMTOS_COPY_ONGOING:
-     out << "LMTOS_COPY_ONGOING";
-     break;
-   }
-   return out;
- }
- /**
-  * One row of the LCP master take-over transition table.
-  * execMASTER_LCPCONF() compares CurrentState with the current take-over
-  * state and ParticipantState with the state carried in MASTER_LCPCONF;
-  * on a match the take-over state is set to NewState.
-  */
- struct MASTERLCP_StateTransitions {
-   Dbdih::LcpMasterTakeOverState CurrentState;      // state the row applies to
-   MasterLCPConf::State          ParticipantState;  // state reported by a node
-   Dbdih::LcpMasterTakeOverState NewState;          // resulting state
- };
- /**
-  * Transition table of the LCP master take-over state machine.
-  * Every row reads { current state, state reported by a participant,
-  * new state }. execMASTER_LCPCONF() scans the rows in order, applies the
-  * first match and requires that one exists, so a combination that is not
-  * listed here fails its ndbrequire(found) check.
-  */
- static const
- MASTERLCP_StateTransitions g_masterLCPTakeoverStateTransitions[] = {
-   // Current = LMTOS_INITIAL
-   { Dbdih::LMTOS_INITIAL, MasterLCPConf::LCP_STATUS_IDLE, Dbdih::LMTOS_ALL_IDLE },
-   { Dbdih::LMTOS_INITIAL, MasterLCPConf::LCP_STATUS_ACTIVE, Dbdih::LMTOS_ALL_ACTIVE },
-   { Dbdih::LMTOS_INITIAL, MasterLCPConf::LCP_TAB_COMPLETED, Dbdih::LMTOS_LCP_CONCLUDING },
-   { Dbdih::LMTOS_INITIAL, MasterLCPConf::LCP_TAB_SAVED, Dbdih::LMTOS_LCP_CONCLUDING },
-   // Current = LMTOS_ALL_IDLE
-   { Dbdih::LMTOS_ALL_IDLE, MasterLCPConf::LCP_STATUS_IDLE, Dbdih::LMTOS_ALL_IDLE },
-   { Dbdih::LMTOS_ALL_IDLE, MasterLCPConf::LCP_STATUS_ACTIVE, Dbdih::LMTOS_COPY_ONGOING },
-   { Dbdih::LMTOS_ALL_IDLE, MasterLCPConf::LCP_TAB_COMPLETED, Dbdih::LMTOS_LCP_CONCLUDING },
-   { Dbdih::LMTOS_ALL_IDLE, MasterLCPConf::LCP_TAB_SAVED, Dbdih::LMTOS_LCP_CONCLUDING },
-   // Current = LMTOS_COPY_ONGOING
-   { Dbdih::LMTOS_COPY_ONGOING, MasterLCPConf::LCP_STATUS_IDLE, Dbdih::LMTOS_COPY_ONGOING },
-   { Dbdih::LMTOS_COPY_ONGOING, MasterLCPConf::LCP_STATUS_ACTIVE, Dbdih::LMTOS_COPY_ONGOING },
-   // Current = LMTOS_ALL_ACTIVE
-   { Dbdih::LMTOS_ALL_ACTIVE, MasterLCPConf::LCP_STATUS_IDLE, Dbdih::LMTOS_COPY_ONGOING },
-   { Dbdih::LMTOS_ALL_ACTIVE, MasterLCPConf::LCP_STATUS_ACTIVE, Dbdih::LMTOS_ALL_ACTIVE },
-   { Dbdih::LMTOS_ALL_ACTIVE, MasterLCPConf::LCP_TAB_COMPLETED, Dbdih::LMTOS_LCP_CONCLUDING },
-   { Dbdih::LMTOS_ALL_ACTIVE, MasterLCPConf::LCP_TAB_SAVED, Dbdih::LMTOS_LCP_CONCLUDING },
-   // Current = LMTOS_LCP_CONCLUDING
-   { Dbdih::LMTOS_LCP_CONCLUDING, MasterLCPConf::LCP_STATUS_IDLE, Dbdih::LMTOS_LCP_CONCLUDING },
-   { Dbdih::LMTOS_LCP_CONCLUDING, MasterLCPConf::LCP_STATUS_ACTIVE, Dbdih::LMTOS_LCP_CONCLUDING },
-   { Dbdih::LMTOS_LCP_CONCLUDING, MasterLCPConf::LCP_TAB_COMPLETED, Dbdih::LMTOS_LCP_CONCLUDING },
-   { Dbdih::LMTOS_LCP_CONCLUDING, MasterLCPConf::LCP_TAB_SAVED, Dbdih::LMTOS_LCP_CONCLUDING }
- };
- // Number of rows in g_masterLCPTakeoverStateTransitions.
- const Uint32 g_masterLCPTakeoverStateTransitionsRows =
-   sizeof(g_masterLCPTakeoverStateTransitions) /
-   sizeof(g_masterLCPTakeoverStateTransitions[0]);
- /**
-  * Handle MASTER_LCPCONF, a participant's answer to MASTER_LCPREQ.
-  *
-  * Records the LCP state the sender reports, advances the take-over state
-  * machine through g_masterLCPTakeoverStateTransitions and, for every
-  * reported state other than LCP_STATUS_IDLE, marks the sender as one to
-  * wait for in m_LCP_COMPLETE_REP_Counter_DIH. Once the last outstanding
-  * answer has arrived, MASTER_LCPhandling() concludes the take-over.
-  *
-  * @param signal  Carries a MasterLCPConf (senderNodeId, lcpState,
-  *                failedNodeId).
-  */
- void Dbdih::execMASTER_LCPCONF(Signal* signal)
- {
-   jamEntry();
-   const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
-   const Uint32 senderNodeId = conf->senderNodeId;
-   const Uint32 failedNodeId = conf->failedNodeId;
-   const MasterLCPConf::State reportedState =
-     (MasterLCPConf::State)conf->lcpState;
-
-   // Remember what the sender reported.
-   NodeRecordPtr senderPtr;
-   senderPtr.i = senderNodeId;
-   ptrCheckGuard(senderPtr, MAX_NDB_NODES, nodeRecord);
-   senderPtr.p->lcpStateAtTakeOver = reportedState;
- #ifdef VM_TRACE
-   ndbout_c("MASTER_LCPCONF");
-   printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
- #endif
-
-   // Apply the first table row matching (current state, reported state).
-   bool found = false;
-   const MASTERLCP_StateTransitions * const lastRow =
-     g_masterLCPTakeoverStateTransitions +
-     g_masterLCPTakeoverStateTransitionsRows;
-   for (const MASTERLCP_StateTransitions * row =
-          g_masterLCPTakeoverStateTransitions;
-        row != lastRow; row++) {
-     if (row->CurrentState != c_lcpMasterTakeOverState.state ||
-         row->ParticipantState != reportedState) {
-       continue;
-     }
-     jam();
-     found = true;
-     c_lcpMasterTakeOverState.set(row->NewState, __LINE__);
-     break;
-   }
-   ndbrequire(found);
-
-   // Any state but idle means the sender must be waited for.
-   bool ok = false;
-   if (reportedState == MasterLCPConf::LCP_STATUS_IDLE) {
-     ok = true;
-   } else if (reportedState == MasterLCPConf::LCP_STATUS_ACTIVE ||
-              reportedState == MasterLCPConf::LCP_TAB_COMPLETED ||
-              reportedState == MasterLCPConf::LCP_TAB_SAVED) {
-     ok = true;
-     c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.setWaitingFor(senderPtr.i);
-   }
-   ndbrequire(ok);
-
-   // Leaves this function while answers are still outstanding.
-   receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
-   /*-----------------------------------------------------------------------*/
-   // All responses have been received; take over the LCP protocol as master.
-   /*-----------------------------------------------------------------------*/
-   MASTER_LCPhandling(signal, failedNodeId);
- }//Dbdih::execMASTER_LCPCONF()
- /**
-  * Handle MASTER_LCPREF, a negative answer to MASTER_LCPREQ.
-  *
-  * The sender is only ticked off as having answered; once no answers are
-  * outstanding, MASTER_LCPhandling() concludes the take-over.
-  *
-  * @param signal  Carries a MasterLCPRef (senderNodeId, failedNodeId).
-  */
- void Dbdih::execMASTER_LCPREF(Signal* signal)
- {
-   jamEntry();
-   const MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
-   const Uint32 senderNodeId = ref->senderNodeId;
-   // Leaves this function while answers are still outstanding.
-   receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
-   /*-----------------------------------------------------------------------*/
-   // All responses have been received; take over the LCP protocol as master.
-   /*-----------------------------------------------------------------------*/
-   MASTER_LCPhandling(signal, ref->failedNodeId);
- }//Dbdih::execMASTER_LCPREF()
- void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
- {
- /*-------------------------------------------------------------------------
- *
- * WE ARE NOW READY TO CONCLUDE THE TAKE OVER AS MASTER.
- * WE HAVE ENOUGH INFO TO START UP ACTIVITIES IN THE PROPER PLACE.
- * ALSO SET THE PROPER STATE VARIABLES.
- *
- * Called by execMASTER_LCPCONF() and execMASTER_LCPREF() when no answer
- * to MASTER_LCPREQ is outstanding any more.
- *
- * signal       - signal buffer, reused for the event report and the
- *                DUMP_STATE_ORD calls at the end.
- * failedNodeId - node id taken from the MASTER_LCPCONF/REF signal; it is
- *                range checked against nodeRecord and handed on to
- *                checkLocalNodefailComplete().
- *
- * The switch picks the continuation from the collected take-over state;
- * any state without a case fails ndbrequire(false).
- *------------------------------------------------------------------------*/
- c_lcpState.currentFragment.tableId = c_lcpMasterTakeOverState.minTableId;
- c_lcpState.currentFragment.fragmentId = c_lcpMasterTakeOverState.minFragId;
- c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH;
- NodeRecordPtr failedNodePtr;
- failedNodePtr.i = failedNodeId;
- ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
- switch (c_lcpMasterTakeOverState.state) {
- case LMTOS_ALL_IDLE:
- jam();
- /* --------------------------------------------------------------------- */
- // All nodes were idle in the LCP protocol. Start checking for start of LCP
- // protocol.
- /* --------------------------------------------------------------------- */
- #ifdef VM_TRACE
- ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart");
- #endif
- checkLcpStart(signal, __LINE__);
- break;
- case LMTOS_COPY_ONGOING:
- jam();
- /* --------------------------------------------------------------------- */
- // We were in the starting process of the LCP protocol. We will restart the
- // protocol by calculating the keep gci and storing the new lcp id.
- /* --------------------------------------------------------------------- */
- #ifdef VM_TRACE
- ndbout_c("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId");
- #endif
- if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) {
- jam();
- /*---------------------------------------------------------------------*/
- /* WE NEED TO DECREASE THE LATEST LCP ID SINCE WE HAVE ALREADY */
- /* STARTED THIS */
- /* LOCAL CHECKPOINT. */
- /*---------------------------------------------------------------------*/
- Uint32 lcpId = SYSFILE->latestLCP_ID; // only read by the VM_TRACE print below
- #ifdef VM_TRACE
- ndbout_c("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1);
- #endif
- SYSFILE->latestLCP_ID--;
- }//if
- storeNewLcpIdLab(signal);
- break;
- case LMTOS_ALL_ACTIVE:
- {
- jam();
- /* -------------------------------------------------------------------
- * Everybody was in the active phase. We will restart sending
- * LCP_FRAGORD to the nodes from the new master.
- * We also need to set dihLcpStatus to ZACTIVE
- * in the master node since the master will wait for all nodes to
- * complete before finalising the LCP process.
- * NOTE(review): the code below sets the status to LCP_START_LCP_ROUND;
- * "dihLcpStatus"/"ZACTIVE" do not appear in it - the wording above looks
- * like it predates a rename, confirm.
- * ------------------------------------------------------------------ */
- #ifdef VM_TRACE
- ndbout_c("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
- "startLcpRoundLoopLab(table=%u, fragment=%u)",
- c_lcpMasterTakeOverState.minTableId,
- c_lcpMasterTakeOverState.minFragId);
- #endif
-
- c_lcpState.keepGci = SYSFILE->keepGCI;
- c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__);
- startLcpRoundLoopLab(signal, 0, 0); // NOTE(review): trace above prints minTableId/minFragId, the call passes 0, 0
- break;
- }
- case LMTOS_LCP_CONCLUDING:
- {
- jam();
- /* ------------------------------------------------------------------- */
- // The LCP process is in the finalisation phase. We simply wait for it to
- // complete with signals arriving in. We need to check also if we should
- // change state due to table write completion during state
- // collection phase.
- /* ------------------------------------------------------------------- */
- ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
- startLcpRoundLoopLab(signal, 0, 0);
- break;
- }
- default:
- ndbrequire(false);
- break;
- }//switch
- signal->theData[0] = EventReport::LCP_TakeoverCompleted;
- signal->theData[1] = c_lcpMasterTakeOverState.state; // reported before the reset to LMTOS_IDLE below
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
- signal->theData[0] = 7012; // dump code passed to execDUMP_STATE_ORD, called directly
- execDUMP_STATE_ORD(signal);
- signal->theData[0] = 7015; // second dump code, same direct call
- execDUMP_STATE_ORD(signal);
- c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
- checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
- }
- /* ------------------------------------------------------------------------- */
- /* A BLOCK OR A NODE HAS COMPLETED THE HANDLING OF THE NODE FAILURE.
-  *
-  * The signal carries an NFCompleteRep (blockNo, nodeId, failedNodeId).
-  *  - blockNo DBTC, DBDICT, DBDIH or DBLQH: a local block is done. The
-  *    matching ...FailCompleted flag of the failed node must still be ZFALSE;
-  *    it is set to ZTRUE and a NodeFailCompleted event is reported. When all
-  *    four flags are ZTRUE, NF_COMPLETEREP with blockNo 0 is sent to DIH on
-  *    every node whose status is ALIVE.
-  *  - blockNo 0: node nodeId is done. It is cleared from the failed node's
-  *    m_NF_COMPLETE_REP set and nodeFailCompletedCheckLab() is called.
-  *  - any other blockNo fails ndbrequire(false).
-  */
- /* ------------------------------------------------------------------------- */
- void Dbdih::execNF_COMPLETEREP(Signal* signal)
- {
- NodeRecordPtr failedNodePtr;
- NFCompleteRep * const nfCompleteRep = (NFCompleteRep *)&signal->theData[0];
- jamEntry();
- const Uint32 blockNo = nfCompleteRep->blockNo; // copied out: signal->theData is overwritten below
- Uint32 nodeId = nfCompleteRep->nodeId;
- failedNodePtr.i = nfCompleteRep->failedNodeId;
- ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
- switch (blockNo) {
- case DBTC:
- jam();
- ndbrequire(failedNodePtr.p->dbtcFailCompleted == ZFALSE);
- /* -------------------------------------------------------------------- */
- // Report the event that DBTC completed node failure handling.
- /* -------------------------------------------------------------------- */
- signal->theData[0] = EventReport::NodeFailCompleted;
- signal->theData[1] = DBTC;
- signal->theData[2] = failedNodePtr.i;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
- failedNodePtr.p->dbtcFailCompleted = ZTRUE;
- break;
- case DBDICT:
- jam();
- ndbrequire(failedNodePtr.p->dbdictFailCompleted == ZFALSE);
- /* --------------------------------------------------------------------- */
- // Report the event that DBDICT completed node failure handling.
- /* --------------------------------------------------------------------- */
- signal->theData[0] = EventReport::NodeFailCompleted;
- signal->theData[1] = DBDICT;
- signal->theData[2] = failedNodePtr.i;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
- failedNodePtr.p->dbdictFailCompleted = ZTRUE;
- break;
- case DBDIH:
- jam();
- ndbrequire(failedNodePtr.p->dbdihFailCompleted == ZFALSE);
- /* --------------------------------------------------------------------- */
- // Report the event that DBDIH completed node failure handling.
- /* --------------------------------------------------------------------- */
- signal->theData[0] = EventReport::NodeFailCompleted;
- signal->theData[1] = DBDIH;
- signal->theData[2] = failedNodePtr.i;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
- failedNodePtr.p->dbdihFailCompleted = ZTRUE;
- break;
- case DBLQH:
- jam();
- ndbrequire(failedNodePtr.p->dblqhFailCompleted == ZFALSE);
- /* --------------------------------------------------------------------- */
- // Report the event that DBLQH completed node failure handling.
- /* --------------------------------------------------------------------- */
- signal->theData[0] = EventReport::NodeFailCompleted;
- signal->theData[1] = DBLQH;
- signal->theData[2] = failedNodePtr.i;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
- failedNodePtr.p->dblqhFailCompleted = ZTRUE;
- break;
- case 0: /* Node has finished */
- jam();
- ndbrequire(nodeId < MAX_NDB_NODES);
- if (failedNodePtr.p->recNODE_FAILREP == ZFALSE) {
- jam();
- /* ------------------------------------------------------------------- */
- // We received a report about completion of node failure before we
- // received the message about the NODE failure ourselves.
- // We will send the signal to ourselves with a small delay
- // (10 milliseconds).
- /* ------------------------------------------------------------------- */
- //nf->from = __LINE__;
- sendSignalWithDelay(reference(), GSN_NF_COMPLETEREP, signal, 10,
- signal->length()); // signal contents are still untouched here, so it is resent as received
- return;
- }//if
-
- if (!failedNodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(nodeId)){
- jam();
- return; // not waiting for this node: the report is ignored
- }
-
- failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor(nodeId);;
-
- /* -------------------------------------------------------------------- */
- // Report the event that nodeId has completed node failure handling.
- /* -------------------------------------------------------------------- */
- signal->theData[0] = EventReport::NodeFailCompleted;
- signal->theData[1] = 0;
- signal->theData[2] = failedNodePtr.i;
- signal->theData[3] = nodeId;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
-
- nodeFailCompletedCheckLab(signal, failedNodePtr);
- return;
- break;
- default:
- ndbrequire(false);
- return;
- break;
- }//switch
- // Only the four block cases reach this point (case 0 and default return).
- if (failedNodePtr.p->dbtcFailCompleted == ZFALSE) {
- jam();
- return;
- }//if
- if (failedNodePtr.p->dbdictFailCompleted == ZFALSE) {
- jam();
- return;
- }//if
- if (failedNodePtr.p->dbdihFailCompleted == ZFALSE) {
- jam();
- return;
- }//if
- if (failedNodePtr.p->dblqhFailCompleted == ZFALSE) {
- jam();
- return;
- }//if
- /* ----------------------------------------------------------------------- */
- /* ALL BLOCKS IN THIS NODE HAVE COMPLETED THEIR PART OF HANDLING THE */
- /* NODE FAILURE. WE CAN NOW REPORT THIS COMPLETION TO ALL OTHER NODES. */
- /* ----------------------------------------------------------------------- */
- NodeRecordPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- ptrAss(nodePtr, nodeRecord);
- if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
- jam();
- BlockReference ref = calcDihBlockRef(nodePtr.i);
- NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0]; // payload rebuilt for every receiver
- nf->blockNo = 0;
- nf->nodeId = cownNodeId;
- nf->failedNodeId = failedNodePtr.i;
- nf->from = __LINE__;
- sendSignal(ref, GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- }//if
- }//for
- return;
- }//Dbdih::execNF_COMPLETEREP()
- /**
-  * Finish node failure handling once every node has reported completion.
-  *
-  * Returns immediately while failedNodePtr's m_NF_COMPLETE_REP set is not
-  * done. Otherwise the node is marked DEAD, its recNODE_FAILREP flag is
-  * cleared, a NodeFailCompleted event is reported and NDB_FAILCONF is sent
-  * to QMGR. On the master the take-over record is then inspected.
-  *
-  * @param signal         Signal buffer reused for the outgoing signals.
-  * @param failedNodePtr  Record of the failed node.
-  */
- void Dbdih::nodeFailCompletedCheckLab(Signal* signal,
-                                       NodeRecordPtr failedNodePtr)
- {
-   jam();
-   if (failedNodePtr.p->m_NF_COMPLETE_REP.done() == false) {
-     jam();
-     return;
-   }
-   /* -------------------------------------------------------------------- */
-   /* Every block in every node has reported completion of the node        */
-   /* failure handling, so the node may be allowed to start again.         */
-   /* -------------------------------------------------------------------- */
-   jam();
-   const Uint32 failedNodeId = failedNodePtr.i;
-   failedNodePtr.p->recNODE_FAILREP = ZFALSE;
-   failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
-
-   // Event: all nodes completed node failure handling.
-   signal->theData[0] = EventReport::NodeFailCompleted;
-   signal->theData[1] = 0;
-   signal->theData[2] = failedNodeId;
-   signal->theData[3] = 0;
-   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
-
-   // Tell QMGR that recovery handling of this node is concluded.
-   signal->theData[0] = failedNodeId;
-   sendSignal(QMGR_REF, GSN_NDB_FAILCONF, signal, 1, JBB);
-
-   if (!isMaster()) {
-     return;
-   }
-   jam();
-   /* -------------------------------------------------------------------- */
-   /* The master must check whether a fragment copy was interrupted by     */
-   /* the failed node.                                                     */
-   /* -------------------------------------------------------------------- */
-   TakeOverRecordPtr toPtr;
-   toPtr.i = 0;
-   ptrAss(toPtr, takeOverRecord);
-   const bool copyNodeFailed =
-     (toPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG) &&
-     (failedNodeId == toPtr.p->toCopyNode);
-   if (!copyNodeFailed) {
-     checkStartTakeOver(signal);
-     return;
-   }
-   jam();
- #ifdef VM_TRACE
-   ndbrequire("Tell jonas" == 0);
- #endif
-   // The node a fragment was being copied from has failed: pick the next one.
-   toPtr.p->toMasterStatus = TakeOverRecord::SELECTING_NEXT;
-   startNextCopyFragment(signal, toPtr.i);
- }//Dbdih::nodeFailCompletedCheckLab()
- /*****************************************************************************/
- /* ********** SEIZING / RELEASING MODULE *************/
- /*****************************************************************************/
- /*
- 3.4 L O C A L N O D E S E I Z E
- ************************************
- */
- /*
- 3.4.1 L O C A L N O D E S E I Z E R E Q U E S T
- ******************************************************
- */
- /**
-  * Handle DISEIZEREQ: hand a connect record to the requesting block.
-  *
-  * Takes the record at the head of the list starting at cfirstconnect
-  * (which must not be empty), stores the requester's pointer and block
-  * reference in it, and answers with DISEIZECONF carrying the requester's
-  * pointer and the index of the seized record.
-  *
-  * @param signal  theData[0] = requester's pointer,
-  *                theData[1] = requester's block reference.
-  */
- void Dbdih::execDISEIZEREQ(Signal* signal)
- {
-   jamEntry();
-   const Uint32 userPtr = signal->theData[0];
-   const BlockReference userRef = signal->theData[1];
-
-   ndbrequire(cfirstconnect != RNIL);
-   ConnectRecordPtr seizedPtr;
-   seizedPtr.i = cfirstconnect;
-   ptrCheckGuard(seizedPtr, cconnectFileSize, connectRecord);
-
-   // Unlink the record from the list, then fill it in.
-   cfirstconnect = seizedPtr.p->nfConnect;
-   seizedPtr.p->nfConnect = RNIL;
-   seizedPtr.p->connectState = ConnectRecord::INUSE;
-   seizedPtr.p->userblockref = userRef;
-   seizedPtr.p->userpointer = userPtr;
-
-   signal->theData[0] = seizedPtr.p->userpointer;
-   signal->theData[1] = seizedPtr.i;
-   sendSignal(userRef, GSN_DISEIZECONF, signal, 2, JBB);
- }//Dbdih::execDISEIZEREQ()
- /*
- 3.5 L O C A L N O D E R E L E A S E
- ****************************************
- */
- /*
- 3.5.1 L O C A L N O D E R E L E A S E R E Q U E S T
- *******************************************************
- */
- /**
-  * Handle DIRELEASEREQ: give a previously seized connect record back.
-  *
-  * The record must not be FREE and must belong to the block reference in
-  * the request. DIRELEASECONF (carrying the owner's pointer) is sent to the
-  * owner before the record is released.
-  *
-  * @param signal  theData[0] = index of the connect record,
-  *                theData[2] = block reference of the requester.
-  */
- void Dbdih::execDIRELEASEREQ(Signal* signal)
- {
-   jamEntry();
-   const Uint32 userRef = signal->theData[2];
-   ConnectRecordPtr releasePtr;
-   releasePtr.i = signal->theData[0];
-   ptrCheckGuard(releasePtr, cconnectFileSize, connectRecord);
-
-   ndbrequire(releasePtr.p->connectState != ConnectRecord::FREE);
-   ndbrequire(releasePtr.p->userblockref == userRef);
-
-   signal->theData[0] = releasePtr.p->userpointer;
-   sendSignal(releasePtr.p->userblockref, GSN_DIRELEASECONF, signal, 1, JBB);
-   release_connect(releasePtr);
- }//Dbdih::execDIRELEASEREQ()
- /*
- 3.7 A D D T A B L E
- **********************
- */
- /*****************************************************************************/
- /* ********** TABLE ADDING MODULE *************/
- /*****************************************************************************/
- /*
- 3.7.1 A D D T A B L E M A I N L Y
- ***************************************
- */
- /**
-  * Handle CREATE_FRAGMENTATION_REQ: decide which nodes hold the replicas of
-  * every fragment of a table.
-  *
-  * Request fields read: senderRef, senderData, fragmentNode,
-  * fragmentationType and primaryTableId.
-  *
-  * On success CREATE_FRAGMENTATION_CONF is sent to senderRef with one linear
-  * section laid out as
-  *   [0] noOfReplicas, [1] noOfFragments,
-  *   followed by noOfReplicas node ids for each fragment.
-  * When primaryTableId is RNIL the node ids are picked round-robin from the
-  * node groups; otherwise they are copied from the replicas of the primary
-  * table, preferred primary first.
-  *
-  * On failure CREATE_FRAGMENTATION_REF is sent to senderRef with errorCode
-  * set to one of the CreateFragmentationRef codes assigned below.
-  *
-  * The body is a do { ... } while(false) so that `break` acts as "go to the
-  * REF path".
-  */
- void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal){
-   jamEntry();
-   CreateFragmentationReq * const req =
-     (CreateFragmentationReq*)signal->getDataPtr();
-
-   const Uint32 senderRef = req->senderRef;
-   const Uint32 senderData = req->senderData;
-   const Uint32 fragmentNode = req->fragmentNode;
-   const Uint32 fragmentType = req->fragmentationType;
-   //const Uint32 fragmentCount = req->noOfFragments;
-   const Uint32 primaryTableId = req->primaryTableId;
-   Uint32 err = 0;
-
-   do {
-     // The fragment count follows from the fragmentation type.
-     Uint32 noOfFragments = 0;
-     Uint32 noOfReplicas = cnoReplicas;
-     switch(fragmentType){
-     case DictTabInfo::AllNodesSmallTable:
-       jam();
-       noOfFragments = csystemnodes;
-       break;
-     case DictTabInfo::AllNodesMediumTable:
-       jam();
-       noOfFragments = 2 * csystemnodes;
-       break;
-     case DictTabInfo::AllNodesLargeTable:
-       jam();
-       noOfFragments = 4 * csystemnodes;
-       break;
-     case DictTabInfo::SingleFragment:
-       jam();
-       noOfFragments = 1;
-       break;
- #if 0
-     case DictTabInfo::SpecifiedFragmentCount:
-       noOfFragments = (fragmentCount == 0 ? 1 : (fragmentCount + 1)/ 2);
-       break;
- #endif
-     default:
-       jam();
-       err = CreateFragmentationRef::InvalidFragmentationType;
-       break;
-     }
-     if(err)
-       break;
-
-     NodeGroupRecordPtr NGPtr;
-     TabRecordPtr primTabPtr;
-     if (primaryTableId == RNIL) {
-       if(fragmentNode == 0){
-         jam();
-         // needs to be fixed for single fragment tables
-         NGPtr.i = 0; //c_nextNodeGroup;
-         c_nextNodeGroup = (NGPtr.i + 1 == cnoOfNodeGroups ? 0 : NGPtr.i + 1);
-       } else if(! (fragmentNode < MAX_NDB_NODES)) {
-         jam();
-         err = CreateFragmentationRef::InvalidNodeId;
-         // Leave for the REF path. Without this break the code below ran
-         // with NGPtr.i uninitialised and the error was never reported.
-         break;
-       } else {
-         jam();
-         const Uint32 stat = Sysfile::getNodeStatus(fragmentNode,
-                                                    SYSFILE->nodeStatus);
-         switch (stat) {
-         case Sysfile::NS_Active:
-         case Sysfile::NS_ActiveMissed_1:
-         case Sysfile::NS_ActiveMissed_2:
-         case Sysfile::NS_TakeOver:
-           jam();
-           break;
-         case Sysfile::NS_NotActive_NotTakenOver:
-           jam();
-           break;
-         case Sysfile::NS_HotSpare:
-           jam();
-           // falls through: hot spare, undefined and unknown statuses are
-           // all rejected with InvalidNodeType
-         case Sysfile::NS_NotDefined:
-           jam();
-         default:
-           jam();
-           err = CreateFragmentationRef::InvalidNodeType;
-           break;
-         }
-         if(err)
-           break;
-         NGPtr.i = Sysfile::getNodeGroup(fragmentNode,
-                                         SYSFILE->nodeGroups);
-         // NOTE(review): this break leaves the do/while with err == 0, so a
-         // request naming a valid fragmentNode is answered with
-         // CREATE_FRAGMENTATION_REF carrying errorCode 0 and the NGPtr.i
-         // computed above is never used. Kept as found; confirm whether the
-         // explicit-node path is meant to be disabled.
-         break;
-       }
-     } else {
-       if (primaryTableId >= ctabFileSize) {
-         jam();
-         err = CreateFragmentationRef::InvalidPrimaryTable;
-         break;
-       }
-       primTabPtr.i = primaryTableId;
-       ptrAss(primTabPtr, tabRecord);
-       if (primTabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
-         jam();
-         err = CreateFragmentationRef::InvalidPrimaryTable;
-         break;
-       }
-       if (noOfFragments != primTabPtr.p->totalfragments) {
-         jam();
-         err = CreateFragmentationRef::InvalidFragmentationType;
-         break;
-       }
-     }
-
-     //@todo use section writer
-     Uint32 count = 2;  // slots 0 and 1 are filled in just before sending
-     Uint32 fragments[2 + 8*MAX_REPLICAS*MAX_NDB_NODES];
-     Uint32 next_replica_node[MAX_NDB_NODES];
-     memset(next_replica_node,0,sizeof(next_replica_node));
-     if (primaryTableId == RNIL) {
-       jam();
-       for(Uint32 fragNo = 0; fragNo<noOfFragments; fragNo++){
-         jam();
-         ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
-         Uint32 ind = next_replica_node[NGPtr.i];
-         const Uint32 max = NGPtr.p->nodeCount;
-         //-------------------------------------------------------------------
-         // We make an extra step to ensure that the primary replicas are
-         // spread among the nodes.
-         //-------------------------------------------------------------------
-         next_replica_node[NGPtr.i] = (ind + 1 >= max ? 0 : ind + 1);
-
-         for(Uint32 replicaNo = 0; replicaNo<noOfReplicas; replicaNo++){
-           jam();
-           const Uint32 nodeId = NGPtr.p->nodesInGroup[ind++];
-           fragments[count++] = nodeId;
-           ind = (ind == max ? 0 : ind);
-         }
-
-         /**
-          * Next node group for next fragment
-          */
-         NGPtr.i++;
-         NGPtr.i = (NGPtr.i == cnoOfNodeGroups ? 0 : NGPtr.i);
-       }
-     } else {
-       for (Uint32 fragNo = 0;
-            fragNo < primTabPtr.p->totalfragments; fragNo++) {
-         jam();
-         FragmentstorePtr fragPtr;
-         ReplicaRecordPtr replicaPtr;
-         getFragstore(primTabPtr.p, fragNo, fragPtr);
-         // Preferred primary first, then every other stored and old stored
-         // replica of the primary table's fragment.
-         fragments[count++] = fragPtr.p->preferredPrimary;
-         for (replicaPtr.i = fragPtr.p->storedReplicas;
-              replicaPtr.i != RNIL;
-              replicaPtr.i = replicaPtr.p->nextReplica) {
-           jam();
-           ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
-           if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
-             jam();
-             fragments[count++] = replicaPtr.p->procNode;
-           }//if
-         }//for
-         for (replicaPtr.i = fragPtr.p->oldStoredReplicas;
-              replicaPtr.i != RNIL;
-              replicaPtr.i = replicaPtr.p->nextReplica) {
-           jam();
-           ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
-           if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
-             jam();
-             fragments[count++] = replicaPtr.p->procNode;
-           }//if
-         }//for
-       }
-     }
-     // Exactly noOfReplicas entries must have been written per fragment.
-     ndbrequire(count == (2 + noOfReplicas * noOfFragments));
-
-     CreateFragmentationConf * const conf =
-       (CreateFragmentationConf*)signal->getDataPtrSend();
-     conf->senderRef = reference();
-     conf->senderData = senderData;
-     conf->noOfReplicas = noOfReplicas;
-     conf->noOfFragments = noOfFragments;
-     fragments[0] = noOfReplicas;
-     fragments[1] = noOfFragments;
-
-     LinearSectionPtr ptr[3];
-     ptr[0].p = &fragments[0];
-     ptr[0].sz = count;
-     sendSignal(senderRef,
-                GSN_CREATE_FRAGMENTATION_CONF,
-                signal,
-                CreateFragmentationConf::SignalLength,
-                JBB,
-                ptr,
-                1);
-     return;
-   } while(false);
-
-   // REF path: reached through any `break` above.
-   CreateFragmentationRef * const ref =
-     (CreateFragmentationRef*)signal->getDataPtrSend();
-   ref->senderRef = reference();
-   ref->senderData = senderData;
-   ref->errorCode = err;
-   sendSignal(senderRef, GSN_CREATE_FRAGMENTATION_REF, signal,
-              CreateFragmentationRef::SignalLength, JBB);
- }
- void Dbdih::execDIADDTABREQ(Signal* signal)
- { /* Handles DIADDTABREQ: seizes a connect record for the sender and starts
-    * adding table req->tableId. Three paths follow:
-    *  - table already TS_ACTIVE: go straight to sendAddFragreq();
-    *  - system restart in progress and table TS_IDLE: open the table's
-    *    first file (tabFile[0]) with reqStatus OPENING_TABLE;
-    *  - otherwise: build fragment and replica records from the
-    *    FRAGMENTATION section and send CONTINUEB(ZPACK_TABLE_INTO_PAGES).
-    * Refusals go through addtabrefuseLab() with ZREPLERROR1. */
- jamEntry();
- DiAddTabReq * const req = (DiAddTabReq*)signal->getDataPtr();
- // Seize connect record
- ndbrequire(cfirstconnect != RNIL);
- ConnectRecordPtr connectPtr;
- connectPtr.i = cfirstconnect;
- ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
- cfirstconnect = connectPtr.p->nfConnect;
-
- const Uint32 userPtr = req->connectPtr;
- const BlockReference userRef = signal->getSendersBlockRef();
- connectPtr.p->nfConnect = RNIL;
- connectPtr.p->userpointer = userPtr;
- connectPtr.p->userblockref = userRef;
- connectPtr.p->connectState = ConnectRecord::INUSE;
- connectPtr.p->table = req->tableId;
-
- TabRecordPtr tabPtr;
- tabPtr.i = req->tableId;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- tabPtr.p->connectrec = connectPtr.i;
- tabPtr.p->tableType = req->tableType;
- tabPtr.p->schemaVersion = req->schemaVersion;
- tabPtr.p->primaryTableId = req->primaryTableId;
- if(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE){
- jam();
- tabPtr.p->tabStatus = TabRecord::TS_CREATING;
- sendAddFragreq(signal, connectPtr, tabPtr, 0); // start with fragment 0
- return;
- }
- if(getNodeState().getSystemRestartInProgress() &&
- tabPtr.p->tabStatus == TabRecord::TS_IDLE){
- jam();
-
- ndbrequire(cmasterNodeId == getOwnNodeId()); // this path is only accepted on the master
- tabPtr.p->tabStatus = TabRecord::TS_CREATING;
-
- initTableFile(tabPtr);
- FileRecordPtr filePtr;
- filePtr.i = tabPtr.p->tabFile[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- openFileRw(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
- return;
- }
- /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
- /* AT THE TIME OF INITIATING THE FILE OF TABLE */
- /* DESCRIPTION IS CREATED FOR APPROPRIATE SIZE. EACH */
- /* EACH RECORD IN THIS FILE HAS THE INFORMATION ABOUT */
- /* ONE TABLE. THE POINTER TO THIS RECORD IS THE TABLE */
- /* REFERENCE. IN THE BEGINNING ALL RECORDS ARE CREATED */
- /* BUT THEY DO NOT HAVE ANY INFORMATION ABOUT ANY TABLE*/
- /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
- tabPtr.p->tabStatus = TabRecord::TS_CREATING;
- tabPtr.p->storedTable = req->storedTable;
- tabPtr.p->method = TabRecord::HASH;
- tabPtr.p->kvalue = req->kValue;
- Uint32 fragments[2 + 8*MAX_REPLICAS*MAX_NDB_NODES];
- SegmentedSectionPtr fragDataPtr;
- signal->getSection(fragDataPtr, DiAddTabReq::FRAGMENTATION);
- copy(fragments, fragDataPtr); // NOTE(review): no visible check that the section fits in fragments[] - confirm
- releaseSections(signal);
- const Uint32 noReplicas = fragments[0]; // section layout: [0] replicas, [1] fragments, then node ids
- const Uint32 noFragments = fragments[1];
- tabPtr.p->noOfBackups = noReplicas - 1;
- tabPtr.p->totalfragments = noFragments;
- ndbrequire(noReplicas == cnoReplicas); // Only allowed
- if (ERROR_INSERTED(7173)) {
- addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
- return;
- }
- if ((noReplicas * noFragments) > cnoFreeReplicaRec) {
- jam();
- addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
- return;
- }//if
- if (noFragments > cremainingfrags) {
- jam();
- addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
- return;
- }//if
-
- Uint32 logTotalFragments = 1;
- while (logTotalFragments <= tabPtr.p->totalfragments) {
- jam();
- logTotalFragments <<= 1;
- }
- logTotalFragments >>= 1; // now the largest power of two <= totalfragments
- tabPtr.p->mask = logTotalFragments - 1;
- tabPtr.p->hashpointer = tabPtr.p->totalfragments - logTotalFragments;
- allocFragments(tabPtr.p->totalfragments, tabPtr);
- Uint32 index = 2; // first node id in fragments[]
- for (Uint32 fragId = 0; fragId < noFragments; fragId++) {
- jam();
- FragmentstorePtr fragPtr;
- Uint32 activeIndex = 0;
- getFragstore(tabPtr.p, fragId, fragPtr);
- fragPtr.p->preferredPrimary = fragments[index]; // first node listed for the fragment
- for (Uint32 i = 0; i<noReplicas; i++) {
- const Uint32 nodeId = fragments[index++];
- ReplicaRecordPtr replicaPtr;
- allocStoredReplica(fragPtr, replicaPtr, nodeId);
- if (getNodeStatus(nodeId) == NodeRecord::ALIVE) {
- jam();
- ndbrequire(activeIndex < MAX_REPLICAS);
- fragPtr.p->activeNodes[activeIndex] = nodeId;
- activeIndex++;
- } else {
- jam();
- removeStoredReplica(fragPtr, replicaPtr); // node not ALIVE: move the replica to the old stored list
- linkOldStoredReplica(fragPtr, replicaPtr);
- }//if
- }//for
- fragPtr.p->fragReplicas = activeIndex;
- ndbrequire(activeIndex > 0 && fragPtr.p->storedReplicas != RNIL); // at least one replica on an ALIVE node
- }
- initTableFile(tabPtr);
- tabPtr.p->tabCopyStatus = TabRecord::CS_ADD_TABLE_MASTER;
- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
- signal->theData[1] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
- }
- /**
-  * Continue adding a table: look up the table record and the connect
-  * record stored in it, then start sendAddFragreq() at fragment 0.
-  *
-  * @param signal   Signal buffer handed on to sendAddFragreq().
-  * @param tabPtrI  Index of the table record.
-  */
- void
- Dbdih::addTable_closeConf(Signal * signal, Uint32 tabPtrI){
-   TabRecordPtr tablePtr;
-   tablePtr.i = tabPtrI;
-   ptrCheckGuard(tablePtr, ctabFileSize, tabRecord);
-
-   ConnectRecordPtr ownerPtr;
-   ownerPtr.i = tablePtr.p->connectrec;
-   ptrCheckGuard(ownerPtr, cconnectFileSize, connectRecord);
-
-   const Uint32 firstFragId = 0;
-   sendAddFragreq(signal, ownerPtr, tablePtr, firstFragId);
- }
- /**
-  * Send ADD_FRAGREQ for the next fragment that has a replica on this node,
-  * or finish the add-table request when there is none left.
-  *
-  * Fragments are scanned from fragId upwards. For each one the stored
-  * replicas and then the old stored replicas are searched for a replica
-  * whose procNode is the own node id. The first hit produces one
-  * ADD_FRAGREQ to DBDICT. If the scan ends without a hit, DIADDTABCONF is
-  * sent to the requester and the connect record is released.
-  *
-  * @param signal      Signal buffer used for the outgoing signal.
-  * @param connectPtr  Connect record of the add-table request.
-  * @param tabPtr      Table being added.
-  * @param fragId      First fragment id to examine.
-  */
- void
- Dbdih::sendAddFragreq(Signal* signal, ConnectRecordPtr connectPtr,
-                       TabRecordPtr tabPtr, Uint32 fragId){
-   jam();
-   const Uint32 fragCount = tabPtr.p->totalfragments;
-   ReplicaRecordPtr replicaPtr;
-   replicaPtr.i = RNIL;
-   while (fragId < fragCount) {
-     jam();
-     FragmentstorePtr fragPtr;
-     getFragstore(tabPtr.p, fragId, fragPtr);
-
-     // Stored replicas first ...
-     for (replicaPtr.i = fragPtr.p->storedReplicas;
-          replicaPtr.i != RNIL;
-          replicaPtr.i = replicaPtr.p->nextReplica) {
-       jam();
-       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
-       if (replicaPtr.p->procNode == getOwnNodeId()) {
-         break;
-       }
-     }
-     if (replicaPtr.i != RNIL) {
-       jam();
-       break;
-     }
-
-     // ... then the old stored replicas.
-     for (replicaPtr.i = fragPtr.p->oldStoredReplicas;
-          replicaPtr.i != RNIL;
-          replicaPtr.i = replicaPtr.p->nextReplica) {
-       jam();
-       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
-       if (replicaPtr.p->procNode == getOwnNodeId()) {
-         break;
-       }
-     }
-     if (replicaPtr.i != RNIL) {
-       jam();
-       break;
-     }
-     fragId++;
-   }
-
-   if (replicaPtr.i == RNIL) {
-     // Done: no further fragment has a replica on this node.
-     DiAddTabConf * const conf = (DiAddTabConf*)signal->getDataPtr();
-     conf->senderData = connectPtr.p->userpointer;
-     sendSignal(connectPtr.p->userblockref, GSN_DIADDTABCONF, signal,
-                DiAddTabConf::SignalLength, JBB);
-     // Release
-     release_connect(connectPtr);
-     return;
-   }
-
-   jam();
-   ndbrequire(fragId < fragCount);
-   ndbrequire(replicaPtr.p->procNode == getOwnNodeId());
-
-   Uint32 requestInfo = 0;
-   if (!tabPtr.p->storedTable) {
-     requestInfo |= LqhFragReq::TemporaryTable;
-   }
-   if (getNodeState().getNodeRestartInProgress()) {
-     requestInfo |= LqhFragReq::CreateInRunning;
-   }
-
-   AddFragReq* const req = (AddFragReq*)signal->getDataPtr();
-   req->dihPtr = connectPtr.i;
-   req->senderData = connectPtr.p->userpointer;
-   req->tableId = tabPtr.i;
-   req->fragmentId = fragId;
-   req->totalFragments = fragCount;
-   req->nodeId = getOwnNodeId();
-   req->requestInfo = requestInfo;
-   req->nextLCP = 0;
-   req->startGci = SYSFILE->newestRestorableGCI;
-   sendSignal(DBDICT_REF, GSN_ADD_FRAGREQ, signal,
-              AddFragReq::SignalLength, JBB);
- }
- void
- Dbdih::release_connect(ConnectRecordPtr ptr)
- {
- ptr.p->userblockref = ZNIL;
- ptr.p->userpointer = RNIL;
- ptr.p->connectState = ConnectRecord::FREE;
- ptr.p->nfConnect = cfirstconnect;
- cfirstconnect = ptr.i;
- }
- void
- Dbdih::execADD_FRAGCONF(Signal* signal){
- jamEntry();
- AddFragConf * const conf = (AddFragConf*)signal->getDataPtr();
- ConnectRecordPtr connectPtr;
- connectPtr.i = conf->dihPtr;
- ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
- TabRecordPtr tabPtr;
- tabPtr.i = connectPtr.p->table;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- sendAddFragreq(signal, connectPtr, tabPtr, conf->fragId + 1);
- }
- void
- Dbdih::execADD_FRAGREF(Signal* signal){
- jamEntry();
- AddFragRef * const ref = (AddFragRef*)signal->getDataPtr();
- ConnectRecordPtr connectPtr;
- connectPtr.i = ref->dihPtr;
- ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
- {
- DiAddTabRef * const ref = (DiAddTabRef*)signal->getDataPtr();
- ref->senderData = connectPtr.p->userpointer;
- ref->errorCode = ~0;
- sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal,
- DiAddTabRef::SignalLength, JBB);
- }
-
- // Release
- release_connect(connectPtr);
- }
- /*
- 3.7.1.3 R E F U S E
- *********************
- */
- void Dbdih::addtabrefuseLab(Signal* signal, ConnectRecordPtr connectPtr, Uint32 errorCode)
- {
- signal->theData[0] = connectPtr.p->userpointer;
- signal->theData[1] = errorCode;
- sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal, 2, JBB);
- release_connect(connectPtr);
- return;
- }//Dbdih::addtabrefuseLab()
/*
  3.7.2   A D D   T A B L E   D U P L I C A T I O N
  *************************************************
  */
/*
  3.7.2.1   A D D   T A B L E   D U P L I C A T I O N   R E Q U E S T
  *******************************************************************
  */
/*
  D E L E T E   T A B L E
  ***********************
  */
/*****************************************************************************/
/***********              DELETE TABLE  MODULE                   *************/
/*****************************************************************************/
- void
- Dbdih::execDROP_TAB_REQ(Signal* signal){
- jamEntry();
- DropTabReq* req = (DropTabReq*)signal->getDataPtr();
- TabRecordPtr tabPtr;
- tabPtr.i = req->tableId;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
-
- tabPtr.p->m_dropTab.tabUserRef = req->senderRef;
- tabPtr.p->m_dropTab.tabUserPtr = req->senderData;
- DropTabReq::RequestType rt = (DropTabReq::RequestType)req->requestType;
- switch(rt){
- case DropTabReq::OnlineDropTab:
- jam();
- ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_DROPPING);
- releaseTable(tabPtr);
- break;
- case DropTabReq::CreateTabDrop:
- jam();
- releaseTable(tabPtr);
- break;
- case DropTabReq::RestartDropTab:
- break;
- }
-
- startDeleteFile(signal, tabPtr);
- }
- void Dbdih::startDeleteFile(Signal* signal, TabRecordPtr tabPtr)
- {
- if (tabPtr.p->tabFile[0] == RNIL) {
- jam();
- initTableFile(tabPtr);
- }//if
- openTableFileForDelete(signal, tabPtr.p->tabFile[0]);
- }//Dbdih::startDeleteFile()
- void Dbdih::openTableFileForDelete(Signal* signal, Uint32 fileIndex)
- {
- FileRecordPtr filePtr;
- filePtr.i = fileIndex;
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- openFileRw(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::TABLE_OPEN_FOR_DELETE;
- }//Dbdih::openTableFileForDelete()
- void Dbdih::tableOpenLab(Signal* signal, FileRecordPtr filePtr)
- {
- closeFileDelete(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::TABLE_CLOSE_DELETE;
- return;
- }//Dbdih::tableOpenLab()
- void Dbdih::tableDeleteLab(Signal* signal, FileRecordPtr filePtr)
- {
- TabRecordPtr tabPtr;
- tabPtr.i = filePtr.p->tabRef;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- if (filePtr.i == tabPtr.p->tabFile[0]) {
- jam();
- openTableFileForDelete(signal, tabPtr.p->tabFile[1]);
- return;
- }//if
- ndbrequire(filePtr.i == tabPtr.p->tabFile[1]);
-
- releaseFile(tabPtr.p->tabFile[0]);
- releaseFile(tabPtr.p->tabFile[1]);
- tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
- tabPtr.p->tabStatus = TabRecord::TS_IDLE;
-
- DropTabConf * const dropConf = (DropTabConf *)signal->getDataPtrSend();
- dropConf->senderRef = reference();
- dropConf->senderData = tabPtr.p->m_dropTab.tabUserPtr;
- dropConf->tableId = tabPtr.i;
- sendSignal(tabPtr.p->m_dropTab.tabUserRef, GSN_DROP_TAB_CONF,
- signal, DropTabConf::SignalLength, JBB);
-
- tabPtr.p->m_dropTab.tabUserPtr = RNIL;
- tabPtr.p->m_dropTab.tabUserRef = 0;
- }//Dbdih::tableDeleteLab()
- void Dbdih::releaseTable(TabRecordPtr tabPtr)
- {
- FragmentstorePtr fragPtr;
- if (tabPtr.p->noOfFragChunks > 0) {
- for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
- jam();
- getFragstore(tabPtr.p, fragId, fragPtr);
- releaseReplicas(fragPtr.p->storedReplicas);
- releaseReplicas(fragPtr.p->oldStoredReplicas);
- }//for
- releaseFragments(tabPtr);
- }
- if (tabPtr.p->tabFile[0] != RNIL) {
- jam();
- releaseFile(tabPtr.p->tabFile[0]);
- releaseFile(tabPtr.p->tabFile[1]);
- tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
- }//if
- }//Dbdih::releaseTable()
- void Dbdih::releaseReplicas(Uint32 replicaPtrI)
- {
- ReplicaRecordPtr replicaPtr;
- replicaPtr.i = replicaPtrI;
- jam();
- while (replicaPtr.i != RNIL) {
- jam();
- ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
- Uint32 tmp = replicaPtr.p->nextReplica;
- replicaPtr.p->nextReplica = cfirstfreeReplica;
- cfirstfreeReplica = replicaPtr.i;
- replicaPtr.i = tmp;
- cnoFreeReplicaRec++;
- }//while
- }//Dbdih::releaseReplicas()
- void Dbdih::seizeReplicaRec(ReplicaRecordPtr& replicaPtr)
- {
- replicaPtr.i = cfirstfreeReplica;
- ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
- cfirstfreeReplica = replicaPtr.p->nextReplica;
- cnoFreeReplicaRec--;
- replicaPtr.p->nextReplica = RNIL;
- }//Dbdih::seizeReplicaRec()
- void Dbdih::releaseFile(Uint32 fileIndex)
- {
- FileRecordPtr filePtr;
- filePtr.i = fileIndex;
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- filePtr.p->nextFile = cfirstfreeFile;
- cfirstfreeFile = filePtr.i;
- }//Dbdih::releaseFile()
- void Dbdih::execALTER_TAB_REQ(Signal * signal)
- {
- AlterTabReq* const req = (AlterTabReq*)signal->getDataPtr();
- const Uint32 senderRef = req->senderRef;
- const Uint32 senderData = req->senderData;
- const Uint32 changeMask = req->changeMask;
- const Uint32 tableId = req->tableId;
- const Uint32 tableVersion = req->tableVersion;
- const Uint32 gci = req->gci;
- AlterTabReq::RequestType requestType =
- (AlterTabReq::RequestType) req->requestType;
- TabRecordPtr tabPtr;
- tabPtr.i = tableId;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- tabPtr.p->schemaVersion = tableVersion;
- // Request handled successfully
- AlterTabConf * conf = (AlterTabConf*)signal->getDataPtrSend();
- conf->senderRef = reference();
- conf->senderData = senderData;
- conf->changeMask = changeMask;
- conf->tableId = tableId;
- conf->tableVersion = tableVersion;
- conf->gci = gci;
- conf->requestType = requestType;
- sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
- AlterTabConf::SignalLength, JBB);
- }
/*
  G E T   N O D E S
  *****************
  */
/*****************************************************************************/
/* **********     TRANSACTION  HANDLING  MODULE                  *************/
/*****************************************************************************/
/*
  3.8.1   G E T   N O D E S   R E Q U E S T
  *****************************************
  Asks which nodes should be part of a transaction.
  */
- void Dbdih::execDIGETNODESREQ(Signal* signal)
- {
- const DiGetNodesReq * const req = (DiGetNodesReq *)&signal->theData[0];
- FragmentstorePtr fragPtr;
- TabRecordPtr tabPtr;
- tabPtr.i = req->tableId;
- Uint32 hashValue = req->hashValue;
- Uint32 ttabFileSize = ctabFileSize;
- TabRecord* regTabDesc = tabRecord;
- jamEntry();
- ptrCheckGuard(tabPtr, ttabFileSize, regTabDesc);
- hashValue = hashValue >> tabPtr.p->kvalue;
- Uint32 fragId = tabPtr.p->mask & hashValue;
- ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
- if (fragId < tabPtr.p->hashpointer) {
- jam();
- fragId = hashValue & ((tabPtr.p->mask << 1) + 1);
- }//if
- getFragstore(tabPtr.p, fragId, fragPtr);
- DiGetNodesConf * const conf = (DiGetNodesConf *)&signal->theData[0];
- Uint32 nodeCount = extractNodeInfo(fragPtr.p, conf->nodes);
- Uint32 sig2 = (nodeCount - 1) +
- (fragPtr.p->distributionKey << 16);
- conf->zero = 0;
- conf->reqinfo = sig2;
- conf->fragId = fragId;
- }//Dbdih::execDIGETNODESREQ()
- Uint32 Dbdih::extractNodeInfo(const Fragmentstore * fragPtr, Uint32 nodes[])
- {
- Uint32 nodeCount = 0;
- for (Uint32 i = 0; i < fragPtr->fragReplicas; i++) {
- jam();
- NodeRecordPtr nodePtr;
- ndbrequire(i < MAX_REPLICAS);
- nodePtr.i = fragPtr->activeNodes[i];
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- if (nodePtr.p->useInTransactions) {
- jam();
- nodes[nodeCount] = nodePtr.i;
- nodeCount++;
- }//if
- }//for
- ndbrequire(nodeCount > 0);
- return nodeCount;
- }//Dbdih::extractNodeInfo()
- void
- Dbdih::getFragstore(TabRecord * tab, //In parameter
- Uint32 fragNo, //In parameter
- FragmentstorePtr & fragptr) //Out parameter
- {
- FragmentstorePtr fragPtr;
- Uint32 chunkNo = fragNo >> LOG_NO_OF_FRAGS_PER_CHUNK;
- Uint32 chunkIndex = fragNo & (NO_OF_FRAGS_PER_CHUNK - 1);
- Uint32 TfragstoreFileSize = cfragstoreFileSize;
- Fragmentstore* TfragStore = fragmentstore;
- if (chunkNo < MAX_NDB_NODES) {
- fragPtr.i = tab->startFid[chunkNo] + chunkIndex;
- ptrCheckGuard(fragPtr, TfragstoreFileSize, TfragStore);
- fragptr = fragPtr;
- return;
- }//if
- ndbrequire(false);
- }//Dbdih::getFragstore()
- void Dbdih::allocFragments(Uint32 noOfFragments, TabRecordPtr tabPtr)
- {
- FragmentstorePtr fragPtr;
- Uint32 noOfChunks = (noOfFragments + (NO_OF_FRAGS_PER_CHUNK - 1)) >> LOG_NO_OF_FRAGS_PER_CHUNK;
- ndbrequire(cremainingfrags >= noOfFragments);
- for (Uint32 i = 0; i < noOfChunks; i++) {
- jam();
- Uint32 baseFrag = cfirstfragstore;
- tabPtr.p->startFid[i] = baseFrag;
- fragPtr.i = baseFrag;
- ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
- cfirstfragstore = fragPtr.p->nextFragmentChunk;
- cremainingfrags -= NO_OF_FRAGS_PER_CHUNK;
- for (Uint32 j = 0; j < NO_OF_FRAGS_PER_CHUNK; j++) {
- jam();
- fragPtr.i = baseFrag + j;
- ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
- initFragstore(fragPtr);
- }//if
- }//for
- tabPtr.p->noOfFragChunks = noOfChunks;
- }//Dbdih::allocFragments()
- void Dbdih::releaseFragments(TabRecordPtr tabPtr)
- {
- FragmentstorePtr fragPtr;
- for (Uint32 i = 0; i < tabPtr.p->noOfFragChunks; i++) {
- jam();
- Uint32 baseFrag = tabPtr.p->startFid[i];
- fragPtr.i = baseFrag;
- ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
- fragPtr.p->nextFragmentChunk = cfirstfragstore;
- cfirstfragstore = baseFrag;
- tabPtr.p->startFid[i] = RNIL;
- cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
- }//for
- tabPtr.p->noOfFragChunks = 0;
- }//Dbdih::releaseFragments()
- void Dbdih::initialiseFragstore()
- {
- Uint32 i;
- FragmentstorePtr fragPtr;
- for (i = 0; i < cfragstoreFileSize; i++) {
- fragPtr.i = i;
- ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
- initFragstore(fragPtr);
- }//for
- Uint32 noOfChunks = cfragstoreFileSize >> LOG_NO_OF_FRAGS_PER_CHUNK;
- fragPtr.i = 0;
- cfirstfragstore = RNIL;
- cremainingfrags = 0;
- for (i = 0; i < noOfChunks; i++) {
- refresh_watch_dog();
- ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
- fragPtr.p->nextFragmentChunk = cfirstfragstore;
- cfirstfragstore = fragPtr.i;
- fragPtr.i += NO_OF_FRAGS_PER_CHUNK;
- cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
- }//for
- }//Dbdih::initialiseFragstore()
/*
  3.9   V E R I F I C A T I O N
  *****************************
  */
/****************************************************************************/
/* **********     VERIFICATION SUB-MODULE                       *************/
/****************************************************************************/
/*
  3.9.1   R E C E I V I N G   O F   V E R I F I C A T I O N   R E Q U E S T
  *************************************************************************
  */
- void Dbdih::execDIVERIFYREQ(Signal* signal)
- {
- jamEntry();
- if ((getBlockCommit() == false) &&
- (cfirstVerifyQueue == RNIL)) {
- jam();
- /*-----------------------------------------------------------------------*/
- // We are not blocked and the verify queue was empty currently so we can
- // simply reply back to TC immediately. The method was called with
- // EXECUTE_DIRECT so we reply back by setting signal data and returning.
- // theData[0] already contains the correct information so
- // we need not touch it.
- /*-----------------------------------------------------------------------*/
- signal->theData[1] = currentgcp;
- signal->theData[2] = 0;
- return;
- }//if
- /*-------------------------------------------------------------------------*/
- // Since we are blocked we need to put this operation last in the verify
- // queue to ensure that operation starts up in the correct order.
- /*-------------------------------------------------------------------------*/
- ApiConnectRecordPtr tmpApiConnectptr;
- ApiConnectRecordPtr localApiConnectptr;
- cverifyQueueCounter++;
- localApiConnectptr.i = signal->theData[0];
- tmpApiConnectptr.i = clastVerifyQueue;
- ptrCheckGuard(localApiConnectptr, capiConnectFileSize, apiConnectRecord);
- localApiConnectptr.p->apiGci = cnewgcp;
- localApiConnectptr.p->nextApi = RNIL;
- clastVerifyQueue = localApiConnectptr.i;
- if (tmpApiConnectptr.i == RNIL) {
- jam();
- cfirstVerifyQueue = localApiConnectptr.i;
- } else {
- jam();
- ptrCheckGuard(tmpApiConnectptr, capiConnectFileSize, apiConnectRecord);
- tmpApiConnectptr.p->nextApi = localApiConnectptr.i;
- }//if
- emptyverificbuffer(signal, false);
- signal->theData[2] = 1; // Indicate no immediate return
- return;
- }//Dbdih::execDIVERIFYREQ()
- void Dbdih::execDI_FCOUNTREQ(Signal* signal)
- {
- ConnectRecordPtr connectPtr;
- TabRecordPtr tabPtr;
- jamEntry();
- connectPtr.i = signal->theData[0];
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
- if(connectPtr.i != RNIL){
- ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
- if (connectPtr.p->connectState == ConnectRecord::INUSE) {
- jam();
- signal->theData[0] = connectPtr.p->userpointer;
- signal->theData[1] = tabPtr.p->totalfragments;
- sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTCONF, signal,2, JBB);
- return;
- }//if
- signal->theData[0] = connectPtr.p->userpointer;
- signal->theData[1] = ZERRONOUSSTATE;
- sendSignal(connectPtr.p->userblockref, GSN_DI_FCOUNTREF, signal, 2, JBB);
- return;
- }//if
- //connectPtr.i == RNIL -> question without connect record
- const Uint32 senderData = signal->theData[2];
- const BlockReference senderRef = signal->senderBlockRef();
- signal->theData[0] = RNIL;
- signal->theData[1] = tabPtr.p->totalfragments;
- signal->theData[2] = tabPtr.i;
- signal->theData[3] = senderData;
- signal->theData[4] = tabPtr.p->noOfBackups;
- sendSignal(senderRef, GSN_DI_FCOUNTCONF, signal, 5, JBB);
- }//Dbdih::execDI_FCOUNTREQ()
- void Dbdih::execDIGETPRIMREQ(Signal* signal)
- {
- FragmentstorePtr fragPtr;
- ConnectRecordPtr connectPtr;
- TabRecordPtr tabPtr;
- jamEntry();
- Uint32 passThrough = signal->theData[1];
- tabPtr.i = signal->theData[2];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType)) {
- jam();
- tabPtr.i = tabPtr.p->primaryTableId;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- }
- Uint32 fragId = signal->theData[3];
-
- ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
- connectPtr.i = signal->theData[0];
- if(connectPtr.i != RNIL)
- {
- jam();
- ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
- signal->theData[0] = connectPtr.p->userpointer;
- }
- else
- {
- jam();
- signal->theData[0] = RNIL;
- }
-
- Uint32 nodes[MAX_REPLICAS];
- getFragstore(tabPtr.p, fragId, fragPtr);
- Uint32 count = extractNodeInfo(fragPtr.p, nodes);
-
- signal->theData[1] = passThrough;
- signal->theData[2] = nodes[0];
- signal->theData[3] = nodes[1];
- signal->theData[4] = nodes[2];
- signal->theData[5] = nodes[3];
- signal->theData[6] = count;
- signal->theData[7] = tabPtr.i;
- signal->theData[8] = fragId;
- const BlockReference senderRef = signal->senderBlockRef();
- sendSignal(senderRef, GSN_DIGETPRIMCONF, signal, 9, JBB);
- }//Dbdih::execDIGETPRIMREQ()
/****************************************************************************/
/* **********     GLOBAL-CHECK-POINT HANDLING  MODULE           *************/
/****************************************************************************/
/*
  3.10   G L O B A L   C H E C K P O I N T   ( I N   M A S T E R   R O L E )
  **************************************************************************
  */
- void Dbdih::checkGcpStopLab(Signal* signal)
- {
- Uint32 tgcpStatus;
- tgcpStatus = cgcpStatus;
- if (tgcpStatus == coldGcpStatus) {
- jam();
- if (coldGcpId == cnewgcp) {
- jam();
- if (cgcpStatus != GCP_READY) {
- jam();
- cgcpSameCounter++;
- if (cgcpSameCounter == 1200) {
- jam();