DbdihMain.cpp
上传用户:romrleung
上传日期:2022-05-23
资源大小:18897k
文件大小:494k
- /* Copyright (C) 2003 MySQL AB
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
- #define DBDIH_C
- #include <ndb_limits.h>
- #include <ndb_version.h>
- #include <NdbOut.hpp>
- #include "Dbdih.hpp"
- #include "Configuration.hpp"
- #include <signaldata/BlockCommitOrd.hpp>
- #include <signaldata/CheckNodeGroups.hpp>
- #include <signaldata/CreateFrag.hpp>
- #include <signaldata/CopyActive.hpp>
- #include <signaldata/CopyFrag.hpp>
- #include <signaldata/CopyGCIReq.hpp>
- #include <signaldata/DiAddTab.hpp>
- #include <signaldata/DictStart.hpp>
- #include <signaldata/DiGetNodes.hpp>
- #include <signaldata/DihContinueB.hpp>
- #include <signaldata/DihSwitchReplica.hpp>
- #include <signaldata/DumpStateOrd.hpp>
- #include <signaldata/EmptyLcp.hpp>
- #include <signaldata/EndTo.hpp>
- #include <signaldata/EventReport.hpp>
- #include <signaldata/GCPSave.hpp>
- #include <signaldata/HotSpareRep.hpp>
- #include <signaldata/MasterGCP.hpp>
- #include <signaldata/MasterLCP.hpp>
- #include <signaldata/NFCompleteRep.hpp>
- #include <signaldata/NodeFailRep.hpp>
- #include <signaldata/ReadNodesConf.hpp>
- #include <signaldata/StartFragReq.hpp>
- #include <signaldata/StartInfo.hpp>
- #include <signaldata/StartMe.hpp>
- #include <signaldata/StartPerm.hpp>
- #include <signaldata/StartRec.hpp>
- #include <signaldata/StartTo.hpp>
- #include <signaldata/StopPerm.hpp>
- #include <signaldata/StopMe.hpp>
- #include <signaldata/TestOrd.hpp>
- #include <signaldata/UpdateTo.hpp>
- #include <signaldata/WaitGCP.hpp>
- #include <signaldata/DihStartTab.hpp>
- #include <signaldata/LCP.hpp>
- #include <signaldata/SystemError.hpp>
- #include <signaldata/DropTab.hpp>
- #include <signaldata/AlterTab.hpp>
- #include <signaldata/PrepDropTab.hpp>
- #include <signaldata/SumaImpl.hpp>
- #include <signaldata/DictTabInfo.hpp>
- #include <signaldata/CreateFragmentation.hpp>
- #include <signaldata/LqhFrag.hpp>
- #include <signaldata/FsOpenReq.hpp>
- #include <DebuggerNames.hpp>
- #include <EventLogger.hpp>
- extern EventLogger g_eventLogger;
// Reinterpret the start of the sysfileData word buffer as a Sysfile record.
#define SYSFILE ((Sysfile *)sysfileData)
/**
 * Return from the enclosing (void) function when checkNodeAlive(node)
 * reports false; jam() is called on that path before returning.
 *
 * The body spans several lines, so each line except the last must end in
 * a backslash; without the continuations the macro would expand to nothing.
 */
#define RETURN_IF_NODE_NOT_ALIVE(node) \
  if (!checkNodeAlive((node))) { \
    jam(); \
    return; \
  }
/**
 * Load regTOPtr with the take-over record at takeOverIndex (bounds-checked
 * against MAX_NDB_NODES via ptrCheckGuard) and return from the enclosing
 * (void) function when checkToInterrupted(regTOPtr) is true.
 *
 * Expands to several statements, so it must be used as a statement on its
 * own (not as the unbraced body of an if/else).  Each line except the last
 * must end in a backslash for the whole body to belong to the macro.
 */
#define RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverIndex, regTOPtr) \
  regTOPtr.i = takeOverIndex; \
  ptrCheckGuard(regTOPtr, MAX_NDB_NODES, takeOverRecord); \
  if (checkToInterrupted(regTOPtr)) { \
    jam(); \
    return; \
  }
/**
 * Mark receiveNodeId as answered in the counter c_<sigName>_Counter and
 * return from the enclosing (void) function unless that counter reports
 * done().  Code following the macro therefore runs only once the counter
 * is done.
 *
 * Each line except the last must end in a backslash for the body to be
 * part of the macro.
 */
#define receiveLoopMacro(sigName, receiveNodeId) \
{ \
  c_##sigName##_Counter.clearWaitingFor(receiveNodeId); \
  if(c_##sigName##_Counter.done() == false){ \
    jam(); \
    return; \
  } \
}
/**
 * Reset the counter c_<sigName>_Counter, then walk the node list starting
 * at cfirstAliveNode and following nextNode until RNIL.  For every node
 * visited the counter is told to wait for it and
 * signalRoutine(signal, nodeId) is invoked.
 *
 * The loop is a do/while, so cfirstAliveNode is dereferenced
 * unconditionally.  A variable named `signal` must be in scope at the
 * expansion site.  Each line except the last must end in a backslash for
 * the body to be part of the macro.
 */
#define sendLoopMacro(sigName, signalRoutine) \
{ \
  c_##sigName##_Counter.clearWaitingFor(); \
  NodeRecordPtr specNodePtr; \
  specNodePtr.i = cfirstAliveNode; \
  do { \
    jam(); \
    ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord); \
    c_##sigName##_Counter.setWaitingFor(specNodePtr.i); \
    signalRoutine(signal, specNodePtr.i); \
    specNodePtr.i = specNodePtr.p->nextNode; \
  } while (specNodePtr.i != RNIL); \
}
- static
- Uint32
- prevLcpNo(Uint32 lcpNo){
- if(lcpNo == 0)
- return MAX_LCP_STORED - 1;
- return lcpNo - 1;
- }
- static
- Uint32
- nextLcpNo(Uint32 lcpNo){
- lcpNo++;
- if(lcpNo == MAX_LCP_STORED)
- return 0;
- return lcpNo;
- }
/**
 * Require (via ndbrequire) that x is strictly greater than y when both are
 * converted to int.  Arguments are parenthesised so that the cast applies
 * to the whole argument expression rather than only to its first operand.
 */
#define gth(x, y) ndbrequire(((int)(x)) > ((int)(y)))
- void Dbdih::nullRoutine(Signal* signal, Uint32 nodeId)
- {
- }//Dbdih::nullRoutine()
- void Dbdih::sendCOPY_GCIREQ(Signal* signal, Uint32 nodeId)
- {
- ndbrequire(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE);
-
- const BlockReference ref = calcDihBlockRef(nodeId);
- const Uint32 wordPerSignal = CopyGCIReq::DATA_SIZE;
- const Uint32 noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
- wordPerSignal);
-
- CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
- copyGCI->anyData = nodeId;
- copyGCI->copyReason = c_copyGCIMaster.m_copyReason;
- copyGCI->startWord = 0;
-
- for(Uint32 i = 0; i < noOfSignals; i++) {
- jam();
- { // Do copy
- const int startWord = copyGCI->startWord;
- for(Uint32 j = 0; j < wordPerSignal; j++) {
- copyGCI->data[j] = sysfileData[j+startWord];
- }//for
- }
- sendSignal(ref, GSN_COPY_GCIREQ, signal, 25, JBB);
- copyGCI->startWord += wordPerSignal;
- }//for
- }//Dbdih::sendCOPY_GCIREQ()
- void Dbdih::sendDIH_SWITCH_REPLICA_REQ(Signal* signal, Uint32 nodeId)
- {
- const BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_DIH_SWITCH_REPLICA_REQ, signal,
- DihSwitchReplicaReq::SignalLength, JBB);
- }//Dbdih::sendDIH_SWITCH_REPLICA_REQ()
- void Dbdih::sendEMPTY_LCP_REQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcLqhBlockRef(nodeId);
- sendSignal(ref, GSN_EMPTY_LCP_REQ, signal, EmptyLcpReq::SignalLength, JBB);
- }//Dbdih::sendEMPTY_LCPREQ()
- void Dbdih::sendEND_TOREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_END_TOREQ, signal, EndToReq::SignalLength, JBB);
- }//Dbdih::sendEND_TOREQ()
- void Dbdih::sendGCP_COMMIT(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcDihBlockRef(nodeId);
- signal->theData[0] = cownNodeId;
- signal->theData[1] = cnewgcp;
- sendSignal(ref, GSN_GCP_COMMIT, signal, 2, JBA);
- }//Dbdih::sendGCP_COMMIT()
- void Dbdih::sendGCP_PREPARE(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcDihBlockRef(nodeId);
- signal->theData[0] = cownNodeId;
- signal->theData[1] = cnewgcp;
- sendSignal(ref, GSN_GCP_PREPARE, signal, 2, JBA);
- }//Dbdih::sendGCP_PREPARE()
- void Dbdih::sendGCP_SAVEREQ(Signal* signal, Uint32 nodeId)
- {
- GCPSaveReq * const saveReq = (GCPSaveReq*)&signal->theData[0];
- BlockReference ref = calcLqhBlockRef(nodeId);
- saveReq->dihBlockRef = reference();
- saveReq->dihPtr = nodeId;
- saveReq->gci = coldgcp;
- sendSignal(ref, GSN_GCP_SAVEREQ, signal, GCPSaveReq::SignalLength, JBB);
- }//Dbdih::sendGCP_SAVEREQ()
- void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference nodeDihRef = calcDihBlockRef(nodeId);
- signal->theData[0] = reference();
- signal->theData[1] = c_nodeStartMaster.startNode;
- signal->theData[2] = c_nodeStartMaster.failNr;
- signal->theData[3] = 0;
- signal->theData[4] = currentgcp;
- sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
- }//Dbdih::sendINCL_NODEREQ()
- void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_MASTER_GCPREQ, signal, MasterGCPReq::SignalLength, JBB);
- }//Dbdih::sendMASTER_GCPREQ()
- void Dbdih::sendMASTER_LCPREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_MASTER_LCPREQ, signal, MasterLCPReq::SignalLength, JBB);
- }//Dbdih::sendMASTER_LCPREQ()
- void Dbdih::sendSTART_INFOREQ(Signal* signal, Uint32 nodeId)
- {
- const BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_START_INFOREQ, signal, StartInfoReq::SignalLength, JBB);
- }//sendSTART_INFOREQ()
- void Dbdih::sendSTART_RECREQ(Signal* signal, Uint32 nodeId)
- {
- StartRecReq * const req = (StartRecReq*)&signal->theData[0];
- BlockReference ref = calcLqhBlockRef(nodeId);
- req->receivingNodeId = nodeId;
- req->senderRef = reference();
- req->keepGci = SYSFILE->keepGCI;
- req->lastCompletedGci = SYSFILE->lastCompletedGCI[nodeId];
- req->newestGci = SYSFILE->newestRestorableGCI;
- sendSignal(ref, GSN_START_RECREQ, signal, StartRecReq::SignalLength, JBB);
- signal->theData[0] = EventReport::StartREDOLog;
- signal->theData[1] = nodeId;
- signal->theData[2] = SYSFILE->keepGCI;
- signal->theData[3] = SYSFILE->lastCompletedGCI[nodeId];
- signal->theData[4] = SYSFILE->newestRestorableGCI;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5, JBB);
- }//Dbdih::sendSTART_RECREQ()
- void Dbdih::sendSTART_TOREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_START_TOREQ, signal, StartToReq::SignalLength, JBB);
- }//Dbdih::sendSTART_TOREQ()
- void Dbdih::sendSTOP_ME_REQ(Signal* signal, Uint32 nodeId)
- {
- if (nodeId != getOwnNodeId()) {
- jam();
- const BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_STOP_ME_REQ, signal, StopMeReq::SignalLength, JBB);
- }//if
- }//Dbdih::sendSTOP_ME_REQ()
- void Dbdih::sendTC_CLOPSIZEREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcTcBlockRef(nodeId);
- signal->theData[0] = nodeId;
- signal->theData[1] = reference();
- sendSignal(ref, GSN_TC_CLOPSIZEREQ, signal, 2, JBB);
- }//Dbdih::sendTC_CLOPSIZEREQ()
- void Dbdih::sendTCGETOPSIZEREQ(Signal* signal, Uint32 nodeId)
- {
- BlockReference ref = calcTcBlockRef(nodeId);
- signal->theData[0] = nodeId;
- signal->theData[1] = reference();
- sendSignal(ref, GSN_TCGETOPSIZEREQ, signal, 2, JBB);
- }//Dbdih::sendTCGETOPSIZEREQ()
- void Dbdih::sendUPDATE_TOREQ(Signal* signal, Uint32 nodeId)
- {
- const BlockReference ref = calcDihBlockRef(nodeId);
- sendSignal(ref, GSN_UPDATE_TOREQ, signal, UpdateToReq::SignalLength, JBB);
- }//sendUPDATE_TOREQ()
- void Dbdih::execCONTINUEB(Signal* signal)
- {
- jamEntry();
- switch ((DihContinueB::Type)signal->theData[0]) {
- case DihContinueB::ZPACK_TABLE_INTO_PAGES:
- {
- jam();
- Uint32 tableId = signal->theData[1];
- packTableIntoPagesLab(signal, tableId);
- return;
- break;
- }
- case DihContinueB::ZPACK_FRAG_INTO_PAGES:
- {
- RWFragment wf;
- jam();
- wf.rwfTabPtr.i = signal->theData[1];
- ptrCheckGuard(wf.rwfTabPtr, ctabFileSize, tabRecord);
- wf.fragId = signal->theData[2];
- wf.pageIndex = signal->theData[3];
- wf.wordIndex = signal->theData[4];
- packFragIntoPagesLab(signal, &wf);
- return;
- break;
- }
- case DihContinueB::ZREAD_PAGES_INTO_TABLE:
- {
- jam();
- Uint32 tableId = signal->theData[1];
- readPagesIntoTableLab(signal, tableId);
- return;
- break;
- }
- case DihContinueB::ZREAD_PAGES_INTO_FRAG:
- {
- RWFragment rf;
- jam();
- rf.rwfTabPtr.i = signal->theData[1];
- ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
- rf.fragId = signal->theData[2];
- rf.pageIndex = signal->theData[3];
- rf.wordIndex = signal->theData[4];
- readPagesIntoFragLab(signal, &rf);
- return;
- break;
- }
- case DihContinueB::ZCOPY_TABLE:
- {
- jam();
- Uint32 tableId = signal->theData[1];
- copyTableLab(signal, tableId);
- return;
- }
- case DihContinueB::ZCOPY_TABLE_NODE:
- {
- NodeRecordPtr nodePtr;
- CopyTableNode ctn;
- jam();
- ctn.ctnTabPtr.i = signal->theData[1];
- ptrCheckGuard(ctn.ctnTabPtr, ctabFileSize, tabRecord);
- nodePtr.i = signal->theData[2];
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- ctn.pageIndex = signal->theData[3];
- ctn.wordIndex = signal->theData[4];
- ctn.noOfWords = signal->theData[5];
- copyTableNode(signal, &ctn, nodePtr);
- return;
- }
- case DihContinueB::ZSTART_FRAGMENT:
- {
- jam();
- Uint32 tableId = signal->theData[1];
- Uint32 fragId = signal->theData[2];
- startFragment(signal, tableId, fragId);
- return;
- }
- case DihContinueB::ZCOMPLETE_RESTART:
- jam();
- completeRestartLab(signal);
- return;
- case DihContinueB::ZREAD_TABLE_FROM_PAGES:
- {
- TabRecordPtr tabPtr;
- jam();
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- readTableFromPagesLab(signal, tabPtr);
- return;
- }
- case DihContinueB::ZSR_PHASE2_READ_TABLE:
- {
- TabRecordPtr tabPtr;
- jam();
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- srPhase2ReadTableLab(signal, tabPtr);
- return;
- }
- case DihContinueB::ZCHECK_TC_COUNTER:
- jam();
- #ifndef NO_LCP
- checkTcCounterLab(signal);
- #endif
- return;
- case DihContinueB::ZCALCULATE_KEEP_GCI:
- {
- jam();
- Uint32 tableId = signal->theData[1];
- Uint32 fragId = signal->theData[2];
- calculateKeepGciLab(signal, tableId, fragId);
- return;
- }
- case DihContinueB::ZSTORE_NEW_LCP_ID:
- jam();
- storeNewLcpIdLab(signal);
- return;
- case DihContinueB::ZTABLE_UPDATE:
- {
- TabRecordPtr tabPtr;
- jam();
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- tableUpdateLab(signal, tabPtr);
- return;
- }
- case DihContinueB::ZCHECK_LCP_COMPLETED:
- {
- jam();
- checkLcpCompletedLab(signal);
- return;
- }
- case DihContinueB::ZINIT_LCP:
- {
- jam();
- Uint32 senderRef = signal->theData[1];
- Uint32 tableId = signal->theData[2];
- initLcpLab(signal, senderRef, tableId);
- return;
- }
- case DihContinueB::ZADD_TABLE_MASTER_PAGES:
- {
- TabRecordPtr tabPtr;
- jam();
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_MASTER;
- tableUpdateLab(signal, tabPtr);
- return;
- break;
- }
- case DihContinueB::ZDIH_ADD_TABLE_MASTER:
- {
- jam();
- addTable_closeConf(signal, signal->theData[1]);
- return;
- }
- case DihContinueB::ZADD_TABLE_SLAVE_PAGES:
- {
- TabRecordPtr tabPtr;
- jam();
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_SLAVE;
- tableUpdateLab(signal, tabPtr);
- return;
- }
- case DihContinueB::ZDIH_ADD_TABLE_SLAVE:
- {
- ndbrequire(false);
- return;
- }
- case DihContinueB::ZSTART_GCP:
- jam();
- #ifndef NO_GCP
- startGcpLab(signal, signal->theData[1]);
- #endif
- return;
- break;
- case DihContinueB::ZCOPY_GCI:{
- jam();
- CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)signal->theData[1];
- ndbrequire(c_copyGCIMaster.m_copyReason == reason);
- sendLoopMacro(COPY_GCIREQ, sendCOPY_GCIREQ);
- return;
- }
- break;
- case DihContinueB::ZEMPTY_VERIFY_QUEUE:
- jam();
- emptyverificbuffer(signal, true);
- return;
- break;
- case DihContinueB::ZCHECK_GCP_STOP:
- jam();
- #ifndef NO_GCP
- checkGcpStopLab(signal);
- #endif
- return;
- break;
- case DihContinueB::ZREMOVE_NODE_FROM_TABLE:
- {
- jam();
- Uint32 nodeId = signal->theData[1];
- Uint32 tableId = signal->theData[2];
- removeNodeFromTables(signal, nodeId, tableId);
- return;
- }
- case DihContinueB::ZCOPY_NODE:
- {
- jam();
- Uint32 tableId = signal->theData[1];
- copyNodeLab(signal, tableId);
- return;
- }
- case DihContinueB::ZSTART_TAKE_OVER:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- Uint32 startNode = signal->theData[2];
- Uint32 toNode = signal->theData[3];
- startTakeOver(signal, takeOverPtrI, startNode, toNode);
- return;
- break;
- }
- case DihContinueB::ZCHECK_START_TAKE_OVER:
- jam();
- checkStartTakeOver(signal);
- break;
- case DihContinueB::ZTO_START_COPY_FRAG:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- startNextCopyFragment(signal, takeOverPtrI);
- return;
- }
- case DihContinueB::ZINVALIDATE_NODE_LCP:
- {
- jam();
- const Uint32 nodeId = signal->theData[1];
- const Uint32 tableId = signal->theData[2];
- invalidateNodeLCP(signal, nodeId, tableId);
- return;
- }
- case DihContinueB::ZINITIALISE_RECORDS:
- jam();
- initialiseRecordsLab(signal,
- signal->theData[1],
- signal->theData[2],
- signal->theData[3]);
- return;
- break;
- case DihContinueB::ZSTART_PERMREQ_AGAIN:
- jam();
- nodeRestartPh2Lab(signal);
- return;
- break;
- case DihContinueB::SwitchReplica:
- {
- jam();
- const Uint32 nodeId = signal->theData[1];
- const Uint32 tableId = signal->theData[2];
- const Uint32 fragNo = signal->theData[3];
- switchReplica(signal, nodeId, tableId, fragNo);
- return;
- }
- case DihContinueB::ZSEND_START_TO:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- sendStartTo(signal, takeOverPtrI);
- return;
- }
- case DihContinueB::ZSEND_ADD_FRAG:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- toCopyFragLab(signal, takeOverPtrI);
- return;
- }
- case DihContinueB::ZSEND_UPDATE_TO:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- Uint32 updateState = signal->theData[4];
- sendUpdateTo(signal, takeOverPtrI, updateState);
- return;
- }
- case DihContinueB::ZSEND_END_TO:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- sendEndTo(signal, takeOverPtrI);
- return;
- }
- case DihContinueB::ZSEND_CREATE_FRAG:
- {
- jam();
- Uint32 takeOverPtrI = signal->theData[1];
- Uint32 storedType = signal->theData[2];
- Uint32 startGci = signal->theData[3];
- sendCreateFragReq(signal, startGci, storedType, takeOverPtrI);
- return;
- }
- case DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE:{
- jam();
- TabRecordPtr tabPtr;
- tabPtr.i = signal->theData[1];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- waitDropTabWritingToFile(signal, tabPtr);
- return;
- }
- case DihContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:{
- jam();
- Uint32 nodeId = signal->theData[1];
- Uint32 tableId = signal->theData[2];
- checkWaitDropTabFailedLqh(signal, nodeId, tableId);
- return;
- }
- }//switch
-
- ndbrequire(false);
- return;
- }//Dbdih::execCONTINUEB()
- void Dbdih::execCOPY_GCIREQ(Signal* signal)
- {
- CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
- jamEntry();
- CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)copyGCI->copyReason;
- const Uint32 tstart = copyGCI->startWord;
-
- ndbrequire(cmasterdihref == signal->senderBlockRef()) ;
- ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
- ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
- ndbrequire(reason != CopyGCIReq::IDLE);
-
- arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
- for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
- cdata[tstart+i] = copyGCI->data[i];
-
- if ((tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32) {
- jam();
- c_copyGCISlave.m_expectedNextWord = 0;
- } else {
- jam();
- c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
- return;
- }//if
-
- memcpy(sysfileData, cdata, sizeof(sysfileData));
-
- c_copyGCISlave.m_copyReason = reason;
- c_copyGCISlave.m_senderRef = signal->senderBlockRef();
- c_copyGCISlave.m_senderData = copyGCI->anyData;
- CRASH_INSERTION2(7020, reason==CopyGCIReq::LOCAL_CHECKPOINT);
- CRASH_INSERTION2(7008, reason==CopyGCIReq::GLOBAL_CHECKPOINT);
- /* -------------------------------------------------------------------------*/
- /* WE SET THE REQUESTER OF THE COPY GCI TO THE CURRENT MASTER. IF THE */
- /* CURRENT MASTER WE DO NOT WANT THE NEW MASTER TO RECEIVE CONFIRM OF */
- /* SOMETHING HE HAS NOT SENT. THE TAKE OVER MUST BE CAREFUL. */
- /* -------------------------------------------------------------------------*/
- bool ok = false;
- switch(reason){
- case CopyGCIReq::IDLE:
- ok = true;
- jam();
- ndbrequire(false);
- break;
- case CopyGCIReq::LOCAL_CHECKPOINT: {
- ok = true;
- jam();
- c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
- c_lcpState.m_masterLcpDihRef = cmasterdihref;
- setNodeInfo(signal);
- break;
- }
- case CopyGCIReq::RESTART: {
- ok = true;
- jam();
- coldgcp = SYSFILE->newestRestorableGCI;
- crestartGci = SYSFILE->newestRestorableGCI;
- Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
- currentgcp = coldgcp + 1;
- cnewgcp = coldgcp + 1;
- setNodeInfo(signal);
- if ((Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))) {
- jam();
- /* -------------------------------------------------------------------- */
- // IF THERE WAS A LOCAL CHECKPOINT ONGOING AT THE CRASH MOMENT WE WILL
- // INVALIDATE THAT LOCAL CHECKPOINT.
- /* -------------------------------------------------------------------- */
- invalidateLcpInfoAfterSr();
- }//if
- break;
- }
- case CopyGCIReq::GLOBAL_CHECKPOINT: {
- ok = true;
- jam();
- cgcpParticipantState = GCP_PARTICIPANT_COPY_GCI_RECEIVED;
- setNodeInfo(signal);
- break;
- }//if
- case CopyGCIReq::INITIAL_START_COMPLETED:
- ok = true;
- jam();
- break;
- }
- ndbrequire(ok);
-
- /* ----------------------------------------------------------------------- */
- /* WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE. */
- /* ----------------------------------------------------------------------- */
- FileRecordPtr filePtr;
- filePtr.i = crestartInfoFile[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- if (filePtr.p->fileStatus == FileRecord::OPEN) {
- jam();
- openingCopyGciSkipInitLab(signal, filePtr);
- return;
- }//if
- openFileRw(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
- return;
- }//Dbdih::execCOPY_GCIREQ()
- void Dbdih::execDICTSTARTCONF(Signal* signal)
- {
- jamEntry();
- Uint32 nodeId = refToNode(signal->getSendersBlockRef());
- if (nodeId != getOwnNodeId()) {
- jam();
- nodeDictStartConfLab(signal);
- } else {
- jam();
- dictStartConfLab(signal);
- }//if
- }//Dbdih::execDICTSTARTCONF()
- void Dbdih::execFSCLOSECONF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- filePtr.p->fileStatus = FileRecord::CLOSED;
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::CLOSING_GCP:
- jam();
- closingGcpLab(signal, filePtr);
- break;
- case FileRecord::CLOSING_GCP_CRASH:
- jam();
- closingGcpCrashLab(signal, filePtr);
- break;
- case FileRecord::CLOSING_TABLE_CRASH:
- jam();
- closingTableCrashLab(signal, filePtr);
- break;
- case FileRecord::CLOSING_TABLE_SR:
- jam();
- closingTableSrLab(signal, filePtr);
- break;
- case FileRecord::TABLE_CLOSE:
- jam();
- tableCloseLab(signal, filePtr);
- break;
- case FileRecord::TABLE_CLOSE_DELETE:
- jam();
- tableDeleteLab(signal, filePtr);
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- return;
- }//Dbdih::execFSCLOSECONF()
- void Dbdih::execFSCLOSEREF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::CLOSING_GCP:
- jam();
- break;
- case FileRecord::CLOSING_GCP_CRASH:
- jam();
- closingGcpCrashLab(signal, filePtr);
- return;
- case FileRecord::CLOSING_TABLE_CRASH:
- jam();
- closingTableCrashLab(signal, filePtr);
- return;
- case FileRecord::CLOSING_TABLE_SR:
- jam();
- break;
- case FileRecord::TABLE_CLOSE:
- jam();
- break;
- case FileRecord::TABLE_CLOSE_DELETE:
- jam();
- break;
- default:
- jam();
- break;
- }//switch
- {
- char msg[100];
- sprintf(msg, "File system close failed during FileRecord status %d", (Uint32)status);
- fsRefError(signal,__LINE__,msg);
- }
- return;
- }//Dbdih::execFSCLOSEREF()
- void Dbdih::execFSOPENCONF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- filePtr.p->fileRef = signal->theData[1];
- filePtr.p->fileStatus = FileRecord::OPEN;
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::CREATING_GCP:
- jam();
- creatingGcpLab(signal, filePtr);
- break;
- case FileRecord::OPENING_COPY_GCI:
- jam();
- openingCopyGciSkipInitLab(signal, filePtr);
- break;
- case FileRecord::CREATING_COPY_GCI:
- jam();
- openingCopyGciSkipInitLab(signal, filePtr);
- break;
- case FileRecord::OPENING_GCP:
- jam();
- openingGcpLab(signal, filePtr);
- break;
- case FileRecord::OPENING_TABLE:
- jam();
- openingTableLab(signal, filePtr);
- break;
- case FileRecord::TABLE_CREATE:
- jam();
- tableCreateLab(signal, filePtr);
- break;
- case FileRecord::TABLE_OPEN_FOR_DELETE:
- jam();
- tableOpenLab(signal, filePtr);
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- return;
- }//Dbdih::execFSOPENCONF()
- void Dbdih::execFSOPENREF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::CREATING_GCP:
- /* --------------------------------------------------------------------- */
- /* WE DID NOT MANAGE TO CREATE A GLOBAL CHECKPOINT FILE. SERIOUS ERROR */
- /* WHICH CAUSES A SYSTEM RESTART. */
- /* --------------------------------------------------------------------- */
- jam();
- break;
- case FileRecord::OPENING_COPY_GCI:
- jam();
- openingCopyGciErrorLab(signal, filePtr);
- return;
- case FileRecord::CREATING_COPY_GCI:
- jam();
- break;
- case FileRecord::OPENING_GCP:
- jam();
- openingGcpErrorLab(signal, filePtr);
- return;
- case FileRecord::OPENING_TABLE:
- jam();
- openingTableErrorLab(signal, filePtr);
- return;
- case FileRecord::TABLE_CREATE:
- jam();
- break;
- case FileRecord::TABLE_OPEN_FOR_DELETE:
- jam();
- tableDeleteLab(signal, filePtr);
- return;
- default:
- jam();
- break;
- }//switch
- {
- char msg[100];
- sprintf(msg, "File system open failed during FileRecord status %d", (Uint32)status);
- fsRefError(signal,__LINE__,msg);
- }
- return;
- }//Dbdih::execFSOPENREF()
- void Dbdih::execFSREADCONF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::READING_GCP:
- jam();
- readingGcpLab(signal, filePtr);
- break;
- case FileRecord::READING_TABLE:
- jam();
- readingTableLab(signal, filePtr);
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- return;
- }//Dbdih::execFSREADCONF()
- void Dbdih::execFSREADREF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::READING_GCP:
- jam();
- readingGcpErrorLab(signal, filePtr);
- return;
- case FileRecord::READING_TABLE:
- jam();
- readingTableErrorLab(signal, filePtr);
- return;
- default:
- break;
- }//switch
- {
- char msg[100];
- sprintf(msg, "File system read failed during FileRecord status %d", (Uint32)status);
- fsRefError(signal,__LINE__,msg);
- }
- }//Dbdih::execFSREADREF()
- void Dbdih::execFSWRITECONF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::WRITING_COPY_GCI:
- jam();
- writingCopyGciLab(signal, filePtr);
- break;
- case FileRecord::WRITE_INIT_GCP:
- jam();
- writeInitGcpLab(signal, filePtr);
- break;
- case FileRecord::TABLE_WRITE:
- jam();
- tableWriteLab(signal, filePtr);
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- return;
- }//Dbdih::execFSWRITECONF()
- void Dbdih::execFSWRITEREF(Signal* signal)
- {
- FileRecordPtr filePtr;
- jamEntry();
- filePtr.i = signal->theData[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- FileRecord::ReqStatus status = filePtr.p->reqStatus;
- filePtr.p->reqStatus = FileRecord::IDLE;
- switch (status) {
- case FileRecord::WRITING_COPY_GCI:
- /* --------------------------------------------------------------------- */
- /* EVEN CREATING THE FILE DID NOT WORK. WE WILL THEN CRASH. */
- /* ERROR IN WRITING FILE. WE WILL NOT CONTINUE FROM HERE. */
- /* --------------------------------------------------------------------- */
- jam();
- break;
- case FileRecord::WRITE_INIT_GCP:
- /* --------------------------------------------------------------------- */
- /* AN ERROR OCCURRED IN WRITING A GCI FILE WHICH IS A SERIOUS ERROR */
- /* THAT CAUSE A SYSTEM RESTART. */
- /* --------------------------------------------------------------------- */
- jam();
- break;
- case FileRecord::TABLE_WRITE:
- jam();
- break;
- default:
- jam();
- break;
- }//switch
- {
- char msg[100];
- sprintf(msg, "File system write failed during FileRecord status %d", (Uint32)status);
- fsRefError(signal,__LINE__,msg);
- }
- return;
- }//Dbdih::execFSWRITEREF()
- void Dbdih::execGETGCIREQ(Signal* signal)
- {
- jamEntry();
- Uint32 userPtr = signal->theData[0];
- BlockReference userRef = signal->theData[1];
- signal->theData[0] = userPtr;
- signal->theData[1] = SYSFILE->newestRestorableGCI;
- sendSignal(userRef, GSN_GETGCICONF, signal, 2, JBB);
- }//Dbdih::execGETGCIREQ()
- void Dbdih::execREAD_CONFIG_REQ(Signal* signal)
- {
- const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
- Uint32 ref = req->senderRef;
- Uint32 senderData = req->senderData;
- ndbrequire(req->noOfParameters == 0);
- jamEntry();
- const ndb_mgm_configuration_iterator * p =
- theConfiguration.getOwnConfigIterator();
- ndbrequire(p != 0);
- ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT,
- &capiConnectFileSize));
- ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_CONNECT,&cconnectFileSize));
- ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT,
- &cfragstoreFileSize));
- ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS,
- &creplicaFileSize));
- ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize))
- cfileFileSize = (2 * ctabFileSize) + 2;
- initRecords();
- initialiseRecordsLab(signal, 0, ref, senderData);
- return;
- }//Dbdih::execSIZEALT_REP()
- void Dbdih::execSTART_COPYREF(Signal* signal)
- {
- jamEntry();
- ndbrequire(false);
- }//Dbdih::execSTART_COPYREF()
- void Dbdih::execSTART_FRAGCONF(Signal* signal)
- {
- (void)signal; // Don't want compiler warning
- /* ********************************************************************* */
- /* If anyone wants to add functionality in this method, be aware that */
- /* for temporary tables no START_FRAGREQ is sent and therefore no */
- /* START_FRAGCONF signal will be received for those tables!! */
- /* ********************************************************************* */
- jamEntry();
- return;
- }//Dbdih::execSTART_FRAGCONF()
- void Dbdih::execSTART_MEREF(Signal* signal)
- {
- jamEntry();
- ndbrequire(false);
- }//Dbdih::execSTART_MEREF()
- void Dbdih::execTAB_COMMITREQ(Signal* signal)
- {
- TabRecordPtr tabPtr;
- jamEntry();
- Uint32 tdictPtr = signal->theData[0];
- BlockReference tdictBlockref = signal->theData[1];
- tabPtr.i = signal->theData[2];
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_CREATING);
- tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
- signal->theData[0] = tdictPtr;
- signal->theData[1] = cownNodeId;
- signal->theData[2] = tabPtr.i;
- sendSignal(tdictBlockref, GSN_TAB_COMMITCONF, signal, 3, JBB);
- return;
- }//Dbdih::execTAB_COMMITREQ()
- /*
- 3.2 S T A N D A R D S U B P R O G R A M S I N P L E X
- *************************************************************
- */
- /*
- 3.2.1 S T A R T / R E S T A R T
- **********************************
- */
- /*****************************************************************************/
- /* ********** START / RESTART MODULE *************/
- /*****************************************************************************/
- /*
- 3.2.1.1 LOADING O W N B L O C K R E F E R E N C E (ABSOLUTE PHASE 1)
- *****************************************************************************
- */
- void Dbdih::execDIH_RESTARTREQ(Signal* signal)
- {
- jamEntry();
- cntrlblockref = signal->theData[0];
- if(theConfiguration.getInitialStart()){
- sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
- } else {
- readGciFileLab(signal);
- }
- return;
- }//Dbdih::execDIH_RESTARTREQ()
- void Dbdih::execSTTOR(Signal* signal)
- {
- jamEntry();
- signal->theData[0] = 0;
- signal->theData[1] = 0;
- signal->theData[2] = 0;
- signal->theData[3] = 1; // Next start phase
- signal->theData[4] = 255; // Next start phase
- sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 5, JBB);
- return;
- }//Dbdih::execSTTOR()
- void Dbdih::initialStartCompletedLab(Signal* signal)
- {
- /*-------------------------------------------------------------------------*/
- /* NOW THAT (RE)START IS COMPLETED WE CAN START THE LCP.*/
- /*-------------------------------------------------------------------------*/
- return;
- }//Dbdih::initialStartCompletedLab()
- /*
- * ***************************************************************************
- * S E N D I N G R E P L Y T O S T A R T / R E S T A R T R E Q U E S T S
- * ****************************************************************************
- */
- void Dbdih::ndbsttorry10Lab(Signal* signal, Uint32 _line)
- {
- /*-------------------------------------------------------------------------*/
- // AN NDB START PHASE HAS BEEN COMPLETED. WHEN START PHASE 6 IS COMPLETED WE
- // RECORD THAT THE SYSTEM IS RUNNING.
- /*-------------------------------------------------------------------------*/
- signal->theData[0] = reference();
- sendSignal(cntrlblockref, GSN_NDB_STTORRY, signal, 1, JBB);
- return;
- }//Dbdih::ndbsttorry10Lab()
- /*
- ****************************************
- I N T E R N A L P H A S E S
- ****************************************
- */
- /*---------------------------------------------------------------------------*/
- /*NDB_STTOR START SIGNAL AT START/RESTART */
- /*---------------------------------------------------------------------------*/
- void Dbdih::execNDB_STTOR(Signal* signal)
- {
- jamEntry();
- BlockReference cntrRef = signal->theData[0]; /* SENDERS BLOCK REFERENCE */
- Uint32 ownNodeId = signal->theData[1]; /* OWN PROCESSOR ID*/
- Uint32 phase = signal->theData[2]; /* INTERNAL START PHASE*/
- Uint32 typestart = signal->theData[3];
- cstarttype = typestart;
- cstartPhase = phase;
- switch (phase){
- case ZNDB_SPH1:
- jam();
- /*----------------------------------------------------------------------*/
- /* Set the delay between local checkpoints in ndb startphase 1. */
- /*----------------------------------------------------------------------*/
- cownNodeId = ownNodeId;
- /*-----------------------------------------------------------------------*/
- // Compute all static block references in this node as part of
- // ndb start phase 1.
- /*-----------------------------------------------------------------------*/
- cntrlblockref = cntrRef;
- clocaltcblockref = calcTcBlockRef(ownNodeId);
- clocallqhblockref = calcLqhBlockRef(ownNodeId);
- cdictblockref = calcDictBlockRef(ownNodeId);
- ndbsttorry10Lab(signal, __LINE__);
- break;
-
- case ZNDB_SPH2:
- jam();
- /*-----------------------------------------------------------------------*/
- // Set the number of replicas, maximum is 4 replicas.
- // Read the ndb nodes from the configuration.
- /*-----------------------------------------------------------------------*/
-
- /*-----------------------------------------------------------------------*/
- // For node restarts we will also add a request for permission
- // to continue the system restart.
- // The permission is given by the master node in the alive set.
- /*-----------------------------------------------------------------------*/
- createMutexes(signal, 0);
- break;
-
- case ZNDB_SPH3:
- jam();
- /*-----------------------------------------------------------------------*/
- // Non-master nodes performing an initial start will execute
- // the start request here since the
- // initial start do not synchronise so much from the master.
- // In the master nodes the start
- // request will be sent directly to dih (in ndb_startreq) when all
- // nodes have completed phase 3 of the start.
- /*-----------------------------------------------------------------------*/
- cmasterState = MASTER_IDLE;
- if(cstarttype == NodeState::ST_INITIAL_START ||
- cstarttype == NodeState::ST_SYSTEM_RESTART){
- jam();
- cmasterState = isMaster() ? MASTER_ACTIVE : MASTER_IDLE;
- }
- if (!isMaster() && cstarttype == NodeState::ST_INITIAL_START) {
- jam();
- ndbStartReqLab(signal, cntrRef);
- return;
- }//if
- ndbsttorry10Lab(signal, __LINE__);
- break;
-
- case ZNDB_SPH4:
- jam();
- c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
- cmasterTakeOverNode = ZNIL;
- switch(typestart){
- case NodeState::ST_INITIAL_START:
- jam();
- ndbsttorry10Lab(signal, __LINE__);
- return;
- case NodeState::ST_SYSTEM_RESTART:
- jam();
- if (isMaster()) {
- jam();
- systemRestartTakeOverLab(signal);
- if (anyActiveTakeOver() && false) {
- jam();
- ndbout_c("1 - anyActiveTakeOver == true");
- return;
- }
- }
- ndbsttorry10Lab(signal, __LINE__);
- return;
- case NodeState::ST_INITIAL_NODE_RESTART:
- case NodeState::ST_NODE_RESTART:
- jam();
- /***********************************************************************
- * When starting nodes while system is operational we must be controlled
- * by the master since only one node restart is allowed at a time.
- * When this signal is confirmed the master has also copied the
- * dictionary and the distribution information.
- */
- StartMeReq * req = (StartMeReq*)&signal->theData[0];
- req->startingRef = reference();
- req->startingVersion = 0; // Obsolete
- sendSignal(cmasterdihref, GSN_START_MEREQ, signal,
- StartMeReq::SignalLength, JBB);
- return;
- }
- ndbrequire(false);
- break;
- case ZNDB_SPH5:
- jam();
- switch(typestart){
- case NodeState::ST_INITIAL_START:
- case NodeState::ST_SYSTEM_RESTART:
- jam();
- jam();
- /*---------------------------------------------------------------------*/
- // WE EXECUTE A LOCAL CHECKPOINT AS A PART OF A SYSTEM RESTART.
- // THE IDEA IS THAT WE NEED TO
- // ENSURE THAT WE CAN RECOVER FROM PROBLEMS CAUSED BY MANY NODE
- // CRASHES THAT CAUSES THE LOG
- // TO GROW AND THE NUMBER OF LOG ROUNDS TO EXECUTE TO GROW.
- // THIS CAN OTHERWISE GET US INTO
- // A SITUATION WHICH IS UNREPAIRABLE. THUS WE EXECUTE A CHECKPOINT
- // BEFORE ALLOWING ANY TRANSACTIONS TO START.
- /*---------------------------------------------------------------------*/
- if (!isMaster()) {
- jam();
- ndbsttorry10Lab(signal, __LINE__);
- return;
- }//if
-
- c_lcpState.immediateLcpStart = true;
- cwaitLcpSr = true;
- checkLcpStart(signal, __LINE__);
- return;
- case NodeState::ST_NODE_RESTART:
- case NodeState::ST_INITIAL_NODE_RESTART:
- jam();
- signal->theData[0] = cownNodeId;
- signal->theData[1] = reference();
- sendSignal(cmasterdihref, GSN_START_COPYREQ, signal, 2, JBB);
- return;
- }
- ndbrequire(false);
- case ZNDB_SPH6:
- jam();
- switch(typestart){
- case NodeState::ST_INITIAL_START:
- case NodeState::ST_SYSTEM_RESTART:
- jam();
- if(isMaster()){
- jam();
- startGcp(signal);
- }
- ndbsttorry10Lab(signal, __LINE__);
- return;
- case NodeState::ST_NODE_RESTART:
- case NodeState::ST_INITIAL_NODE_RESTART:
- ndbsttorry10Lab(signal, __LINE__);
- return;
- }
- ndbrequire(false);
- break;
- default:
- jam();
- ndbsttorry10Lab(signal, __LINE__);
- break;
- }//switch
- }//Dbdih::execNDB_STTOR()
- void
- Dbdih::createMutexes(Signal * signal, Uint32 count){
- Callback c = { safe_cast(&Dbdih::createMutex_done), count };
- switch(count){
- case 0:{
- Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
- mutex.create(c);
- return;
- }
- case 1:{
- Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
- mutex.create(c);
- return;
- }
- }
- signal->theData[0] = reference();
- sendSignal(cntrlblockref, GSN_READ_NODESREQ, signal, 1, JBB);
- }
- void
- Dbdih::createMutex_done(Signal* signal, Uint32 senderData, Uint32 retVal){
- jamEntry();
- ndbrequire(retVal == 0);
- switch(senderData){
- case 0:{
- Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
- mutex.release();
- }
- case 1:{
- Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
- mutex.release();
- }
- }
-
- createMutexes(signal, senderData + 1);
- }
- /*****************************************************************************/
- /* ------------------------------------------------------------------------- */
- /* WE HAVE BEEN REQUESTED BY NDBCNTR TO PERFORM A RESTART OF THE */
- /* DATABASE TABLES. */
- /* THIS SIGNAL IS SENT AFTER COMPLETING PHASE 3 IN ALL BLOCKS IN A */
- /* SYSTEM RESTART. WE WILL ALSO JUMP TO THIS LABEL FROM PHASE 3 IN AN */
- /* INITIAL START. */
- /* ------------------------------------------------------------------------- */
- /*****************************************************************************/
- void Dbdih::execNDB_STARTREQ(Signal* signal)
- {
- jamEntry();
- BlockReference ref = signal->theData[0];
- cstarttype = signal->theData[1];
- ndbStartReqLab(signal, ref);
- }//Dbdih::execNDB_STARTREQ()
- void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref)
- {
- cndbStartReqBlockref = ref;
- if (cstarttype == NodeState::ST_INITIAL_START) {
- jam();
- initRestartInfo();
- initGciFilesLab(signal);
- return;
- }
-
- ndbrequire(isMaster());
- copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
- }//Dbdih::ndbStartReqLab()
- void Dbdih::execREAD_NODESCONF(Signal* signal)
- {
- unsigned i;
- ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
- jamEntry();
- Uint32 nodeArray[MAX_NDB_NODES];
- csystemnodes = readNodes->noOfNodes;
- cmasterNodeId = readNodes->masterNodeId;
- int index = 0;
- NdbNodeBitmask tmp; tmp.assign(2, readNodes->allNodes);
- for (i = 1; i < MAX_NDB_NODES; i++){
- jam();
- if(tmp.get(i)){
- jam();
- nodeArray[index] = i;
- if(NodeBitmask::get(readNodes->inactiveNodes, i) == false){
- jam();
- con_lineNodes++;
- }//if
- index++;
- }//if
- }//for
-
- if(cstarttype == NodeState::ST_SYSTEM_RESTART ||
- cstarttype == NodeState::ST_NODE_RESTART){
- for(i = 1; i<MAX_NDB_NODES; i++){
- const Uint32 stat = Sysfile::getNodeStatus(i, SYSFILE->nodeStatus);
- if(stat == Sysfile::NS_NotDefined && !tmp.get(i)){
- jam();
- continue;
- }
-
- if(tmp.get(i) && stat != Sysfile::NS_NotDefined){
- jam();
- continue;
- }
- char buf[255];
- BaseString::snprintf(buf, sizeof(buf),
- "Illegal configuration change."
- " Initial start needs to be performed "
- " when changing no of storage nodes (node %d)", i);
- progError(__LINE__,
- ERR_INVALID_CONFIG,
- buf);
- }
- }
-
- ndbrequire(csystemnodes >= 1 && csystemnodes < MAX_NDB_NODES);
- if (cstarttype == NodeState::ST_INITIAL_START) {
- jam();
- ndbrequire(cnoReplicas <= csystemnodes);
- calculateHotSpare();
- ndbrequire(cnoReplicas <= (csystemnodes - cnoHotSpare));
- }//if
- cmasterdihref = calcDihBlockRef(cmasterNodeId);
- /*-------------------------------------------------------------------------*/
- /* MAKE THE LIST OF PRN-RECORD WHICH IS ONE OF THE NODES-LIST IN THIS BLOCK*/
- /*-------------------------------------------------------------------------*/
- makePrnList(readNodes, nodeArray);
- if (cstarttype == NodeState::ST_INITIAL_START) {
- jam();
- /**----------------------------------------------------------------------
- * WHEN WE INITIALLY START A DATABASE WE WILL CREATE NODE GROUPS.
- * ALL NODES ARE PUT INTO NODE GROUPS ALTHOUGH HOT SPARE NODES ARE PUT
- * INTO A SPECIAL NODE GROUP. IN EACH NODE GROUP WE HAVE THE SAME AMOUNT
- * OF NODES AS THERE ARE NUMBER OF REPLICAS.
- * ONE POSSIBLE USAGE OF NODE GROUPS ARE TO MAKE A NODE GROUP A COMPLETE
- * FRAGMENT OF THE DATABASE. THIS MEANS THAT ALL REPLICAS WILL BE STORED
- * IN THE NODE GROUP.
- *-----------------------------------------------------------------------*/
- makeNodeGroups(nodeArray);
- }//if
- ndbrequire(checkNodeAlive(cmasterNodeId));
- if (cstarttype == NodeState::ST_INITIAL_START) {
- jam();
- /**-----------------------------------------------------------------------
- * INITIALISE THE SECOND NODE-LIST AND SET NODE BITS AND SOME NODE STATUS.
- * VERY CONNECTED WITH MAKE_NODE_GROUPS. CHANGING ONE WILL AFFECT THE
- * OTHER AS WELL.
- *-----------------------------------------------------------------------*/
- setInitialActiveStatus();
- } else if (cstarttype == NodeState::ST_SYSTEM_RESTART) {
- jam();
- /*empty*/;
- } else if ((cstarttype == NodeState::ST_NODE_RESTART) ||
- (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)) {
- jam();
- nodeRestartPh2Lab(signal);
- return;
- } else {
- ndbrequire(false);
- }//if
- /**------------------------------------------------------------------------
- * ESTABLISH CONNECTIONS WITH THE OTHER DIH BLOCKS AND INITIALISE THIS
- * NODE-LIST THAT HANDLES CONNECTION WITH OTHER DIH BLOCKS.
- *-------------------------------------------------------------------------*/
- ndbsttorry10Lab(signal, __LINE__);
- }//Dbdih::execREAD_NODESCONF()
- /*---------------------------------------------------------------------------*/
- /* START NODE LOGIC FOR NODE RESTART */
- /*---------------------------------------------------------------------------*/
- void Dbdih::nodeRestartPh2Lab(Signal* signal)
- {
- /*------------------------------------------------------------------------*/
- // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
- // RUNNING SYSTEM.
- /*------------------------------------------------------------------------*/
- StartPermReq * const req = (StartPermReq *)&signal->theData[0];
- req->blockRef = reference();
- req->nodeId = cownNodeId;
- req->startType = cstarttype;
- sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
- }//Dbdih::nodeRestartPh2Lab()
- void Dbdih::execSTART_PERMCONF(Signal* signal)
- {
- jamEntry();
- CRASH_INSERTION(7121);
- Uint32 nodeId = signal->theData[0];
- cfailurenr = signal->theData[1];
- ndbrequire(nodeId == cownNodeId);
- ndbsttorry10Lab(signal, __LINE__);
- }//Dbdih::execSTART_PERMCONF()
- void Dbdih::execSTART_PERMREF(Signal* signal)
- {
- jamEntry();
- Uint32 errorCode = signal->theData[1];
- if (errorCode == ZNODE_ALREADY_STARTING_ERROR) {
- jam();
- /*-----------------------------------------------------------------------*/
- // The master was busy adding another node. We will wait for a second and
- // try again.
- /*-----------------------------------------------------------------------*/
- signal->theData[0] = DihContinueB::ZSTART_PERMREQ_AGAIN;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
- return;
- }//if
- /*------------------------------------------------------------------------*/
- // Some node process in another node involving our node was still active. We
- // will recover from this by crashing here.
- // This is controlled restart using the
- // already existing features of node crashes. It is not a bug getting here.
- /*-------------------------------------------------------------------------*/
- ndbrequire(false);
- return;
- }//Dbdih::execSTART_PERMREF()
- /*---------------------------------------------------------------------------*/
- /* THIS SIGNAL IS RECEIVED IN THE STARTING NODE WHEN THE START_MEREQ */
- /* HAS BEEN EXECUTED IN THE MASTER NODE. */
- /*---------------------------------------------------------------------------*/
- void Dbdih::execSTART_MECONF(Signal* signal)
- {
- jamEntry();
- StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
- Uint32 nodeId = startMe->startingNodeId;
- const Uint32 startWord = startMe->startWord;
- Uint32 i;
-
- CRASH_INSERTION(7130);
- ndbrequire(nodeId == cownNodeId);
- arrGuard(startWord + StartMeConf::DATA_SIZE, sizeof(cdata)/4);
- for(i = 0; i < StartMeConf::DATA_SIZE; i++)
- cdata[startWord+i] = startMe->data[i];
-
- if(startWord + StartMeConf::DATA_SIZE < Sysfile::SYSFILE_SIZE32){
- jam();
- /**
- * We are still waiting for data
- */
- return;
- }
- jam();
- /**
- * Copy into sysfile
- *
- * But dont copy lastCompletedGCI:s
- */
- Uint32 tempGCP[MAX_NDB_NODES];
- for(i = 0; i < MAX_NDB_NODES; i++)
- tempGCP[i] = SYSFILE->lastCompletedGCI[i];
- for(i = 0; i < Sysfile::SYSFILE_SIZE32; i++)
- sysfileData[i] = cdata[i];
- for(i = 0; i < MAX_NDB_NODES; i++)
- SYSFILE->lastCompletedGCI[i] = tempGCP[i];
- setNodeActiveStatus();
- setNodeGroups();
- ndbsttorry10Lab(signal, __LINE__);
- }//Dbdih::execSTART_MECONF()
- void Dbdih::execSTART_COPYCONF(Signal* signal)
- {
- jamEntry();
- Uint32 nodeId = signal->theData[0];
- ndbrequire(nodeId == cownNodeId);
- CRASH_INSERTION(7132);
- ndbsttorry10Lab(signal, __LINE__);
- return;
- }//Dbdih::execSTART_COPYCONF()
- /*---------------------------------------------------------------------------*/
- /* MASTER LOGIC FOR NODE RESTART */
- /*---------------------------------------------------------------------------*/
- /* NODE RESTART PERMISSION REQUEST */
- /*---------------------------------------------------------------------------*/
- // A REQUEST FROM A STARTING NODE TO PERFORM A NODE RESTART. IF NO OTHER NODE
- // IS ACTIVE IN PERFORMING A NODE RESTART AND THERE ARE NO ACTIVE PROCESSES IN
- // THIS NODE INVOLVING THE STARTING NODE THIS REQUEST WILL BE GRANTED.
- /*---------------------------------------------------------------------------*/
- void Dbdih::execSTART_PERMREQ(Signal* signal)
- {
- StartPermReq * const req = (StartPermReq*)&signal->theData[0];
- jamEntry();
- const BlockReference retRef = req->blockRef;
- const Uint32 nodeId = req->nodeId;
- const Uint32 typeStart = req->startType;
-
- CRASH_INSERTION(7122);
- ndbrequire(isMaster());
- ndbrequire(refToNode(retRef) == nodeId);
- if ((c_nodeStartMaster.activeState) ||
- (c_nodeStartMaster.wait != ZFALSE)) {
- jam();
- signal->theData[0] = nodeId;
- signal->theData[1] = ZNODE_ALREADY_STARTING_ERROR;
- sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
- return;
- }//if
- if (getNodeStatus(nodeId) != NodeRecord::DEAD){
- ndbout << "nodeStatus in START_PERMREQ = "
- << (Uint32) getNodeStatus(nodeId) << endl;
- ndbrequire(false);
- }//if
- /*----------------------------------------------------------------------
- * WE START THE INCLUSION PROCEDURE
- * ---------------------------------------------------------------------*/
- c_nodeStartMaster.failNr = cfailurenr;
- c_nodeStartMaster.wait = ZFALSE;
- c_nodeStartMaster.startInfoErrorCode = 0;
- c_nodeStartMaster.startNode = nodeId;
- c_nodeStartMaster.activeState = true;
- c_nodeStartMaster.m_outstandingGsn = GSN_START_INFOREQ;
-
- setNodeStatus(nodeId, NodeRecord::STARTING);
- /**
- * But if it's a NodeState::ST_INITIAL_NODE_RESTART
- *
- * We first have to clear LCP's
- * For normal node restart we simply ensure that all nodes
- * are informed of the node restart
- */
- StartInfoReq *const r =(StartInfoReq*)&signal->theData[0];
- r->startingNodeId = nodeId;
- r->typeStart = typeStart;
- r->systemFailureNo = cfailurenr;
- sendLoopMacro(START_INFOREQ, sendSTART_INFOREQ);
- }//Dbdih::execSTART_PERMREQ()
- void Dbdih::execSTART_INFOREF(Signal* signal)
- {
- StartInfoRef * ref = (StartInfoRef*)&signal->theData[0];
- if (getNodeStatus(ref->startingNodeId) != NodeRecord::STARTING) {
- jam();
- return;
- }//if
- ndbrequire(c_nodeStartMaster.startNode == ref->startingNodeId);
- c_nodeStartMaster.startInfoErrorCode = ref->errorCode;
- startInfoReply(signal, ref->sendingNodeId);
- }//Dbdih::execSTART_INFOREF()
- void Dbdih::execSTART_INFOCONF(Signal* signal)
- {
- jamEntry();
- StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
- if (getNodeStatus(conf->startingNodeId) != NodeRecord::STARTING) {
- jam();
- return;
- }//if
- ndbrequire(c_nodeStartMaster.startNode == conf->startingNodeId);
- startInfoReply(signal, conf->sendingNodeId);
- }//Dbdih::execSTART_INFOCONF()
- void Dbdih::startInfoReply(Signal* signal, Uint32 nodeId)
- {
- receiveLoopMacro(START_INFOREQ, nodeId);
- /**
- * We're finished with the START_INFOREQ's
- */
- if (c_nodeStartMaster.startInfoErrorCode == 0) {
- jam();
- /**
- * Everything has been a success so far
- */
- StartPermConf * conf = (StartPermConf*)&signal->theData[0];
- conf->startingNodeId = c_nodeStartMaster.startNode;
- conf->systemFailureNo = cfailurenr;
- sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
- GSN_START_PERMCONF, signal, StartPermConf::SignalLength, JBB);
- c_nodeStartMaster.m_outstandingGsn = GSN_START_PERMCONF;
- } else {
- jam();
- StartPermRef * ref = (StartPermRef*)&signal->theData[0];
- ref->startingNodeId = c_nodeStartMaster.startNode;
- ref->errorCode = c_nodeStartMaster.startInfoErrorCode;
- sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
- GSN_START_PERMREF, signal, StartPermRef::SignalLength, JBB);
- nodeResetStart();
- }//if
- }//Dbdih::startInfoReply()
- /*---------------------------------------------------------------------------*/
- /* NODE RESTART CONTINUE REQUEST */
- /*---------------------------------------------------------------------------*/
- // THIS SIGNAL AND THE CODE BELOW IS EXECUTED BY THE MASTER WHEN IT HAS BEEN
- // REQUESTED TO START UP A NEW NODE. The master instructs the starting node
- // how to set up its log for continued execution.
- /*---------------------------------------------------------------------------*/
- void Dbdih::execSTART_MEREQ(Signal* signal)
- {
- StartMeReq * req = (StartMeReq*)&signal->theData[0];
- jamEntry();
- const BlockReference Tblockref = req->startingRef;
- const Uint32 Tnodeid = refToNode(Tblockref);
- ndbrequire(isMaster());
- ndbrequire(c_nodeStartMaster.startNode == Tnodeid);
- ndbrequire(getNodeStatus(Tnodeid) == NodeRecord::STARTING);
-
- sendSTART_RECREQ(signal, Tnodeid);
- }//Dbdih::execSTART_MEREQ()
- void Dbdih::nodeRestartStartRecConfLab(Signal* signal)
- {
- c_nodeStartMaster.blockLcp = true;
- if ((c_lcpState.lcpStatus != LCP_STATUS_IDLE) &&
- (c_lcpState.lcpStatus != LCP_TCGET)) {
- jam();
- /*-----------------------------------------------------------------------*/
- // WE WILL NOT ALLOW A NODE RESTART TO COME IN WHEN A LOCAL CHECKPOINT IS
- // ONGOING. IT WOULD COMPLICATE THE LCP PROTOCOL TOO MUCH. WE WILL ADD THIS
- // LATER.
- /*-----------------------------------------------------------------------*/
- return;
- }//if
- lcpBlockedLab(signal);
- }//Dbdih::nodeRestartStartRecConfLab()
- void Dbdih::lcpBlockedLab(Signal* signal)
- {
- ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)==NodeRecord::STARTING);
- /*------------------------------------------------------------------------*/
- // NOW WE HAVE COPIED ALL INFORMATION IN DICT WE ARE NOW READY TO COPY ALL
- // INFORMATION IN DIH TO THE NEW NODE.
- /*------------------------------------------------------------------------*/
- c_nodeStartMaster.wait = 10;
- signal->theData[0] = DihContinueB::ZCOPY_NODE;
- signal->theData[1] = 0;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
- c_nodeStartMaster.m_outstandingGsn = GSN_COPY_TABREQ;
- }//Dbdih::lcpBlockedLab()
- void Dbdih::nodeDictStartConfLab(Signal* signal)
- {
- /*-------------------------------------------------------------------------*/
- // NOW WE HAVE COPIED BOTH DIH AND DICT INFORMATION. WE ARE NOW READY TO
- // INTEGRATE THE NODE INTO THE LCP AND GCP PROTOCOLS AND TO ALLOW UPDATES OF
- // THE DICTIONARY AGAIN.
- /*-------------------------------------------------------------------------*/
- c_nodeStartMaster.wait = ZFALSE;
- c_nodeStartMaster.blockGcp = true;
- if (cgcpStatus != GCP_READY) {
- /*-----------------------------------------------------------------------*/
- // The global checkpoint is executing. Wait until it is completed before we
- // continue processing the node recovery.
- /*-----------------------------------------------------------------------*/
- jam();
- return;
- }//if
- gcpBlockedLab(signal);
- /*-----------------------------------------------------------------*/
- // Report that node restart has completed copy of dictionary.
- /*-----------------------------------------------------------------*/
- signal->theData[0] = EventReport::NR_CopyDict;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
- }//Dbdih::nodeDictStartConfLab()
- void Dbdih::dihCopyCompletedLab(Signal* signal)
- {
- BlockReference ref = calcDictBlockRef(c_nodeStartMaster.startNode);
- DictStartReq * req = (DictStartReq*)&signal->theData[0];
- req->restartGci = cnewgcp;
- req->senderRef = reference();
- sendSignal(ref, GSN_DICTSTARTREQ,
- signal, DictStartReq::SignalLength, JBB);
- c_nodeStartMaster.m_outstandingGsn = GSN_DICTSTARTREQ;
- c_nodeStartMaster.wait = 0;
- }//Dbdih::dihCopyCompletedLab()
- void Dbdih::gcpBlockedLab(Signal* signal)
- {
- /*-----------------------------------------------------------------*/
- // Report that node restart has completed copy of distribution info.
- /*-----------------------------------------------------------------*/
- signal->theData[0] = EventReport::NR_CopyDistr;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
- /**
- * The node DIH will be part of LCP
- */
- NodeRecordPtr nodePtr;
- nodePtr.i = c_nodeStartMaster.startNode;
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- nodePtr.p->m_inclDihLcp = true;
-
- /*-------------------------------------------------------------------------*/
- // NOW IT IS TIME TO INFORM ALL OTHER NODES IN THE CLUSTER OF THE STARTED
- // NODE SUCH THAT THEY ALSO INCLUDE THE NODE IN THE NODE LISTS AND SO FORTH.
- /*------------------------------------------------------------------------*/
- sendLoopMacro(INCL_NODEREQ, sendINCL_NODEREQ);
- /*-------------------------------------------------------------------------*/
- // We also need to send to the starting node to ensure he is aware of the
- // global checkpoint id and the correct state. We do not wait for any reply
- // since the starting node will not send any.
- /*-------------------------------------------------------------------------*/
- sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
- }//Dbdih::gcpBlockedLab()
- /*---------------------------------------------------------------------------*/
- // THIS SIGNAL IS EXECUTED IN BOTH SLAVES AND IN THE MASTER
- /*---------------------------------------------------------------------------*/
- void Dbdih::execINCL_NODECONF(Signal* signal)
- {
- Uint32 TsendNodeId;
- Uint32 TstartNode_or_blockref;
-
- jamEntry();
- TstartNode_or_blockref = signal->theData[0];
- TsendNodeId = signal->theData[1];
- if (TstartNode_or_blockref == clocallqhblockref) {
- jam();
- /*-----------------------------------------------------------------------*/
- // THIS SIGNAL CAME FROM THE LOCAL LQH BLOCK.
- // WE WILL NOW SEND INCLUDE TO THE TC BLOCK.
- /*-----------------------------------------------------------------------*/
- signal->theData[0] = reference();
- signal->theData[1] = c_nodeStartSlave.nodeId;
- sendSignal(clocaltcblockref, GSN_INCL_NODEREQ, signal, 2, JBB);
- return;
- }//if
- if (TstartNode_or_blockref == clocaltcblockref) {
- jam();
- /*----------------------------------------------------------------------*/
- // THIS SIGNAL CAME FROM THE LOCAL LQH BLOCK.
- // WE WILL NOW SEND INCLUDE TO THE DICT BLOCK.
- /*----------------------------------------------------------------------*/
- signal->theData[0] = reference();
- signal->theData[1] = c_nodeStartSlave.nodeId;
- sendSignal(cdictblockref, GSN_INCL_NODEREQ, signal, 2, JBB);
- return;
- }//if
- if (TstartNode_or_blockref == cdictblockref) {
- jam();
- /*-----------------------------------------------------------------------*/
- // THIS SIGNAL CAME FROM THE LOCAL DICT BLOCK. WE WILL NOW SEND CONF TO THE
- // BACKUP.
- /*-----------------------------------------------------------------------*/
- signal->theData[0] = reference();
- signal->theData[1] = c_nodeStartSlave.nodeId;
- sendSignal(BACKUP_REF, GSN_INCL_NODEREQ, signal, 2, JBB);
-
- // Suma will not send response to this for now, later...
- sendSignal(SUMA_REF, GSN_INCL_NODEREQ, signal, 2, JBB);
- return;
- }//if
- if (TstartNode_or_blockref == numberToRef(BACKUP, getOwnNodeId())){
- jam();
- signal->theData[0] = c_nodeStartSlave.nodeId;
- signal->theData[1] = cownNodeId;
- sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
- c_nodeStartSlave.nodeId = 0;
- return;
- }
-
- ndbrequire(cmasterdihref = reference());
- receiveLoopMacro(INCL_NODEREQ, TsendNodeId);
- CRASH_INSERTION(7128);
- /*-------------------------------------------------------------------------*/
- // Now that we have included the starting node in the node lists in the
- // various blocks we are ready to start the global checkpoint protocol
- /*------------------------------------------------------------------------*/
- c_nodeStartMaster.wait = 11;
- c_nodeStartMaster.blockGcp = false;
- signal->theData[0] = reference();
- sendSignal(reference(), GSN_UNBLO_DICTCONF, signal, 1, JBB);
- }//Dbdih::execINCL_NODECONF()
- void Dbdih::execUNBLO_DICTCONF(Signal* signal)
- {
- jamEntry();
- c_nodeStartMaster.wait = ZFALSE;
- if (!c_nodeStartMaster.activeState) {
- jam();
- return;
- }//if
- CRASH_INSERTION(7129);
- /**-----------------------------------------------------------------------
- * WE HAVE NOW PREPARED IT FOR INCLUSION IN THE LCP PROTOCOL.
- * WE CAN NOW START THE LCP PROTOCOL AGAIN.
- * WE HAVE ALSO MADE THIS FOR THE GCP PROTOCOL.
- * WE ARE READY TO START THE PROTOCOLS AND RESPOND TO THE START REQUEST
- * FROM THE STARTING NODE.
- *------------------------------------------------------------------------*/
-
- StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
-
- const Uint32 wordPerSignal = StartMeConf::DATA_SIZE;
- const int noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
- wordPerSignal);
-
- startMe->startingNodeId = c_nodeStartMaster.startNode;
- startMe->startWord = 0;
-
- const Uint32 ref = calcDihBlockRef(c_nodeStartMaster.startNode);
- for(int i = 0; i < noOfSignals; i++){
- jam();
- { // Do copy
- const int startWord = startMe->startWord;
- for(Uint32 j = 0; j < wordPerSignal; j++){
- startMe->data[j] = sysfileData[j+startWord];
- }
- }
- sendSignal(ref, GSN_START_MECONF, signal, StartMeConf::SignalLength, JBB);
- startMe->startWord += wordPerSignal;
- }//for
- c_nodeStartMaster.m_outstandingGsn = GSN_START_MECONF;
- }//Dbdih::execUNBLO_DICTCONF()
- /*---------------------------------------------------------------------------*/
- /* NODE RESTART COPY REQUEST */
- /*---------------------------------------------------------------------------*/
- // A NODE RESTART HAS REACHED ITS FINAL PHASE WHEN THE DATA IS TO BE COPIED
- // TO THE NODE. START_COPYREQ IS EXECUTED BY THE MASTER NODE.
- /*---------------------------------------------------------------------------*/
- void Dbdih::execSTART_COPYREQ(Signal* signal)
- {
- jamEntry();
- Uint32 startNodeId = signal->theData[0];
- //BlockReference startingRef = signal->theData[1];
- ndbrequire(c_nodeStartMaster.startNode == startNodeId);
- /*-------------------------------------------------------------------------*/
- // REPORT Copy process of node restart is now about to start up.
- /*-------------------------------------------------------------------------*/
- signal->theData[0] = EventReport::NR_CopyFragsStarted;
- signal->theData[1] = startNodeId;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
- CRASH_INSERTION(7131);
- nodeRestartTakeOver(signal, startNodeId);
- // BlockReference ref = calcQmgrBlockRef(startNodeId);
- // signal->theData[0] = cownNodeId;
- // Remove comments as soon as I open up the Qmgr block
- // TODO_RONM
- // sendSignal(ref, GSN_ALLOW_NODE_CRASHORD, signal, 1, JBB);
- }//Dbdih::execSTART_COPYREQ()
- /*---------------------------------------------------------------------------*/
- /* SLAVE LOGIC FOR NODE RESTART */
- /*---------------------------------------------------------------------------*/
- void Dbdih::execSTART_INFOREQ(Signal* signal)
- {
- jamEntry();
- StartInfoReq *const req =(StartInfoReq*)&signal->theData[0];
- Uint32 startNode = req->startingNodeId;
- if (cfailurenr != req->systemFailureNo) {
- jam();
- //---------------------------------------------------------------
- // A failure occurred since master sent this request. We will ignore
- // this request since the node is already dead that is starting.
- //---------------------------------------------------------------
- return;
- }//if
- CRASH_INSERTION(7123);
- if (isMaster()) {
- jam();
- ndbrequire(getNodeStatus(startNode) == NodeRecord::STARTING);
- } else {
- jam();
- ndbrequire(getNodeStatus(startNode) == NodeRecord::DEAD);
- }//if
- if ((!getAllowNodeStart(startNode)) ||
- (c_nodeStartSlave.nodeId != 0) ||
- (ERROR_INSERTED(7124))) {
- jam();
- StartInfoRef *const ref =(StartInfoRef*)&signal->theData[0];
- ref->startingNodeId = startNode;
- ref->sendingNodeId = cownNodeId;
- ref->errorCode = ZNODE_START_DISALLOWED_ERROR;
- sendSignal(cmasterdihref, GSN_START_INFOREF, signal,
- StartInfoRef::SignalLength, JBB);
- return;
- }//if
- setNodeStatus(startNode, NodeRecord::STARTING);
- if (req->typeStart == NodeState::ST_INITIAL_NODE_RESTART) {
- jam();
- setAllowNodeStart(startNode, false);
- invalidateNodeLCP(signal, startNode, 0);
- } else {
- jam();
- StartInfoConf * c = (StartInfoConf*)&signal->theData[0];
- c->sendingNodeId = cownNodeId;
- c->startingNodeId = startNode;
- sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
- StartInfoConf::SignalLength, JBB);
- return;
- }//if
- }//Dbdih::execSTART_INFOREQ()
- void Dbdih::execINCL_NODEREQ(Signal* signal)
- {
- jamEntry();
- Uint32 retRef = signal->theData[0];
- Uint32 nodeId = signal->theData[1];
- Uint32 tnodeStartFailNr = signal->theData[2];
- currentgcp = signal->theData[4];
- CRASH_INSERTION(7127);
- cnewgcp = currentgcp;
- coldgcp = currentgcp - 1;
- if (!isMaster()) {
- jam();
- /*-----------------------------------------------------------------------*/
- // We don't want to change the state of the master since he can be in the
- // state LCP_TCGET at this time.
- /*-----------------------------------------------------------------------*/
- c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
- }//if
- /*-------------------------------------------------------------------------*/
- // When a node is restarted we must ensure that a lcp will be run
- // as soon as possible and the reset the delay according to the original
- // configuration.
- // Without an initial local checkpoint the new node will not be available.
- /*-------------------------------------------------------------------------*/
- if (getOwnNodeId() == nodeId) {
- jam();
- /*-----------------------------------------------------------------------*/
- // We are the starting node. We came here only to set the global checkpoint
- // id's and the lcp status.
- /*-----------------------------------------------------------------------*/
- CRASH_INSERTION(7171);
- return;
- }//if
- if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
- jam();
- return;
- }//if
- ndbrequire(cfailurenr == tnodeStartFailNr);
- ndbrequire (c_nodeStartSlave.nodeId == 0);
- c_nodeStartSlave.nodeId = nodeId;
-
- ndbrequire (retRef == cmasterdihref);
- NodeRecordPtr nodePtr;
- nodePtr.i = nodeId;
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- Sysfile::ActiveStatus TsaveState = nodePtr.p->activeStatus;
- Uint32 TnodeGroup = nodePtr.p->nodeGroup;
- new (nodePtr.p) NodeRecord();
- nodePtr.p->nodeGroup = TnodeGroup;
- nodePtr.p->activeStatus = TsaveState;
- nodePtr.p->nodeStatus = NodeRecord::ALIVE;
- nodePtr.p->useInTransactions = true;
- nodePtr.p->m_inclDihLcp = true;
- removeDeadNode(nodePtr);
- insertAlive(nodePtr);
- con_lineNodes++;
- /*-------------------------------------------------------------------------*/
- // WE WILL ALSO SEND THE INCLUDE NODE REQUEST TO THE LOCAL LQH BLOCK.
- /*-------------------------------------------------------------------------*/
- signal->theData[0] = reference();
- signal->theData[1] = nodeId;
- signal->theData[2] = currentgcp;
- sendSignal(clocallqhblockref, GSN_INCL_NODEREQ, signal, 3, JBB);
- }//Dbdih::execINCL_NODEREQ()
- /* ------------------------------------------------------------------------- */
- // execINCL_NODECONF() is found in the master logic part since it is used by
- // both the master and the slaves.
- /* ------------------------------------------------------------------------- */
- /*****************************************************************************/
- /*********** TAKE OVER DECISION MODULE *************/
- /*****************************************************************************/
- // This module contains the subroutines that take the decision whether to take
- // over a node now or not.
- /* ------------------------------------------------------------------------- */
- /* MASTER LOGIC FOR SYSTEM RESTART */
- /* ------------------------------------------------------------------------- */
- // WE ONLY COME HERE IF WE ARE THE MASTER AND WE ARE PERFORMING A SYSTEM
- // RESTART. WE ALSO COME HERE DURING THIS SYSTEM RESTART ONE TIME PER NODE
- // THAT NEEDS TAKE OVER.
- /*---------------------------------------------------------------------------*/
- // WE CHECK IF ANY NODE NEEDS TO BE TAKEN OVER AND THE TAKE OVER HAS NOT YET
- // BEEN STARTED OR COMPLETED.
- /*---------------------------------------------------------------------------*/
- void
- Dbdih::systemRestartTakeOverLab(Signal* signal)
- {
- NodeRecordPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- ptrAss(nodePtr, nodeRecord);
- switch (nodePtr.p->activeStatus) {
- case Sysfile::NS_Active:
- case Sysfile::NS_ActiveMissed_1:
- jam();
- break;
- /*---------------------------------------------------------------------*/
- // WE HAVE NOT REACHED A STATE YET WHERE THIS NODE NEEDS TO BE TAKEN OVER
- /*---------------------------------------------------------------------*/
- case Sysfile::NS_ActiveMissed_2:
- case Sysfile::NS_NotActive_NotTakenOver:
- jam();
- /*---------------------------------------------------------------------*/
- // THIS NODE IS IN TROUBLE.
- // WE MUST SUCCEED WITH A LOCAL CHECKPOINT WITH THIS NODE TO REMOVE THE
- // DANGER. IF THE NODE IS NOT ALIVE THEN THIS WILL NOT BE
- // POSSIBLE AND WE CAN START THE TAKE OVER IMMEDIATELY IF WE HAVE ANY
- // NODES THAT CAN PERFORM A TAKE OVER.
- /*---------------------------------------------------------------------*/
- if (nodePtr.p->nodeStatus != NodeRecord::ALIVE) {
- jam();
- Uint32 ThotSpareNode = findHotSpare();
- if (ThotSpareNode != RNIL) {
- jam();
- startTakeOver(signal, RNIL, ThotSpareNode, nodePtr.i);
- }//if
- } else if(nodePtr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver){
- jam();
- /*-------------------------------------------------------------------*/
- // NOT ACTIVE NODES THAT HAVE NOT YET BEEN TAKEN OVER NEEDS TAKE OVER
- // IMMEDIATELY. IF WE ARE ALIVE WE TAKE OVER OUR OWN NODE.
- /*-------------------------------------------------------------------*/
- startTakeOver(signal, RNIL, nodePtr.i, nodePtr.i);
- }//if
- break;
- case Sysfile::NS_TakeOver:
- /**-------------------------------------------------------------------
- * WE MUST HAVE FAILED IN THE MIDDLE OF THE TAKE OVER PROCESS.
- * WE WILL CONCLUDE THE TAKE OVER PROCESS NOW.
- *-------------------------------------------------------------------*/
- if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
- jam();
- Uint32 takeOverNode = Sysfile::getTakeOverNode(nodePtr.i,
- SYSFILE->takeOver);
- if(takeOverNode == 0){
- jam();
- warningEvent("Bug in take-over code restarting");
- takeOverNode = nodePtr.i;
- }
- startTakeOver(signal, RNIL, nodePtr.i, takeOverNode);
- } else {
- jam();
- /**-------------------------------------------------------------------
- * We are not currently taking over, change our active status.
- *-------------------------------------------------------------------*/
- nodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
- setNodeRestartInfoBits();
- }//if
- break;
- case Sysfile::NS_HotSpare:
- jam();
- break;
- /*---------------------------------------------------------------------*/
- // WE NEED NOT TAKE OVER NODES THAT ARE HOT SPARE.
- /*---------------------------------------------------------------------*/
- case Sysfile::NS_NotDefined:
- jam();
- break;
- /*---------------------------------------------------------------------*/
- // WE NEED NOT TAKE OVER NODES THAT DO NOT EVEN EXIST IN THE CLUSTER.
- /*---------------------------------------------------------------------*/
- default:
- ndbrequire(false);
- break;
- }//switch
- }//for
- /*-------------------------------------------------------------------------*/
- /* NO TAKE OVER HAS BEEN INITIATED. */
- /*-------------------------------------------------------------------------*/
- }//Dbdih::systemRestartTakeOverLab()
- /*---------------------------------------------------------------------------*/
- // This subroutine is called as part of node restart in the master node.
- /*---------------------------------------------------------------------------*/
- void Dbdih::nodeRestartTakeOver(Signal* signal, Uint32 startNodeId)
- {
- switch (getNodeActiveStatus(startNodeId)) {
- case Sysfile::NS_Active:
- case Sysfile::NS_ActiveMissed_1:
- case Sysfile::NS_ActiveMissed_2:
- jam();
- /*-----------------------------------------------------------------------*/
- // AN ACTIVE NODE HAS BEEN STARTED. THE ACTIVE NODE MUST THEN GET ALL DATA
- // IT HAD BEFORE ITS CRASH. WE START THE TAKE OVER IMMEDIATELY.
- // SINCE WE ARE AN ACTIVE NODE WE WILL TAKE OVER OUR OWN NODE THAT
- // PREVIOUSLY CRASHED.
- /*-----------------------------------------------------------------------*/
- startTakeOver(signal, RNIL, startNodeId, startNodeId);
- break;
- case Sysfile::NS_HotSpare:{
- jam();
- /*-----------------------------------------------------------------------*/
- // WHEN STARTING UP A HOT SPARE WE WILL CHECK IF ANY NODE NEEDS TO TAKEN
- // OVER. IF SO THEN WE WILL START THE TAKE OVER.
- /*-----------------------------------------------------------------------*/
- bool takeOverStarted = false;
- NodeRecordPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- ptrAss(nodePtr, nodeRecord);
- if (nodePtr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver) {
- jam();
- takeOverStarted = true;
- startTakeOver(signal, RNIL, startNodeId, nodePtr.i);
- }//if
- }//for
- if (!takeOverStarted) {
- jam();
- /*-------------------------------------------------------------------*/
- // NO TAKE OVER WAS NEEDED AT THE MOMENT WE START-UP AND WAIT UNTIL A
- // TAKE OVER IS NEEDED.
- /*-------------------------------------------------------------------*/
- BlockReference ref = calcDihBlockRef(startNodeId);
- signal->theData[0] = startNodeId;
- sendSignal(ref, GSN_START_COPYCONF, signal, 1, JBB);
- }//if
- break;
- }
- case Sysfile::NS_NotActive_NotTakenOver:
- jam();
- /*-----------------------------------------------------------------------*/
- // ALL DATA IN THE NODE IS LOST BUT WE HAVE NOT TAKEN OVER YET. WE WILL
- // TAKE OVER OUR OWN NODE
- /*-----------------------------------------------------------------------*/
- startTakeOver(signal, RNIL, startNodeId, startNodeId);
- break;
- case Sysfile::NS_TakeOver:{
- jam();
- /*--------------------------------------------------------------------
- * We were in the process of taking over but it was not completed.
- * We will complete it now instead.
- *--------------------------------------------------------------------*/
- Uint32 takeOverNode = Sysfile::getTakeOverNode(startNodeId,
- SYSFILE->takeOver);
- startTakeOver(signal, RNIL, startNodeId, takeOverNode);
- break;
- }
- default:
- ndbrequire(false);
- break;
- }//switch
- nodeResetStart();
- }//Dbdih::nodeRestartTakeOver()
- /*************************************************************************/
- // This routine is called when starting a local checkpoint.
- /*************************************************************************/
- void Dbdih::checkStartTakeOver(Signal* signal)
- {
- NodeRecordPtr csoNodeptr;
- Uint32 tcsoHotSpareNode;
- Uint32 tcsoTakeOverNode;
- if (isMaster()) {
- /*-----------------------------------------------------------------*/
- /* WE WILL ONLY START TAKE OVER IF WE ARE MASTER. */
- /*-----------------------------------------------------------------*/
- /* WE WILL ONLY START THE TAKE OVER IF THERE WERE A NEED OF */
- /* A TAKE OVER. */
- /*-----------------------------------------------------------------*/
- /* WE CAN ONLY PERFORM THE TAKE OVER IF WE HAVE A HOT SPARE */
- /* AVAILABLE. */
- /*-----------------------------------------------------------------*/
- tcsoTakeOverNode = 0;
- tcsoHotSpareNode = 0;
- for (csoNodeptr.i = 1; csoNodeptr.i < MAX_NDB_NODES; csoNodeptr.i++) {
- ptrAss(csoNodeptr, nodeRecord);
- if (csoNodeptr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver) {
- jam();
- tcsoTakeOverNode = csoNodeptr.i;
- } else {
- jam();
- if (csoNodeptr.p->activeStatus == Sysfile::NS_HotSpare) {
- jam();
- tcsoHotSpareNode = csoNodeptr.i;
- }//if
- }//if
- }//for
- if ((tcsoTakeOverNode != 0) &&
- (tcsoHotSpareNode != 0)) {
- jam();
- startTakeOver(signal, RNIL, tcsoHotSpareNode, tcsoTakeOverNode);
- }//if
- }//if
- }//Dbdih::checkStartTakeOver()
- /*****************************************************************************/
- /*********** NODE ADDING MODULE *************/
- /*********** CODE TO HANDLE TAKE OVER *************/
- /*****************************************************************************/
- // A take over can be initiated by a number of things:
- // 1) A node restart, usually the node takes over itself but can also take
- // over somebody else if its own data was already taken over
- // 2) At system restart it is necessary to use the take over code to recover
- // nodes which had too old checkpoints to be restorable by the usual
- // restoration from disk.
- // 3) When a node has missed too many local checkpoints and is decided by the
- // master to be taken over by a hot spare node that sits around waiting
- // for this to happen.
- //
- // To support multiple node failures efficiently the code is written such that
- // only one take over can handle transitions in state but during a copy
- // fragment other take overs can perform state transitions.
- /*****************************************************************************/
- void Dbdih::startTakeOver(Signal* signal,
- Uint32 takeOverPtrI,
- Uint32 startNode,
- Uint32 nodeTakenOver)
- {
- NodeRecordPtr toNodePtr;
- NodeGroupRecordPtr NGPtr;
- toNodePtr.i = nodeTakenOver;
- ptrCheckGuard(toNodePtr, MAX_NDB_NODES, nodeRecord);
- NGPtr.i = toNodePtr.p->nodeGroup;
- ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
- TakeOverRecordPtr takeOverPtr;
- if (takeOverPtrI == RNIL) {
- jam();
- setAllowNodeStart(startNode, false);
- seizeTakeOver(takeOverPtr);
- if (startNode == c_nodeStartMaster.startNode) {
- jam();
- takeOverPtr.p->toNodeRestart = true;
- }//if
- takeOverPtr.p->toStartingNode = startNode;
- takeOverPtr.p->toFailedNode = nodeTakenOver;
- } else {
- jam();
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- ndbrequire(takeOverPtr.p->toStartingNode == startNode);
- ndbrequire(takeOverPtr.p->toFailedNode == nodeTakenOver);
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_WAIT_START_TAKE_OVER);
- }//if
- if ((NGPtr.p->activeTakeOver) || (ERROR_INSERTED(7157))) {
- jam();
- /**------------------------------------------------------------------------
- * A take over is already active in this node group. We only allow one
- * take over per node group. Otherwise we will overload the node group and
- * also we will require much more checks when starting up copying of
- * fragments. The parallelism for take over is mainly to ensure that we
- * can handle take over efficiently in large systems with 4 nodes and above
- * A typical case is a 8 node system executing on two 8-cpu boxes.
- * A box crash in one of the boxes will mean 4 nodes crashes.
- * We want to be able to restart those four nodes to some
- * extent in parallel.
- *
- * We will wait for a few seconds and then try again.
- */
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_START_TAKE_OVER;
- signal->theData[0] = DihContinueB::ZSTART_TAKE_OVER;
- signal->theData[1] = takeOverPtr.i;
- signal->theData[2] = startNode;
- signal->theData[3] = nodeTakenOver;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 5000, 4);
- return;
- }//if
- NGPtr.p->activeTakeOver = true;
- if (startNode == nodeTakenOver) {
- jam();
- switch (getNodeActiveStatus(nodeTakenOver)) {
- case Sysfile::NS_Active:
- case Sysfile::NS_ActiveMissed_1:
- case Sysfile::NS_ActiveMissed_2:
- jam();
- break;
- case Sysfile::NS_NotActive_NotTakenOver:
- case Sysfile::NS_TakeOver:
- jam();
- setNodeActiveStatus(nodeTakenOver, Sysfile::NS_TakeOver);
- break;
- default:
- ndbrequire(false);
- }//switch
- } else {
- jam();
- setNodeActiveStatus(nodeTakenOver, Sysfile::NS_HotSpare);
- setNodeActiveStatus(startNode, Sysfile::NS_TakeOver);
- changeNodeGroups(startNode, nodeTakenOver);
- }//if
- setNodeRestartInfoBits();
- /* ---------------------------------------------------------------------- */
- /* WE SET THE RESTART INFORMATION TO INDICATE THAT WE ARE ABOUT TO TAKE */
- /* OVER THE FAILED NODE. WE SET THIS INFORMATION AND WAIT UNTIL THE */
- /* GLOBAL CHECKPOINT HAS WRITTEN THE RESTART INFORMATION. */
- /* ---------------------------------------------------------------------- */
- Sysfile::setTakeOverNode(takeOverPtr.p->toFailedNode, SYSFILE->takeOver,
- startNode);
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_START_COPY;
-
- cstartGcpNow = true;
- }//Dbdih::startTakeOver()
- void Dbdih::changeNodeGroups(Uint32 startNode, Uint32 nodeTakenOver)
- {
- NodeRecordPtr startNodePtr;
- NodeRecordPtr toNodePtr;
- startNodePtr.i = startNode;
- ptrCheckGuard(startNodePtr, MAX_NDB_NODES, nodeRecord);
- toNodePtr.i = nodeTakenOver;
- ptrCheckGuard(toNodePtr, MAX_NDB_NODES, nodeRecord);
- ndbrequire(startNodePtr.p->nodeGroup == ZNIL);
- NodeGroupRecordPtr NGPtr;
- NGPtr.i = toNodePtr.p->nodeGroup;
- ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
- bool nodeFound = false;
- for (Uint32 i = 0; i < NGPtr.p->nodeCount; i++) {
- jam();
- if (NGPtr.p->nodesInGroup[i] == nodeTakenOver) {
- jam();
- NGPtr.p->nodesInGroup[i] = startNode;
- nodeFound = true;
- }//if
- }//for
- ndbrequire(nodeFound);
- Sysfile::setNodeGroup(startNodePtr.i, SYSFILE->nodeGroups, toNodePtr.p->nodeGroup);
- startNodePtr.p->nodeGroup = toNodePtr.p->nodeGroup;
- Sysfile::setNodeGroup(toNodePtr.i, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
- toNodePtr.p->nodeGroup = ZNIL;
- }//Dbdih::changeNodeGroups()
- void Dbdih::checkToCopy()
- {
- TakeOverRecordPtr takeOverPtr;
- for (takeOverPtr.i = 0;takeOverPtr.i < MAX_NDB_NODES; takeOverPtr.i++) {
- ptrAss(takeOverPtr, takeOverRecord);
- /*----------------------------------------------------------------------*/
- // TAKE OVER HANDLING WRITES RESTART INFORMATION THROUGH
- // THE GLOBAL CHECKPOINT
- // PROTOCOL. WE CHECK HERE BEFORE STARTING A WRITE OF THE RESTART
- // INFORMATION.
- /*-----------------------------------------------------------------------*/
- if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_START_COPY) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_START_COPY_ONGOING;
- } else if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_END_COPY) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_END_COPY_ONGOING;
- }//if
- }//for
- }//Dbdih::checkToCopy()
- void Dbdih::checkToCopyCompleted(Signal* signal)
- {
- /* ------------------------------------------------------------------------*/
- /* WE CHECK HERE IF THE WRITING OF TAKE OVER INFORMATION ALSO HAS BEEN */
- /* COMPLETED. */
- /* ------------------------------------------------------------------------*/
- TakeOverRecordPtr toPtr;
- for (toPtr.i = 0; toPtr.i < MAX_NDB_NODES; toPtr.i++) {
- ptrAss(toPtr, takeOverRecord);
- if (toPtr.p->toMasterStatus == TakeOverRecord::TO_START_COPY_ONGOING){
- jam();
- sendStartTo(signal, toPtr.i);
- } else if (toPtr.p->toMasterStatus == TakeOverRecord::TO_END_COPY_ONGOING){
- jam();
- sendEndTo(signal, toPtr.i);
- } else {
- jam();
- }//if
- }//for
- }//Dbdih::checkToCopyCompleted()
- bool Dbdih::checkToInterrupted(TakeOverRecordPtr& takeOverPtr)
- {
- if (checkNodeAlive(takeOverPtr.p->toStartingNode)) {
- jam();
- return false;
- } else {
- jam();
- endTakeOver(takeOverPtr.i);
- return true;
- }//if
- }//Dbdih::checkToInterrupted()
- void Dbdih::sendStartTo(Signal* signal, Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- CRASH_INSERTION(7155);
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- if ((c_startToLock != RNIL) || (ERROR_INSERTED(7158))) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_START;
- signal->theData[0] = DihContinueB::ZSEND_START_TO;
- signal->theData[1] = takeOverPtrI;
- signal->theData[2] = takeOverPtr.p->toStartingNode;
- signal->theData[3] = takeOverPtr.p->toFailedNode;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 30, 4);
- return;
- }//if
- c_startToLock = takeOverPtrI;
- StartToReq * const req = (StartToReq *)&signal->theData[0];
- req->userPtr = takeOverPtr.i;
- req->userRef = reference();
- req->startingNodeId = takeOverPtr.p->toStartingNode;
- req->nodeTakenOver = takeOverPtr.p->toFailedNode;
- req->nodeRestart = takeOverPtr.p->toNodeRestart;
- takeOverPtr.p->toMasterStatus = TakeOverRecord::STARTING;
- sendLoopMacro(START_TOREQ, sendSTART_TOREQ);
- }//Dbdih::sendStartTo()
- void Dbdih::execSTART_TOREQ(Signal* signal)
- {
- TakeOverRecordPtr takeOverPtr;
- jamEntry();
- const StartToReq * const req = (StartToReq *)&signal->theData[0];
- takeOverPtr.i = req->userPtr;
- BlockReference ref = req->userRef;
- Uint32 startingNode = req->startingNodeId;
- CRASH_INSERTION(7133);
- RETURN_IF_NODE_NOT_ALIVE(req->startingNodeId);
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- allocateTakeOver(takeOverPtr);
- initStartTakeOver(req, takeOverPtr);
-
- StartToConf * const conf = (StartToConf *)&signal->theData[0];
- conf->userPtr = takeOverPtr.i;
- conf->sendingNodeId = cownNodeId;
- conf->startingNodeId = startingNode;
- sendSignal(ref, GSN_START_TOCONF, signal, StartToConf::SignalLength, JBB);
- }//Dbdih::execSTART_TOREQ()
- void Dbdih::execSTART_TOCONF(Signal* signal)
- {
- TakeOverRecordPtr takeOverPtr;
- jamEntry();
- const StartToConf * const conf = (StartToConf *)&signal->theData[0];
- CRASH_INSERTION(7147);
- RETURN_IF_NODE_NOT_ALIVE(conf->startingNodeId);
-
- takeOverPtr.i = conf->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::STARTING);
- ndbrequire(takeOverPtr.p->toStartingNode == conf->startingNodeId);
- receiveLoopMacro(START_TOREQ, conf->sendingNodeId);
- CRASH_INSERTION(7134);
- c_startToLock = RNIL;
- startNextCopyFragment(signal, takeOverPtr.i);
- }//Dbdih::execSTART_TOCONF()
- void Dbdih::initStartTakeOver(const StartToReq * req,
- TakeOverRecordPtr takeOverPtr)
- {
- takeOverPtr.p->toCurrentTabref = 0;
- takeOverPtr.p->toCurrentFragid = 0;
- takeOverPtr.p->toStartingNode = req->startingNodeId;
- takeOverPtr.p->toFailedNode = req->nodeTakenOver;
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_STARTED;
- takeOverPtr.p->toCopyNode = RNIL;
- takeOverPtr.p->toCurrentReplica = RNIL;
- takeOverPtr.p->toNodeRestart = req->nodeRestart;
- }//Dbdih::initStartTakeOver()
- void Dbdih::startNextCopyFragment(Signal* signal, Uint32 takeOverPtrI)
- {
- TabRecordPtr tabPtr;
- TakeOverRecordPtr takeOverPtr;
- Uint32 loopCount;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- takeOverPtr.p->toMasterStatus = TakeOverRecord::SELECTING_NEXT;
- loopCount = 0;
- if (ERROR_INSERTED(7159)) {
- loopCount = 100;
- }//if
- while (loopCount++ < 100) {
- tabPtr.i = takeOverPtr.p->toCurrentTabref;
- if (tabPtr.i >= ctabFileSize) {
- jam();
- CRASH_INSERTION(7136);
- sendUpdateTo(signal, takeOverPtr.i, UpdateToReq::TO_COPY_COMPLETED);
- return;
- }//if
- ptrAss(tabPtr, tabRecord);
- if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
- jam();
- takeOverPtr.p->toCurrentFragid = 0;
- takeOverPtr.p->toCurrentTabref++;
- continue;
- }//if
- Uint32 fragId = takeOverPtr.p->toCurrentFragid;
- if (fragId >= tabPtr.p->totalfragments) {
- jam();
- takeOverPtr.p->toCurrentFragid = 0;
- takeOverPtr.p->toCurrentTabref++;
- if (ERROR_INSERTED(7135)) {
- if (takeOverPtr.p->toCurrentTabref == 1) {
- ndbrequire(false);
- }//if
- }//if
- continue;
- }//if
- FragmentstorePtr fragPtr;
- getFragstore(tabPtr.p, fragId, fragPtr);
- ReplicaRecordPtr loopReplicaPtr;
- loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
- while (loopReplicaPtr.i != RNIL) {
- ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
- if (loopReplicaPtr.p->procNode == takeOverPtr.p->toFailedNode) {
- jam();
- /* ----------------------------------------------------------------- */
- /* WE HAVE FOUND A REPLICA THAT BELONGED THE FAILED NODE THAT NEEDS */
- /* TAKE OVER. WE TAKE OVER THIS REPLICA TO THE NEW NODE. */
- /* ----------------------------------------------------------------- */
- takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
- toCopyFragLab(signal, takeOverPtr.i);
- return;
- } else if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
- jam();
- /* ----------------------------------------------------------------- */
- /* WE HAVE OBVIOUSLY STARTED TAKING OVER THIS WITHOUT COMPLETING IT. */
- /* WE */
- /* NEED TO COMPLETE THE TAKE OVER OF THIS REPLICA. */
- /* ----------------------------------------------------------------- */
- takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
- toCopyFragLab(signal, takeOverPtr.i);
- return;
- } else {
- jam();
- loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
- }//if
- }//while
- takeOverPtr.p->toCurrentFragid++;
- }//while
- signal->theData[0] = DihContinueB::ZTO_START_COPY_FRAG;
- signal->theData[1] = takeOverPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
- }//Dbdih::startNextCopyFragment()
- void Dbdih::toCopyFragLab(Signal* signal,
- Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- CreateReplicaRecordPtr createReplicaPtr;
- createReplicaPtr.i = 0;
- ptrAss(createReplicaPtr, createReplicaRecord);
- ReplicaRecordPtr replicaPtr;
- replicaPtr.i = takeOverPtr.p->toCurrentReplica;
- ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
- TabRecordPtr tabPtr;
- tabPtr.i = takeOverPtr.p->toCurrentTabref;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- /* ----------------------------------------------------------------------- */
- /* WE HAVE FOUND A REPLICA THAT NEEDS TAKE OVER. WE WILL START THIS TAKE */
- /* OVER BY ADDING THE FRAGMENT WHEREAFTER WE WILL ORDER THE PRIMARY */
- /* REPLICA TO COPY ITS CONTENT TO THE NEW STARTING REPLICA. */
- /* THIS OPERATION IS A SINGLE USER OPERATION UNTIL WE HAVE SENT */
- /* COPY_FRAGREQ. AFTER SENDING COPY_FRAGREQ WE ARE READY TO START A NEW */
- /* FRAGMENT REPLICA. WE WILL NOT IMPLEMENT THIS IN THE FIRST PHASE. */
- /* ----------------------------------------------------------------------- */
- cnoOfCreateReplicas = 1;
- createReplicaPtr.p->hotSpareUse = true;
- createReplicaPtr.p->dataNodeId = takeOverPtr.p->toStartingNode;
- prepareSendCreateFragReq(signal, takeOverPtrI);
- }//Dbdih::toCopyFragLab()
- void Dbdih::prepareSendCreateFragReq(Signal* signal, Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- TabRecordPtr tabPtr;
- tabPtr.i = takeOverPtr.p->toCurrentTabref;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- FragmentstorePtr fragPtr;
- getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
- Uint32 nodes[MAX_REPLICAS];
- extractNodeInfo(fragPtr.p, nodes);
- takeOverPtr.p->toCopyNode = nodes[0];
- sendCreateFragReq(signal, 0, CreateFragReq::STORED, takeOverPtr.i);
- }//Dbdih::prepareSendCreateFragReq()
- void Dbdih::sendCreateFragReq(Signal* signal,
- Uint32 startGci,
- Uint32 replicaType,
- Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- if ((c_createFragmentLock != RNIL) ||
- ((ERROR_INSERTED(7161))&&(replicaType == CreateFragReq::STORED)) ||
- ((ERROR_INSERTED(7162))&&(replicaType == CreateFragReq::COMMIT_STORED))){
- if (replicaType == CreateFragReq::STORED) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_PREPARE_CREATE;
- } else {
- ndbrequire(replicaType == CreateFragReq::COMMIT_STORED);
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_COMMIT_CREATE;
- }//if
- signal->theData[0] = DihContinueB::ZSEND_CREATE_FRAG;
- signal->theData[1] = takeOverPtr.i;
- signal->theData[2] = replicaType;
- signal->theData[3] = startGci;
- signal->theData[4] = takeOverPtr.p->toStartingNode;
- signal->theData[5] = takeOverPtr.p->toFailedNode;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 50, 6);
- return;
- }//if
- c_createFragmentLock = takeOverPtr.i;
- sendLoopMacro(CREATE_FRAGREQ, nullRoutine);
- CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
- req->userPtr = takeOverPtr.i;
- req->userRef = reference();
- req->tableId = takeOverPtr.p->toCurrentTabref;
- req->fragId = takeOverPtr.p->toCurrentFragid;
- req->startingNodeId = takeOverPtr.p->toStartingNode;
- req->copyNodeId = takeOverPtr.p->toCopyNode;
- req->startGci = startGci;
- req->replicaType = replicaType;
- NodeRecordPtr nodePtr;
- nodePtr.i = cfirstAliveNode;
- do {
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- BlockReference ref = calcDihBlockRef(nodePtr.i);
- sendSignal(ref, GSN_CREATE_FRAGREQ, signal,
- CreateFragReq::SignalLength, JBB);
- nodePtr.i = nodePtr.p->nextNode;
- } while (nodePtr.i != RNIL);
- if (replicaType == CreateFragReq::STORED) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::PREPARE_CREATE;
- } else {
- ndbrequire(replicaType == CreateFragReq::COMMIT_STORED);
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::COMMIT_CREATE;
- }
- }//Dbdih::sendCreateFragReq()
- /* --------------------------------------------------------------------------*/
- /* AN ORDER TO START OR COMMIT THE REPLICA CREATION ARRIVED FROM THE */
- /* MASTER. */
- /* --------------------------------------------------------------------------*/
- void Dbdih::execCREATE_FRAGREQ(Signal* signal)
- {
- jamEntry();
- CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = req->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- BlockReference retRef = req->userRef;
- TabRecordPtr tabPtr;
- tabPtr.i = req->tableId;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- Uint32 fragId = req->fragId;
- Uint32 tdestNodeid = req->startingNodeId;
- Uint32 tsourceNodeid = req->copyNodeId;
- Uint32 startGci = req->startGci;
- Uint32 replicaType = req->replicaType;
- FragmentstorePtr fragPtr;
- getFragstore(tabPtr.p, fragId, fragPtr);
- RETURN_IF_NODE_NOT_ALIVE(tdestNodeid);
- ReplicaRecordPtr frReplicaPtr;
- findToReplica(takeOverPtr.p, replicaType, fragPtr, frReplicaPtr);
- ndbrequire(frReplicaPtr.i != RNIL);
- switch (replicaType) {
- case CreateFragReq::STORED:
- jam();
- CRASH_INSERTION(7138);
- /* ----------------------------------------------------------------------*/
- /* HERE WE ARE INSERTING THE NEW BACKUP NODE IN THE EXECUTION OF ALL */
- /* OPERATIONS. FROM HERE ON ALL OPERATIONS ON THIS FRAGMENT WILL INCLUDE*/
- /* USE OF THE NEW REPLICA. */
- /* --------------------------------------------------------------------- */
- insertBackup(fragPtr, tdestNodeid);
- takeOverPtr.p->toCopyNode = tsourceNodeid;
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_CREATE_PREPARE;
-
- fragPtr.p->distributionKey++;
- fragPtr.p->distributionKey &= 255;
- break;
- case CreateFragReq::COMMIT_STORED:
- jam();
- CRASH_INSERTION(7139);
- /* ----------------------------------------------------------------------*/
- /* HERE WE ARE MOVING THE REPLICA TO THE STORED SECTION SINCE IT IS NOW */
- /* FULLY LOADED WITH ALL DATA NEEDED. */
- // We also update the order of the replicas here so that if the new
- // replica is the desired primary we insert it as primary.
- /* ----------------------------------------------------------------------*/
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_CREATE_COMMIT;
- removeOldStoredReplica(fragPtr, frReplicaPtr);
- linkStoredReplica(fragPtr, frReplicaPtr);
- updateNodeInfo(fragPtr);
- break;
- default:
- ndbrequire(false);
- break;
- }//switch
- /* ------------------------------------------------------------------------*/
- /* THE NEW NODE OF THIS REPLICA IS THE STARTING NODE. */
- /* ------------------------------------------------------------------------*/
- if (frReplicaPtr.p->procNode != takeOverPtr.p->toStartingNode) {
- jam();
- /* ---------------------------------------------------------------------*/
- /* IF WE ARE STARTING A TAKE OVER NODE WE MUST INVALIDATE ALL LCP'S. */
- /* OTHERWISE WE WILL TRY TO START LCP'S THAT DO NOT EXIST. */
- /* ---------------------------------------------------------------------*/
- frReplicaPtr.p->procNode = takeOverPtr.p->toStartingNode;
- frReplicaPtr.p->noCrashedReplicas = 0;
- frReplicaPtr.p->createGci[0] = startGci;
- ndbrequire(startGci != 0xF1F1F1F1);
- frReplicaPtr.p->replicaLastGci[0] = (Uint32)-1;
- for (Uint32 i = 0; i < MAX_LCP_STORED; i++) {
- frReplicaPtr.p->lcpStatus[i] = ZINVALID;
- }//for
- } else {
- jam();
- const Uint32 noCrashed = frReplicaPtr.p->noCrashedReplicas;
- arrGuard(noCrashed, 8);
- frReplicaPtr.p->createGci[noCrashed] = startGci;
- ndbrequire(startGci != 0xF1F1F1F1);
- frReplicaPtr.p->replicaLastGci[noCrashed] = (Uint32)-1;
- }//if
- takeOverPtr.p->toCurrentTabref = tabPtr.i;
- takeOverPtr.p->toCurrentFragid = fragId;
- CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
- conf->userPtr = takeOverPtr.i;
- conf->tableId = tabPtr.i;
- conf->fragId = fragId;
- conf->sendingNodeId = cownNodeId;
- conf->startingNodeId = tdestNodeid;
- sendSignal(retRef, GSN_CREATE_FRAGCONF, signal,
- CreateFragConf::SignalLength, JBB);
- }//Dbdih::execCREATE_FRAGREQ()
- void Dbdih::execCREATE_FRAGCONF(Signal* signal)
- {
- jamEntry();
- CRASH_INSERTION(7148);
- const CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
- Uint32 fragId = conf->fragId;
- RETURN_IF_NODE_NOT_ALIVE(conf->startingNodeId);
- TabRecordPtr tabPtr;
- tabPtr.i = conf->tableId;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = conf->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(tabPtr.i == takeOverPtr.p->toCurrentTabref);
- ndbrequire(fragId == takeOverPtr.p->toCurrentFragid);
- receiveLoopMacro(CREATE_FRAGREQ, conf->sendingNodeId);
- c_createFragmentLock = RNIL;
- if (takeOverPtr.p->toMasterStatus == TakeOverRecord::PREPARE_CREATE) {
- jam();
- CRASH_INSERTION(7140);
- /* --------------------------------------------------------------------- */
- /* ALL NODES HAVE PREPARED THE INTRODUCTION OF THIS NEW NODE AND IT IS */
- /* ALREADY IN USE. WE CAN NOW START COPYING THE FRAGMENT. */
- /*---------------------------------------------------------------------- */
- FragmentstorePtr fragPtr;
- getFragstore(tabPtr.p, fragId, fragPtr);
- takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_FRAG;
- BlockReference ref = calcLqhBlockRef(takeOverPtr.p->toCopyNode);
- CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0];
- copyFragReq->userPtr = takeOverPtr.i;
- copyFragReq->userRef = reference();
- copyFragReq->tableId = tabPtr.i;
- copyFragReq->fragId = fragId;
- copyFragReq->nodeId = takeOverPtr.p->toStartingNode;
- copyFragReq->schemaVersion = tabPtr.p->schemaVersion;
- copyFragReq->distributionKey = fragPtr.p->distributionKey;
- sendSignal(ref, GSN_COPY_FRAGREQ, signal, CopyFragReq::SignalLength, JBB);
- } else {
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COMMIT_CREATE);
- jam();
- CRASH_INSERTION(7141);
- /* --------------------------------------------------------------------- */
- // REPORT that copy of fragment has been completed.
- /* --------------------------------------------------------------------- */
- signal->theData[0] = EventReport::NR_CopyFragDone;
- signal->theData[1] = takeOverPtr.p->toStartingNode;
- signal->theData[2] = tabPtr.i;
- signal->theData[3] = takeOverPtr.p->toCurrentFragid;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
- /* --------------------------------------------------------------------- */
- /* WE HAVE NOW CREATED THIS NEW REPLICA AND WE ARE READY TO TAKE THE */
- /* THE NEXT REPLICA. */
- /* --------------------------------------------------------------------- */
- Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
- mutex.unlock(); // ignore result
- takeOverPtr.p->toCurrentFragid++;
- startNextCopyFragment(signal, takeOverPtr.i);
- }//if
- }//Dbdih::execCREATE_FRAGCONF()
- void Dbdih::execCOPY_FRAGREF(Signal* signal)
- {
- const CopyFragRef * const ref = (CopyFragRef *)&signal->theData[0];
- jamEntry();
- Uint32 takeOverPtrI = ref->userPtr;
- Uint32 startingNodeId = ref->startingNodeId;
- Uint32 errorCode = ref->errorCode;
- TakeOverRecordPtr takeOverPtr;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- ndbrequire(errorCode != ZNODE_FAILURE_ERROR);
- ndbrequire(ref->tableId == takeOverPtr.p->toCurrentTabref);
- ndbrequire(ref->fragId == takeOverPtr.p->toCurrentFragid);
- ndbrequire(ref->startingNodeId == takeOverPtr.p->toStartingNode);
- ndbrequire(ref->sendingNodeId == takeOverPtr.p->toCopyNode);
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG);
- endTakeOver(takeOverPtrI);
- //--------------------------------------------------------------------------
- // For some reason we did not succeed in copying a fragment. We treat this
- // as a serious failure and crash the starting node.
- //--------------------------------------------------------------------------
- BlockReference cntrRef = calcNdbCntrBlockRef(startingNodeId);
- SystemError * const sysErr = (SystemError*)&signal->theData[0];
- sysErr->errorCode = SystemError::CopyFragRefError;
- sysErr->errorRef = reference();
- sysErr->data1 = errorCode;
- sysErr->data2 = 0;
- sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
- SystemError::SignalLength, JBB);
- return;
- }//Dbdih::execCOPY_FRAGREF()
- void Dbdih::execCOPY_FRAGCONF(Signal* signal)
- {
- const CopyFragConf * const conf = (CopyFragConf *)&signal->theData[0];
- jamEntry();
- CRASH_INSERTION(7142);
- TakeOverRecordPtr takeOverPtr;
- Uint32 takeOverPtrI = conf->userPtr;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
- ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
- ndbrequire(conf->startingNodeId == takeOverPtr.p->toStartingNode);
- ndbrequire(conf->sendingNodeId == takeOverPtr.p->toCopyNode);
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_FRAG);
- sendUpdateTo(signal, takeOverPtr.i,
- (Uint32)UpdateToReq::TO_COPY_FRAG_COMPLETED);
- }//Dbdih::execCOPY_FRAGCONF()
- void Dbdih::sendUpdateTo(Signal* signal,
- Uint32 takeOverPtrI, Uint32 updateState)
- {
- TakeOverRecordPtr takeOverPtr;
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- if ((c_updateToLock != RNIL) ||
- ((ERROR_INSERTED(7163)) &&
- (updateState == UpdateToReq::TO_COPY_FRAG_COMPLETED)) ||
- ((ERROR_INSERTED(7169)) &&
- (updateState == UpdateToReq::TO_COPY_COMPLETED))) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_UPDATE_TO;
- signal->theData[0] = DihContinueB::ZSEND_UPDATE_TO;
- signal->theData[1] = takeOverPtrI;
- signal->theData[2] = takeOverPtr.p->toStartingNode;
- signal->theData[3] = takeOverPtr.p->toFailedNode;
- signal->theData[4] = updateState;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 30, 5);
- return;
- }//if
- c_updateToLock = takeOverPtrI;
- if (updateState == UpdateToReq::TO_COPY_FRAG_COMPLETED) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_UPDATE_TO;
- } else {
- jam();
- ndbrequire(updateState == UpdateToReq::TO_COPY_COMPLETED);
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_COPY_COMPLETED;
- }//if
- UpdateToReq * const req = (UpdateToReq *)&signal->theData[0];
- req->userPtr = takeOverPtr.i;
- req->userRef = reference();
- req->updateState = (UpdateToReq::UpdateState)updateState;
- req->startingNodeId = takeOverPtr.p->toStartingNode;
- req->tableId = takeOverPtr.p->toCurrentTabref;
- req->fragmentNo = takeOverPtr.p->toCurrentFragid;
- sendLoopMacro(UPDATE_TOREQ, sendUPDATE_TOREQ);
- }//Dbdih::sendUpdateTo()
- void Dbdih::execUPDATE_TOREQ(Signal* signal)
- {
- jamEntry();
- const UpdateToReq * const req = (UpdateToReq *)&signal->theData[0];
- BlockReference ref = req->userRef;
- ndbrequire(cmasterdihref == ref);
- CRASH_INSERTION(7154);
- RETURN_IF_NODE_NOT_ALIVE(req->startingNodeId);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = req->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(req->startingNodeId == takeOverPtr.p->toStartingNode);
- if (req->updateState == UpdateToReq::TO_COPY_FRAG_COMPLETED) {
- jam();
- ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_SLAVE_CREATE_PREPARE);
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_COPY_FRAG_COMPLETED;
- takeOverPtr.p->toCurrentTabref = req->tableId;
- takeOverPtr.p->toCurrentFragid = req->fragmentNo;
- } else {
- jam();
- ndbrequire(req->updateState == UpdateToReq::TO_COPY_COMPLETED);
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_COPY_COMPLETED;
- setNodeCopyCompleted(takeOverPtr.p->toStartingNode, true);
- }//if
- UpdateToConf * const conf = (UpdateToConf *)&signal->theData[0];
- conf->userPtr = takeOverPtr.i;
- conf->sendingNodeId = cownNodeId;
- conf->startingNodeId = takeOverPtr.p->toStartingNode;
- sendSignal(ref, GSN_UPDATE_TOCONF, signal, UpdateToConf::SignalLength, JBB);
- }//Dbdih::execUPDATE_TOREQ()
- void Dbdih::execUPDATE_TOCONF(Signal* signal)
- {
- const UpdateToConf * const conf = (UpdateToConf *)&signal->theData[0];
- CRASH_INSERTION(7152);
- RETURN_IF_NODE_NOT_ALIVE(conf->startingNodeId);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = conf->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- receiveLoopMacro(UPDATE_TOREQ, conf->sendingNodeId);
- CRASH_INSERTION(7153);
- c_updateToLock = RNIL;
- if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_COPY_COMPLETED) {
- jam();
- toCopyCompletedLab(signal, takeOverPtr);
- return;
- } else {
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_UPDATE_TO);
- }//if
- TabRecordPtr tabPtr;
- tabPtr.i = takeOverPtr.p->toCurrentTabref;
- ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
- FragmentstorePtr fragPtr;
- getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
- takeOverPtr.p->toMasterStatus = TakeOverRecord::COPY_ACTIVE;
- BlockReference lqhRef = calcLqhBlockRef(takeOverPtr.p->toStartingNode);
- CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
- req->userPtr = takeOverPtr.i;
- req->userRef = reference();
- req->tableId = takeOverPtr.p->toCurrentTabref;
- req->fragId = takeOverPtr.p->toCurrentFragid;
- req->distributionKey = fragPtr.p->distributionKey;
- sendSignal(lqhRef, GSN_COPY_ACTIVEREQ, signal,
- CopyActiveReq::SignalLength, JBB);
- }//Dbdih::execUPDATE_TOCONF()
- void Dbdih::execCOPY_ACTIVECONF(Signal* signal)
- {
- const CopyActiveConf * const conf = (CopyActiveConf *)&signal->theData[0];
- jamEntry();
- CRASH_INSERTION(7143);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = conf->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
- ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
- ndbrequire(checkNodeAlive(conf->startingNodeId));
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::COPY_ACTIVE);
- takeOverPtr.p->startGci = conf->startGci;
- takeOverPtr.p->toMasterStatus = TakeOverRecord::LOCK_MUTEX;
-
- Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
- Callback c = { safe_cast(&Dbdih::switchPrimaryMutex_locked), takeOverPtr.i };
- ndbrequire(mutex.lock(c));
- }//Dbdih::execCOPY_ACTIVECONF()
- void
- Dbdih::switchPrimaryMutex_locked(Signal* signal, Uint32 toPtrI, Uint32 retVal){
- jamEntry();
- ndbrequire(retVal == 0);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = toPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::LOCK_MUTEX);
-
- if (!checkNodeAlive((takeOverPtr.p->toStartingNode))) {
- // We have mutex
- Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_switchPrimaryMutexHandle);
- mutex.unlock(); // Ignore result
-
- c_createFragmentLock = RNIL;
- c_CREATE_FRAGREQ_Counter.clearWaitingFor();
- endTakeOver(takeOverPtr.i);
- return;
- }
-
- takeOverPtr.p->toMasterStatus = TakeOverRecord::COMMIT_CREATE;
- sendCreateFragReq(signal, takeOverPtr.p->startGci,
- CreateFragReq::COMMIT_STORED, takeOverPtr.i);
- }
- void Dbdih::toCopyCompletedLab(Signal * signal, TakeOverRecordPtr takeOverPtr)
- {
- signal->theData[0] = EventReport::NR_CopyFragsCompleted;
- signal->theData[1] = takeOverPtr.p->toStartingNode;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
- c_lcpState.immediateLcpStart = true;
- takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
-
- /*-----------------------------------------------------------------------*/
- /* NOW WE CAN ALLOW THE NEW NODE TO PARTICIPATE IN LOCAL CHECKPOINTS. */
- /* WHEN THE FIRST LOCAL CHECKPOINT IS READY WE DECLARE THE TAKE OVER AS */
- /* COMPLETED. SINCE LOCAL CHECKPOINTS HAVE BEEN BLOCKED DURING THE COPY */
- /* PROCESS WE MUST ALSO START A NEW LOCAL CHECKPOINT PROCESS BY ENSURING */
- /* THAT IT LOOKS LIKE IT IS TIME FOR A NEW LOCAL CHECKPOINT AND BY */
- /* UNBLOCKING THE LOCAL CHECKPOINT AGAIN. */
- /* --------------------------------------------------------------------- */
- }//Dbdih::toCopyCompletedLab()
- void Dbdih::sendEndTo(Signal* signal, Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- CRASH_INSERTION(7156);
- RETURN_IF_TAKE_OVER_INTERRUPTED(takeOverPtrI, takeOverPtr);
- if ((c_endToLock != RNIL) || (ERROR_INSERTED(7164))) {
- jam();
- takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_ENDING;
- signal->theData[0] = DihContinueB::ZSEND_END_TO;
- signal->theData[1] = takeOverPtrI;
- signal->theData[2] = takeOverPtr.p->toStartingNode;
- signal->theData[3] = takeOverPtr.p->toFailedNode;
- sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 30, 4);
- return;
- }//if
- c_endToLock = takeOverPtr.i;
- takeOverPtr.p->toMasterStatus = TakeOverRecord::ENDING;
- EndToReq * const req = (EndToReq *)&signal->theData[0];
- req->userPtr = takeOverPtr.i;
- req->userRef = reference();
- req->startingNodeId = takeOverPtr.p->toStartingNode;
- sendLoopMacro(END_TOREQ, sendEND_TOREQ);
- }//Dbdih::sendStartTo()
- void Dbdih::execEND_TOREQ(Signal* signal)
- {
- jamEntry();
- const EndToReq * const req = (EndToReq *)&signal->theData[0];
- BlockReference ref = req->userRef;
- Uint32 startingNodeId = req->startingNodeId;
- CRASH_INSERTION(7144);
- RETURN_IF_NODE_NOT_ALIVE(startingNodeId);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = req->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(startingNodeId == takeOverPtr.p->toStartingNode);
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_IDLE;
-
- if (!isMaster()) {
- jam();
- endTakeOver(takeOverPtr.i);
- }//if
- EndToConf * const conf = (EndToConf *)&signal->theData[0];
- conf->userPtr = takeOverPtr.i;
- conf->sendingNodeId = cownNodeId;
- conf->startingNodeId = startingNodeId;
- sendSignal(ref, GSN_END_TOCONF, signal, EndToConf::SignalLength, JBB);
- }//Dbdih::execEND_TOREQ()
- void Dbdih::execEND_TOCONF(Signal* signal)
- {
- const EndToConf * const conf = (EndToConf *)&signal->theData[0];
- jamEntry();
- const Uint32 nodeId = conf->startingNodeId;
- CRASH_INSERTION(7145);
- RETURN_IF_NODE_NOT_ALIVE(nodeId);
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = conf->userPtr;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- ndbrequire(takeOverPtr.p->toMasterStatus == TakeOverRecord::ENDING);
- ndbrequire(nodeId == takeOverPtr.p->toStartingNode);
- receiveLoopMacro(END_TOREQ, conf->sendingNodeId);
- CRASH_INSERTION(7146);
- c_endToLock = RNIL;
- /* -----------------------------------------------------------------------*/
- /* WE HAVE FINALLY COMPLETED THE TAKE OVER. WE RESET THE STATUS AND CHECK*/
- /* IF ANY MORE TAKE OVERS ARE NEEDED AT THE MOMENT. */
- /* FIRST WE CHECK IF A RESTART IS ONGOING. IN THAT CASE WE RESTART PHASE */
- /* 4 AND CHECK IF ANY MORE TAKE OVERS ARE NEEDED BEFORE WE START NDB */
- /* CLUSTER. THIS CAN ONLY HAPPEN IN A SYSTEM RESTART. */
- /* ---------------------------------------------------------------------- */
- if (takeOverPtr.p->toNodeRestart) {
- jam();
- /* ----------------------------------------------------------------------*/
- /* THE TAKE OVER NODE WAS A STARTING NODE. WE WILL SEND START_COPYCONF */
- /* TO THE STARTING NODE SUCH THAT THE NODE CAN COMPLETE THE START-UP. */
- /* --------------------------------------------------------------------- */
- BlockReference ref = calcDihBlockRef(takeOverPtr.p->toStartingNode);
- signal->theData[0] = takeOverPtr.p->toStartingNode;
- sendSignal(ref, GSN_START_COPYCONF, signal, 1,JBB);
- }//if
- endTakeOver(takeOverPtr.i);
- ndbout_c("2 - endTakeOver");
- if (cstartPhase == ZNDB_SPH4) {
- jam();
- ndbrequire(false);
- if (anyActiveTakeOver()) {
- jam();
- ndbout_c("4 - anyActiveTakeOver == true");
- return;
- }//if
- ndbout_c("5 - anyActiveTakeOver == false -> ndbsttorry10Lab");
- ndbsttorry10Lab(signal, __LINE__);
- return;
- }//if
- checkStartTakeOver(signal);
- }//Dbdih::execEND_TOCONF()
- void Dbdih::allocateTakeOver(TakeOverRecordPtr& takeOverPtr)
- {
- if (isMaster()) {
- jam();
- //--------------------------------------------
- // Master already seized the take over record.
- //--------------------------------------------
- return;
- }//if
- if (takeOverPtr.i == cfirstfreeTakeOver) {
- jam();
- seizeTakeOver(takeOverPtr);
- } else {
- TakeOverRecordPtr nextTakeOverptr;
- TakeOverRecordPtr prevTakeOverptr;
- nextTakeOverptr.i = takeOverPtr.p->nextTakeOver;
- prevTakeOverptr.i = takeOverPtr.p->prevTakeOver;
- if (prevTakeOverptr.i != RNIL) {
- jam();
- ptrCheckGuard(prevTakeOverptr, MAX_NDB_NODES, takeOverRecord);
- prevTakeOverptr.p->nextTakeOver = nextTakeOverptr.i;
- }//if
- if (nextTakeOverptr.i != RNIL) {
- jam();
- ptrCheckGuard(nextTakeOverptr, MAX_NDB_NODES, takeOverRecord);
- nextTakeOverptr.p->prevTakeOver = prevTakeOverptr.i;
- }//if
- }//if
- }//Dbdih::allocateTakeOver()
- void Dbdih::seizeTakeOver(TakeOverRecordPtr& takeOverPtr)
- {
- TakeOverRecordPtr nextTakeOverptr;
- ndbrequire(cfirstfreeTakeOver != RNIL);
- takeOverPtr.i = cfirstfreeTakeOver;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- cfirstfreeTakeOver = takeOverPtr.p->nextTakeOver;
- nextTakeOverptr.i = takeOverPtr.p->nextTakeOver;
- if (nextTakeOverptr.i != RNIL) {
- jam();
- ptrCheckGuard(nextTakeOverptr, MAX_NDB_NODES, takeOverRecord);
- nextTakeOverptr.p->prevTakeOver = RNIL;
- }//if
- takeOverPtr.p->nextTakeOver = RNIL;
- takeOverPtr.p->prevTakeOver = RNIL;
- }//Dbdih::seizeTakeOver()
- void Dbdih::endTakeOver(Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = takeOverPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- releaseTakeOver(takeOverPtrI);
- if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) &&
- (takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) {
- jam();
- NodeGroupRecordPtr NGPtr;
- NodeRecordPtr nodePtr;
- nodePtr.i = takeOverPtr.p->toStartingNode;
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
- NGPtr.i = nodePtr.p->nodeGroup;
- ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
- NGPtr.p->activeTakeOver = false;
- }//if
- setAllowNodeStart(takeOverPtr.p->toStartingNode, true);
- initTakeOver(takeOverPtr);
- }//Dbdih::endTakeOver()
- void Dbdih::releaseTakeOver(Uint32 takeOverPtrI)
- {
- TakeOverRecordPtr takeOverPtr;
- takeOverPtr.i = takeOverPtrI;
- ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- takeOverPtr.p->nextTakeOver = cfirstfreeTakeOver;
- cfirstfreeTakeOver = takeOverPtr.i;
- }//Dbdih::releaseTakeOver()
- void Dbdih::initTakeOver(TakeOverRecordPtr takeOverPtr)
- {
- takeOverPtr.p->toCopyNode = RNIL;
- takeOverPtr.p->toCurrentFragid = RNIL;
- takeOverPtr.p->toCurrentReplica = RNIL;
- takeOverPtr.p->toCurrentTabref = RNIL;
- takeOverPtr.p->toFailedNode = RNIL;
- takeOverPtr.p->toStartingNode = RNIL;
- takeOverPtr.p->prevTakeOver = RNIL;
- takeOverPtr.p->nextTakeOver = RNIL;
- takeOverPtr.p->toNodeRestart = false;
- takeOverPtr.p->toMasterStatus = TakeOverRecord::IDLE;
- takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_IDLE;
- }//Dbdih::initTakeOver()
- bool Dbdih::anyActiveTakeOver()
- {
- TakeOverRecordPtr takeOverPtr;
- for (takeOverPtr.i = 0; takeOverPtr.i < MAX_NDB_NODES; takeOverPtr.i++) {
- ptrAss(takeOverPtr, takeOverRecord);
- if (takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) {
- jam();
- return true;
- }//if
- }//for
- return false;
- }//Dbdih::anyActiveTakeOver()
- /*****************************************************************************/
- /* ------------------------------------------------------------------------- */
- /* WE HAVE BEEN REQUESTED TO PERFORM A SYSTEM RESTART. WE START BY */
- /* READING THE GCI FILES. THIS REQUEST WILL ONLY BE SENT TO THE MASTER */
- /* DIH. THAT MEANS WE HAVE TO REPLICATE THE INFORMATION WE READ FROM */
- /* OUR FILES TO ENSURE THAT ALL NODES HAVE THE SAME DISTRIBUTION */
- /* INFORMATION. */
- /* ------------------------------------------------------------------------- */
- /*****************************************************************************/
- void Dbdih::readGciFileLab(Signal* signal)
- {
- FileRecordPtr filePtr;
- filePtr.i = crestartInfoFile[0];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- filePtr.p->reqStatus = FileRecord::OPENING_GCP;
- openFileRo(signal, filePtr);
- }//Dbdih::readGciFileLab()
- void Dbdih::openingGcpLab(Signal* signal, FileRecordPtr filePtr)
- {
- /* ----------------------------------------------------------------------- */
- /* WE HAVE SUCCESSFULLY OPENED A FILE CONTAINING INFORMATION ABOUT */
- /* THE GLOBAL CHECKPOINTS THAT ARE POSSIBLE TO RESTART. */
- /* ----------------------------------------------------------------------- */
- readRestorableGci(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::READING_GCP;
- }//Dbdih::openingGcpLab()
- void Dbdih::readingGcpLab(Signal* signal, FileRecordPtr filePtr)
- {
- /* ----------------------------------------------------------------------- */
- /* WE HAVE NOW SUCCESSFULLY MANAGED TO READ IN THE GLOBAL CHECKPOINT */
- /* INFORMATION FROM FILE. LATER WE WILL ADD SOME FUNCTIONALITY THAT */
- /* CHECKS THE RESTART TIMERS TO DEDUCE FROM WHERE TO RESTART. */
- /* NOW WE WILL SIMPLY RESTART FROM THE NEWEST GLOBAL CHECKPOINT */
- /* POSSIBLE TO RESTORE. */
- /* */
- /* BEFORE WE INVOKE DICT WE NEED TO COPY CRESTART_INFO TO ALL NODES. */
- /* WE ALSO COPY TO OUR OWN NODE. TO ENABLE US TO DO THIS PROPERLY WE */
- /* START BY CLOSING THIS FILE. */
- /* ----------------------------------------------------------------------- */
- closeFile(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::CLOSING_GCP;
- }//Dbdih::readingGcpLab()
- void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr)
- {
- if (Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) == false){
- jam();
- selectMasterCandidateAndSend(signal);
- return;
- } else {
- jam();
- sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
- return;
- }//if
- }//Dbdih::closingGcpLab()
- /* ------------------------------------------------------------------------- */
- /* SELECT THE MASTER CANDIDATE TO BE USED IN SYSTEM RESTARTS. */
- /* ------------------------------------------------------------------------- */
- void Dbdih::selectMasterCandidateAndSend(Signal* signal)
- {
- Uint32 gci = 0;
- Uint32 masterCandidateId = 0;
- NodeRecordPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- ptrAss(nodePtr, nodeRecord);
- if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci) {
- jam();
- masterCandidateId = nodePtr.i;
- gci = SYSFILE->lastCompletedGCI[nodePtr.i];
- }//if
- }//for
- ndbrequire(masterCandidateId != 0);
- setNodeGroups();
- signal->theData[0] = masterCandidateId;
- signal->theData[1] = gci;
- sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal, 2, JBB);
- Uint32 node_groups[MAX_NDB_NODES];
- memset(node_groups, 0, sizeof(node_groups));
- for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- const Uint32 ng = Sysfile::getNodeGroup(nodePtr.i, SYSFILE->nodeGroups);
- if(ng != NO_NODE_GROUP_ID){
- ndbrequire(ng < MAX_NDB_NODES);
- node_groups[ng]++;
- }
- }
-
- for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
- jam();
- Uint32 count = node_groups[nodePtr.i];
- if(count != 0 && count != cnoReplicas){
- char buf[255];
- BaseString::snprintf(buf, sizeof(buf),
- "Illegal configuration change."
- " Initial start needs to be performed "
- " when changing no of replicas (%d != %d)",
- node_groups[nodePtr.i], cnoReplicas);
- progError(__LINE__,
- ERR_INVALID_CONFIG,
- buf);
- }
- }
- }//Dbdih::selectMasterCandidate()
- /* ------------------------------------------------------------------------- */
- /* ERROR HANDLING DURING READING RESTORABLE GCI FROM FILE. */
- /* ------------------------------------------------------------------------- */
- void Dbdih::openingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
- {
- filePtr.p->fileStatus = FileRecord::CRASHED;
- filePtr.p->reqStatus = FileRecord::IDLE;
- if (crestartInfoFile[0] == filePtr.i) {
- jam();
- /* --------------------------------------------------------------------- */
- /* THE FIRST FILE WAS NOT ABLE TO BE OPENED. SET STATUS TO CRASHED AND */
- /* TRY OPEN THE NEXT FILE. */
- /* --------------------------------------------------------------------- */
- filePtr.i = crestartInfoFile[1];
- ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
- openFileRo(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::OPENING_GCP;
- } else {
- jam();
- /* --------------------------------------------------------------------- */
- /* WE FAILED IN OPENING THE SECOND FILE. BOTH FILES WERE CORRUPTED. WE */
- /* CANNOT CONTINUE THE RESTART IN THIS CASE. TELL NDBCNTR OF OUR */
- /* FAILURE. */
- /*---------------------------------------------------------------------- */
- sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal, 1, JBB);
- return;
- }//if
- }//Dbdih::openingGcpErrorLab()
- void Dbdih::readingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
- {
- filePtr.p->fileStatus = FileRecord::CRASHED;
- /* ----------------------------------------------------------------------- */
- /* WE FAILED IN READING THE FILE AS WELL. WE WILL CLOSE THIS FILE. */
- /* ----------------------------------------------------------------------- */
- closeFile(signal, filePtr);
- filePtr.p->reqStatus = FileRecord::CLOSING_GCP_CRASH;
- }//Dbdih::readingGcpErrorLab()
- void Dbdih::closingGcpCrashLab(Signal* signal, FileRecordPtr filePtr)
- {
- if (crestartInfoFile[0] == filePtr.i) {