btree.c.svn-base
上传用户:sunhongbo
上传日期:2022-01-25
资源大小:3010k
文件大小:220k
- ** before or after the key.
- **
- ** The result of comparing the key with the entry to which the
- ** cursor is written to *pRes if pRes!=NULL. The meaning of
- ** this value is as follows:
- **
- ** *pRes<0 The cursor is left pointing at an entry that
- ** is smaller than pKey or if the table is empty
- ** and the cursor is therefore left point to nothing.
- **
- ** *pRes==0 The cursor is left pointing at an entry that
- ** exactly matches pKey.
- **
- ** *pRes>0 The cursor is left pointing at an entry that
- ** is larger than pKey.
- **
- */
- int sqlite3BtreeMoveto(
- BtCursor *pCur, /* The cursor to be moved */
- const void *pKey, /* The key content for indices. Not used by tables */
- UnpackedRecord *pUnKey,/* Unpacked version of pKey */
- i64 nKey, /* Size of pKey. Or the key for tables */
- int biasRight, /* If true, bias the search to the high end */
- int *pRes /* Search result flag */
- ){
- int rc;
- char aSpace[200];
- assert( cursorHoldsMutex(pCur) );
- assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
- /* If the cursor is already positioned at the point we are trying
- ** to move to, then just return without doing any work */
- if( pCur->eState==CURSOR_VALID && pCur->validNKey && pCur->pPage->intKey ){
- if( pCur->info.nKey==nKey ){
- *pRes = 0;
- return SQLITE_OK;
- }
- if( pCur->atLast && pCur->info.nKey<nKey ){
- *pRes = -1;
- return SQLITE_OK;
- }
- }
- rc = moveToRoot(pCur);
- if( rc ){
- return rc;
- }
- assert( pCur->pPage );
- assert( pCur->pPage->isInit );
- if( pCur->eState==CURSOR_INVALID ){
- *pRes = -1;
- assert( pCur->pPage->nCell==0 );
- return SQLITE_OK;
- }
- if( pCur->pPage->intKey ){
- /* We are given an SQL table to search. The key is the integer
- ** rowid contained in nKey. pKey and pUnKey should both be NULL */
- assert( pUnKey==0 );
- assert( pKey==0 );
- }else if( pUnKey==0 ){
- /* We are to search an SQL index using a key encoded as a blob.
- ** The blob is found at pKey and is nKey bytes in length. Unpack
- ** this key so that we can use it. */
- assert( pKey!=0 );
- pUnKey = sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nKey, pKey,
- aSpace, sizeof(aSpace));
- if( pUnKey==0 ) return SQLITE_NOMEM;
- }else{
- /* We are to search an SQL index using a key that is already unpacked
- ** and handed to us in pUnKey. */
- assert( pKey==0 );
- }
- for(;;){
- int lwr, upr;
- Pgno chldPg;
- MemPage *pPage = pCur->pPage;
- int c = -1; /* pRes return if table is empty must be -1 */
- lwr = 0;
- upr = pPage->nCell-1;
- if( !pPage->intKey && pUnKey==0 ){
- rc = SQLITE_CORRUPT_BKPT;
- goto moveto_finish;
- }
- if( biasRight ){
- pCur->idx = upr;
- }else{
- pCur->idx = (upr+lwr)/2;
- }
- if( lwr<=upr ) for(;;){
- void *pCellKey;
- i64 nCellKey;
- pCur->info.nSize = 0;
- pCur->validNKey = 1;
- if( pPage->intKey ){
- u8 *pCell;
- pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
- if( pPage->hasData ){
- u32 dummy;
- pCell += getVarint32(pCell, &dummy);
- }
- getVarint(pCell, (u64*)&nCellKey);
- if( nCellKey==nKey ){
- c = 0;
- }else if( nCellKey<nKey ){
- c = -1;
- }else{
- assert( nCellKey>nKey );
- c = +1;
- }
- }else{
- int available;
- pCellKey = (void *)fetchPayload(pCur, &available, 0);
- nCellKey = pCur->info.nKey;
- if( available>=nCellKey ){
- c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pUnKey);
- }else{
- pCellKey = sqlite3_malloc( nCellKey );
- if( pCellKey==0 ){
- rc = SQLITE_NOMEM;
- goto moveto_finish;
- }
- rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
- c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pUnKey);
- sqlite3_free(pCellKey);
- if( rc ) goto moveto_finish;
- }
- }
- if( c==0 ){
- pCur->info.nKey = nCellKey;
- if( pPage->leafData && !pPage->leaf ){
- lwr = pCur->idx;
- upr = lwr - 1;
- break;
- }else{
- if( pRes ) *pRes = 0;
- rc = SQLITE_OK;
- goto moveto_finish;
- }
- }
- if( c<0 ){
- lwr = pCur->idx+1;
- }else{
- upr = pCur->idx-1;
- }
- if( lwr>upr ){
- pCur->info.nKey = nCellKey;
- break;
- }
- pCur->idx = (lwr+upr)/2;
- }
- assert( lwr==upr+1 );
- assert( pPage->isInit );
- if( pPage->leaf ){
- chldPg = 0;
- }else if( lwr>=pPage->nCell ){
- chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
- }else{
- chldPg = get4byte(findCell(pPage, lwr));
- }
- if( chldPg==0 ){
- assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
- if( pRes ) *pRes = c;
- rc = SQLITE_OK;
- goto moveto_finish;
- }
- pCur->idx = lwr;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- rc = moveToChild(pCur, chldPg);
- if( rc ) goto moveto_finish;
- }
- moveto_finish:
- if( pKey ){
- /* If we created our own unpacked key at the top of this
- ** procedure, then destroy that key before returning. */
- sqlite3VdbeDeleteUnpackedRecord(pUnKey);
- }
- return rc;
- }
- /*
- ** Return TRUE if the cursor is not pointing at an entry of the table.
- **
- ** TRUE will be returned after a call to sqlite3BtreeNext() moves
- ** past the last entry in the table or sqlite3BtreePrev() moves past
- ** the first entry. TRUE is also returned if the table is empty.
- */
- int sqlite3BtreeEof(BtCursor *pCur){
- /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
- ** have been deleted? This API will need to change to return an error code
- ** as well as the boolean result value.
- */
- return (CURSOR_VALID!=pCur->eState);
- }
- /*
- ** Return the database connection handle for a cursor.
- */
- sqlite3 *sqlite3BtreeCursorDb(const BtCursor *pCur){
- assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
- return pCur->pBtree->db;
- }
- /*
- ** Advance the cursor to the next entry in the database. If
- ** successful then set *pRes=0. If the cursor
- ** was already pointing to the last entry in the database before
- ** this routine was called, then set *pRes=1.
- */
- int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
- int rc;
- MemPage *pPage;
- assert( cursorHoldsMutex(pCur) );
- rc = restoreOrClearCursorPosition(pCur);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- assert( pRes!=0 );
- pPage = pCur->pPage;
- if( CURSOR_INVALID==pCur->eState ){
- *pRes = 1;
- return SQLITE_OK;
- }
- if( pCur->skip>0 ){
- pCur->skip = 0;
- *pRes = 0;
- return SQLITE_OK;
- }
- pCur->skip = 0;
- assert( pPage->isInit );
- assert( pCur->idx<pPage->nCell );
- pCur->idx++;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- if( pCur->idx>=pPage->nCell ){
- if( !pPage->leaf ){
- rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
- if( rc ) return rc;
- rc = moveToLeftmost(pCur);
- *pRes = 0;
- return rc;
- }
- do{
- if( sqlite3BtreeIsRootPage(pPage) ){
- *pRes = 1;
- pCur->eState = CURSOR_INVALID;
- return SQLITE_OK;
- }
- sqlite3BtreeMoveToParent(pCur);
- pPage = pCur->pPage;
- }while( pCur->idx>=pPage->nCell );
- *pRes = 0;
- if( pPage->leafData ){
- rc = sqlite3BtreeNext(pCur, pRes);
- }else{
- rc = SQLITE_OK;
- }
- return rc;
- }
- *pRes = 0;
- if( pPage->leaf ){
- return SQLITE_OK;
- }
- rc = moveToLeftmost(pCur);
- return rc;
- }
- /*
- ** Step the cursor to the back to the previous entry in the database. If
- ** successful then set *pRes=0. If the cursor
- ** was already pointing to the first entry in the database before
- ** this routine was called, then set *pRes=1.
- */
- int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
- int rc;
- Pgno pgno;
- MemPage *pPage;
- assert( cursorHoldsMutex(pCur) );
- rc = restoreOrClearCursorPosition(pCur);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- pCur->atLast = 0;
- if( CURSOR_INVALID==pCur->eState ){
- *pRes = 1;
- return SQLITE_OK;
- }
- if( pCur->skip<0 ){
- pCur->skip = 0;
- *pRes = 0;
- return SQLITE_OK;
- }
- pCur->skip = 0;
- pPage = pCur->pPage;
- assert( pPage->isInit );
- assert( pCur->idx>=0 );
- if( !pPage->leaf ){
- pgno = get4byte( findCell(pPage, pCur->idx) );
- rc = moveToChild(pCur, pgno);
- if( rc ){
- return rc;
- }
- rc = moveToRightmost(pCur);
- }else{
- while( pCur->idx==0 ){
- if( sqlite3BtreeIsRootPage(pPage) ){
- pCur->eState = CURSOR_INVALID;
- *pRes = 1;
- return SQLITE_OK;
- }
- sqlite3BtreeMoveToParent(pCur);
- pPage = pCur->pPage;
- }
- pCur->idx--;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- if( pPage->leafData && !pPage->leaf ){
- rc = sqlite3BtreePrevious(pCur, pRes);
- }else{
- rc = SQLITE_OK;
- }
- }
- *pRes = 0;
- return rc;
- }
- /*
- ** Allocate a new page from the database file.
- **
- ** The new page is marked as dirty. (In other words, sqlite3PagerWrite()
- ** has already been called on the new page.) The new page has also
- ** been referenced and the calling routine is responsible for calling
- ** sqlite3PagerUnref() on the new page when it is done.
- **
- ** SQLITE_OK is returned on success. Any other return value indicates
- ** an error. *ppPage and *pPgno are undefined in the event of an error.
- ** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
- **
- ** If the "nearby" parameter is not 0, then a (feeble) effort is made to
- ** locate a page close to the page number "nearby". This can be used in an
- ** attempt to keep related pages close to each other in the database file,
- ** which in turn can make database access faster.
- **
- ** If the "exact" parameter is not 0, and the page-number nearby exists
- ** anywhere on the free-list, then it is guarenteed to be returned. This
- ** is only used by auto-vacuum databases when allocating a new table.
- */
- static int allocateBtreePage(
- BtShared *pBt,
- MemPage **ppPage,
- Pgno *pPgno,
- Pgno nearby,
- u8 exact
- ){
- MemPage *pPage1;
- int rc;
- int n; /* Number of pages on the freelist */
- int k; /* Number of leaves on the trunk of the freelist */
- MemPage *pTrunk = 0;
- MemPage *pPrevTrunk = 0;
- assert( sqlite3_mutex_held(pBt->mutex) );
- pPage1 = pBt->pPage1;
- n = get4byte(&pPage1->aData[36]);
- if( n>0 ){
- /* There are pages on the freelist. Reuse one of those pages. */
- Pgno iTrunk;
- u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
-
- /* If the 'exact' parameter was true and a query of the pointer-map
- ** shows that the page 'nearby' is somewhere on the free-list, then
- ** the entire-list will be searched for that page.
- */
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( exact && nearby<=sqlite3PagerPagecount(pBt->pPager) ){
- u8 eType;
- assert( nearby>0 );
- assert( pBt->autoVacuum );
- rc = ptrmapGet(pBt, nearby, &eType, 0);
- if( rc ) return rc;
- if( eType==PTRMAP_FREEPAGE ){
- searchList = 1;
- }
- *pPgno = nearby;
- }
- #endif
- /* Decrement the free-list count by 1. Set iTrunk to the index of the
- ** first free-list trunk page. iPrevTrunk is initially 1.
- */
- rc = sqlite3PagerWrite(pPage1->pDbPage);
- if( rc ) return rc;
- put4byte(&pPage1->aData[36], n-1);
- /* The code within this loop is run only once if the 'searchList' variable
- ** is not true. Otherwise, it runs once for each trunk-page on the
- ** free-list until the page 'nearby' is located.
- */
- do {
- pPrevTrunk = pTrunk;
- if( pPrevTrunk ){
- iTrunk = get4byte(&pPrevTrunk->aData[0]);
- }else{
- iTrunk = get4byte(&pPage1->aData[32]);
- }
- rc = sqlite3BtreeGetPage(pBt, iTrunk, &pTrunk, 0);
- if( rc ){
- pTrunk = 0;
- goto end_allocate_page;
- }
- k = get4byte(&pTrunk->aData[4]);
- if( k==0 && !searchList ){
- /* The trunk has no leaves and the list is not being searched.
- ** So extract the trunk page itself and use it as the newly
- ** allocated page */
- assert( pPrevTrunk==0 );
- rc = sqlite3PagerWrite(pTrunk->pDbPage);
- if( rc ){
- goto end_allocate_page;
- }
- *pPgno = iTrunk;
- memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
- *ppPage = pTrunk;
- pTrunk = 0;
- TRACE(("ALLOCATE: %d trunk - %d free pages leftn", *pPgno, n-1));
- }else if( k>pBt->usableSize/4 - 8 ){
- /* Value of k is out of range. Database corruption */
- rc = SQLITE_CORRUPT_BKPT;
- goto end_allocate_page;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- }else if( searchList && nearby==iTrunk ){
- /* The list is being searched and this trunk page is the page
- ** to allocate, regardless of whether it has leaves.
- */
- assert( *pPgno==iTrunk );
- *ppPage = pTrunk;
- searchList = 0;
- rc = sqlite3PagerWrite(pTrunk->pDbPage);
- if( rc ){
- goto end_allocate_page;
- }
- if( k==0 ){
- if( !pPrevTrunk ){
- memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
- }else{
- memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
- }
- }else{
- /* The trunk page is required by the caller but it contains
- ** pointers to free-list leaves. The first leaf becomes a trunk
- ** page in this case.
- */
- MemPage *pNewTrunk;
- Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
- rc = sqlite3BtreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
- if( rc!=SQLITE_OK ){
- goto end_allocate_page;
- }
- rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
- if( rc!=SQLITE_OK ){
- releasePage(pNewTrunk);
- goto end_allocate_page;
- }
- memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
- put4byte(&pNewTrunk->aData[4], k-1);
- memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
- releasePage(pNewTrunk);
- if( !pPrevTrunk ){
- put4byte(&pPage1->aData[32], iNewTrunk);
- }else{
- rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
- if( rc ){
- goto end_allocate_page;
- }
- put4byte(&pPrevTrunk->aData[0], iNewTrunk);
- }
- }
- pTrunk = 0;
- TRACE(("ALLOCATE: %d trunk - %d free pages leftn", *pPgno, n-1));
- #endif
- }else{
- /* Extract a leaf from the trunk */
- int closest;
- Pgno iPage;
- unsigned char *aData = pTrunk->aData;
- rc = sqlite3PagerWrite(pTrunk->pDbPage);
- if( rc ){
- goto end_allocate_page;
- }
- if( nearby>0 ){
- int i, dist;
- closest = 0;
- dist = get4byte(&aData[8]) - nearby;
- if( dist<0 ) dist = -dist;
- for(i=1; i<k; i++){
- int d2 = get4byte(&aData[8+i*4]) - nearby;
- if( d2<0 ) d2 = -d2;
- if( d2<dist ){
- closest = i;
- dist = d2;
- }
- }
- }else{
- closest = 0;
- }
- iPage = get4byte(&aData[8+closest*4]);
- if( !searchList || iPage==nearby ){
- *pPgno = iPage;
- if( *pPgno>sqlite3PagerPagecount(pBt->pPager) ){
- /* Free page off the end of the file */
- return SQLITE_CORRUPT_BKPT;
- }
- TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
- ": %d more free pagesn",
- *pPgno, closest+1, k, pTrunk->pgno, n-1));
- if( closest<k-1 ){
- memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
- }
- put4byte(&aData[4], k-1);
- rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 1);
- if( rc==SQLITE_OK ){
- sqlite3PagerDontRollback((*ppPage)->pDbPage);
- rc = sqlite3PagerWrite((*ppPage)->pDbPage);
- if( rc!=SQLITE_OK ){
- releasePage(*ppPage);
- }
- }
- searchList = 0;
- }
- }
- releasePage(pPrevTrunk);
- pPrevTrunk = 0;
- }while( searchList );
- }else{
- /* There are no pages on the freelist, so create a new page at the
- ** end of the file */
- *pPgno = sqlite3PagerPagecount(pBt->pPager) + 1;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->nTrunc ){
- /* An incr-vacuum has already run within this transaction. So the
- ** page to allocate is not from the physical end of the file, but
- ** at pBt->nTrunc.
- */
- *pPgno = pBt->nTrunc+1;
- if( *pPgno==PENDING_BYTE_PAGE(pBt) ){
- (*pPgno)++;
- }
- }
- if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
- /* If *pPgno refers to a pointer-map page, allocate two new pages
- ** at the end of the file instead of one. The first allocated page
- ** becomes a new pointer-map page, the second is used by the caller.
- */
- TRACE(("ALLOCATE: %d from end of file (pointer-map page)n", *pPgno));
- assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
- (*pPgno)++;
- if( *pPgno==PENDING_BYTE_PAGE(pBt) ){ (*pPgno)++; }
- }
- if( pBt->nTrunc ){
- pBt->nTrunc = *pPgno;
- }
- #endif
- assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
- rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 0);
- if( rc ) return rc;
- rc = sqlite3PagerWrite((*ppPage)->pDbPage);
- if( rc!=SQLITE_OK ){
- releasePage(*ppPage);
- }
- TRACE(("ALLOCATE: %d from end of filen", *pPgno));
- }
- assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
- end_allocate_page:
- releasePage(pTrunk);
- releasePage(pPrevTrunk);
- return rc;
- }
- /*
- ** Add a page of the database file to the freelist.
- **
- ** sqlite3PagerUnref() is NOT called for pPage.
- */
- static int freePage(MemPage *pPage){
- BtShared *pBt = pPage->pBt;
- MemPage *pPage1 = pBt->pPage1;
- int rc, n, k;
- /* Prepare the page for freeing */
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- assert( pPage->pgno>1 );
- pPage->isInit = 0;
- releasePage(pPage->pParent);
- pPage->pParent = 0;
- /* Increment the free page count on pPage1 */
- rc = sqlite3PagerWrite(pPage1->pDbPage);
- if( rc ) return rc;
- n = get4byte(&pPage1->aData[36]);
- put4byte(&pPage1->aData[36], n+1);
- #ifdef SQLITE_SECURE_DELETE
- /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then
- ** always fully overwrite deleted information with zeros.
- */
- rc = sqlite3PagerWrite(pPage->pDbPage);
- if( rc ) return rc;
- memset(pPage->aData, 0, pPage->pBt->pageSize);
- #endif
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* If the database supports auto-vacuum, write an entry in the pointer-map
- ** to indicate that the page is free.
- */
- if( pBt->autoVacuum ){
- rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0);
- if( rc ) return rc;
- }
- #endif
- if( n==0 ){
- /* This is the first free page */
- rc = sqlite3PagerWrite(pPage->pDbPage);
- if( rc ) return rc;
- memset(pPage->aData, 0, 8);
- put4byte(&pPage1->aData[32], pPage->pgno);
- TRACE(("FREE-PAGE: %d firstn", pPage->pgno));
- }else{
- /* Other free pages already exist. Retrive the first trunk page
- ** of the freelist and find out how many leaves it has. */
- MemPage *pTrunk;
- rc = sqlite3BtreeGetPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
- if( rc ) return rc;
- k = get4byte(&pTrunk->aData[4]);
- if( k>=pBt->usableSize/4 - 8 ){
- /* The trunk is full. Turn the page being freed into a new
- ** trunk page with no leaves. */
- rc = sqlite3PagerWrite(pPage->pDbPage);
- if( rc==SQLITE_OK ){
- put4byte(pPage->aData, pTrunk->pgno);
- put4byte(&pPage->aData[4], 0);
- put4byte(&pPage1->aData[32], pPage->pgno);
- TRACE(("FREE-PAGE: %d new trunk page replacing %dn",
- pPage->pgno, pTrunk->pgno));
- }
- }else if( k<0 ){
- rc = SQLITE_CORRUPT;
- }else{
- /* Add the newly freed page as a leaf on the current trunk */
- rc = sqlite3PagerWrite(pTrunk->pDbPage);
- if( rc==SQLITE_OK ){
- put4byte(&pTrunk->aData[4], k+1);
- put4byte(&pTrunk->aData[8+k*4], pPage->pgno);
- #ifndef SQLITE_SECURE_DELETE
- sqlite3PagerDontWrite(pPage->pDbPage);
- #endif
- }
- TRACE(("FREE-PAGE: %d leaf on trunk page %dn",pPage->pgno,pTrunk->pgno));
- }
- releasePage(pTrunk);
- }
- return rc;
- }
- /*
- ** Free any overflow pages associated with the given Cell.
- */
- static int clearCell(MemPage *pPage, unsigned char *pCell){
- BtShared *pBt = pPage->pBt;
- CellInfo info;
- Pgno ovflPgno;
- int rc;
- int nOvfl;
- int ovflPageSize;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- if( info.iOverflow==0 ){
- return SQLITE_OK; /* No overflow pages. Return without doing anything */
- }
- ovflPgno = get4byte(&pCell[info.iOverflow]);
- ovflPageSize = pBt->usableSize - 4;
- nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize;
- assert( ovflPgno==0 || nOvfl>0 );
- while( nOvfl-- ){
- MemPage *pOvfl;
- if( ovflPgno==0 || ovflPgno>sqlite3PagerPagecount(pBt->pPager) ){
- return SQLITE_CORRUPT_BKPT;
- }
- rc = getOverflowPage(pBt, ovflPgno, &pOvfl, (nOvfl==0)?0:&ovflPgno);
- if( rc ) return rc;
- rc = freePage(pOvfl);
- sqlite3PagerUnref(pOvfl->pDbPage);
- if( rc ) return rc;
- }
- return SQLITE_OK;
- }
- /*
- ** Create the byte sequence used to represent a cell on page pPage
- ** and write that byte sequence into pCell[]. Overflow pages are
- ** allocated and filled in as necessary. The calling procedure
- ** is responsible for making sure sufficient space has been allocated
- ** for pCell[].
- **
- ** Note that pCell does not necessary need to point to the pPage->aData
- ** area. pCell might point to some temporary storage. The cell will
- ** be constructed in this temporary area then copied into pPage->aData
- ** later.
- */
- static int fillInCell(
- MemPage *pPage, /* The page that contains the cell */
- unsigned char *pCell, /* Complete text of the cell */
- const void *pKey, i64 nKey, /* The key */
- const void *pData,int nData, /* The data */
- int nZero, /* Extra zero bytes to append to pData */
- int *pnSize /* Write cell size here */
- ){
- int nPayload;
- const u8 *pSrc;
- int nSrc, n, rc;
- int spaceLeft;
- MemPage *pOvfl = 0;
- MemPage *pToRelease = 0;
- unsigned char *pPrior;
- unsigned char *pPayload;
- BtShared *pBt = pPage->pBt;
- Pgno pgnoOvfl = 0;
- int nHeader;
- CellInfo info;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- /* Fill in the header. */
- nHeader = 0;
- if( !pPage->leaf ){
- nHeader += 4;
- }
- if( pPage->hasData ){
- nHeader += putVarint(&pCell[nHeader], nData+nZero);
- }else{
- nData = nZero = 0;
- }
- nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- assert( info.nHeader==nHeader );
- assert( info.nKey==nKey );
- assert( info.nData==nData+nZero );
-
- /* Fill in the payload */
- nPayload = nData + nZero;
- if( pPage->intKey ){
- pSrc = pData;
- nSrc = nData;
- nData = 0;
- }else{
- nPayload += nKey;
- pSrc = pKey;
- nSrc = nKey;
- }
- *pnSize = info.nSize;
- spaceLeft = info.nLocal;
- pPayload = &pCell[nHeader];
- pPrior = &pCell[info.iOverflow];
- while( nPayload>0 ){
- if( spaceLeft==0 ){
- int isExact = 0;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
- if( pBt->autoVacuum ){
- do{
- pgnoOvfl++;
- } while(
- PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
- );
- if( pgnoOvfl>1 ){
- /* isExact = 1; */
- }
- }
- #endif
- rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, isExact);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* If the database supports auto-vacuum, and the second or subsequent
- ** overflow page is being allocated, add an entry to the pointer-map
- ** for that page now.
- **
- ** If this is the first overflow page, then write a partial entry
- ** to the pointer-map. If we write nothing to this pointer-map slot,
- ** then the optimistic overflow chain processing in clearCell()
- ** may misinterpret the uninitialised values and delete the
- ** wrong pages from the database.
- */
- if( pBt->autoVacuum && rc==SQLITE_OK ){
- u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1);
- rc = ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap);
- if( rc ){
- releasePage(pOvfl);
- }
- }
- #endif
- if( rc ){
- releasePage(pToRelease);
- return rc;
- }
- put4byte(pPrior, pgnoOvfl);
- releasePage(pToRelease);
- pToRelease = pOvfl;
- pPrior = pOvfl->aData;
- put4byte(pPrior, 0);
- pPayload = &pOvfl->aData[4];
- spaceLeft = pBt->usableSize - 4;
- }
- n = nPayload;
- if( n>spaceLeft ) n = spaceLeft;
- if( nSrc>0 ){
- if( n>nSrc ) n = nSrc;
- assert( pSrc );
- memcpy(pPayload, pSrc, n);
- }else{
- memset(pPayload, 0, n);
- }
- nPayload -= n;
- pPayload += n;
- pSrc += n;
- nSrc -= n;
- spaceLeft -= n;
- if( nSrc==0 ){
- nSrc = nData;
- pSrc = pData;
- }
- }
- releasePage(pToRelease);
- return SQLITE_OK;
- }
- /*
- ** Change the MemPage.pParent pointer on the page whose number is
- ** given in the second argument so that MemPage.pParent holds the
- ** pointer in the third argument.
- */
- static int reparentPage(BtShared *pBt, Pgno pgno, MemPage *pNewParent, int idx){
- MemPage *pThis;
- DbPage *pDbPage;
- assert( sqlite3_mutex_held(pBt->mutex) );
- assert( pNewParent!=0 );
- if( pgno==0 ) return SQLITE_OK;
- assert( pBt->pPager!=0 );
- pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
- if( pDbPage ){
- pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
- if( pThis->isInit ){
- assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
- if( pThis->pParent!=pNewParent ){
- if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
- pThis->pParent = pNewParent;
- sqlite3PagerRef(pNewParent->pDbPage);
- }
- pThis->idxParent = idx;
- }
- sqlite3PagerUnref(pDbPage);
- }
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
- }
- #endif
- return SQLITE_OK;
- }
- /*
- ** Change the pParent pointer of all children of pPage to point back
- ** to pPage.
- **
- ** In other words, for every child of pPage, invoke reparentPage()
- ** to make sure that each child knows that pPage is its parent.
- **
- ** This routine gets called after you memcpy() one page into
- ** another.
- */
- static int reparentChildPages(MemPage *pPage){
- int i;
- BtShared *pBt = pPage->pBt;
- int rc = SQLITE_OK;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- if( pPage->leaf ) return SQLITE_OK;
- for(i=0; i<pPage->nCell; i++){
- u8 *pCell = findCell(pPage, i);
- rc = reparentPage(pBt, get4byte(pCell), pPage, i);
- if( rc!=SQLITE_OK ) return rc;
- }
- rc = reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]),
- pPage, i);
- pPage->idxShift = 0;
- return rc;
- }
- /*
- ** Remove the i-th cell from pPage. This routine effects pPage only.
- ** The cell content is not freed or deallocated. It is assumed that
- ** the cell content has been copied someplace else. This routine just
- ** removes the reference to the cell from pPage.
- **
- ** "sz" must be the number of bytes in the cell.
- */
- static void dropCell(MemPage *pPage, int idx, int sz){
- int i; /* Loop counter */
- int pc; /* Offset to cell content of cell being deleted */
- u8 *data; /* pPage->aData */
- u8 *ptr; /* Used to move bytes around within data[] */
- assert( idx>=0 && idx<pPage->nCell );
- assert( sz==cellSize(pPage, idx) );
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- data = pPage->aData;
- ptr = &data[pPage->cellOffset + 2*idx];
- pc = get2byte(ptr);
- assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
- freeSpace(pPage, pc, sz);
- for(i=idx+1; i<pPage->nCell; i++, ptr+=2){
- ptr[0] = ptr[2];
- ptr[1] = ptr[3];
- }
- pPage->nCell--;
- put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
- pPage->nFree += 2;
- pPage->idxShift = 1;
- }
- /*
- ** Insert a new cell on pPage at cell index "i". pCell points to the
- ** content of the cell.
- **
- ** If the cell content will fit on the page, then put it there. If it
- ** will not fit, then make a copy of the cell content into pTemp if
- ** pTemp is not null. Regardless of pTemp, allocate a new entry
- ** in pPage->aOvfl[] and make it point to the cell content (either
- ** in pTemp or the original pCell) and also record its index.
- ** Allocating a new entry in pPage->aCell[] implies that
- ** pPage->nOverflow is incremented.
- **
- ** If nSkip is non-zero, then do not copy the first nSkip bytes of the
- ** cell. The caller will overwrite them after this function returns. If
- ** nSkip is non-zero, then pCell may not point to an invalid memory location
- ** (but pCell+nSkip is always valid).
- */
- static int insertCell(
- MemPage *pPage, /* Page into which we are copying */
- int i, /* New cell becomes the i-th cell of the page */
- u8 *pCell, /* Content of the new cell */
- int sz, /* Bytes of content in pCell */
- u8 *pTemp, /* Temp storage space for pCell, if needed */
- u8 nSkip /* Do not write the first nSkip bytes of the cell */
- ){
- int idx; /* Where to write new cell content in data[] */
- int j; /* Loop counter */
- int top; /* First byte of content for any cell in data[] */
- int end; /* First byte past the last cell pointer in data[] */
- int ins; /* Index in data[] where new cell pointer is inserted */
- int hdr; /* Offset into data[] of the page header */
- int cellOffset; /* Address of first cell pointer in data[] */
- u8 *data; /* The content of the whole page */
- u8 *ptr; /* Used for moving information around in data[] */
- assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
- assert( sz==cellSizePtr(pPage, pCell) );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- if( pPage->nOverflow || sz+2>pPage->nFree ){
- if( pTemp ){
- memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
- pCell = pTemp;
- }
- j = pPage->nOverflow++;
- assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
- pPage->aOvfl[j].pCell = pCell;
- pPage->aOvfl[j].idx = i;
- pPage->nFree = 0;
- }else{
- int rc = sqlite3PagerWrite(pPage->pDbPage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
- data = pPage->aData;
- hdr = pPage->hdrOffset;
- top = get2byte(&data[hdr+5]);
- cellOffset = pPage->cellOffset;
- end = cellOffset + 2*pPage->nCell + 2;
- ins = cellOffset + 2*i;
- if( end > top - sz ){
- rc = defragmentPage(pPage);
- if( rc!=SQLITE_OK ) return rc;
- top = get2byte(&data[hdr+5]);
- assert( end + sz <= top );
- }
- idx = allocateSpace(pPage, sz);
- assert( idx>0 );
- assert( end <= get2byte(&data[hdr+5]) );
- pPage->nCell++;
- pPage->nFree -= 2;
- memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip);
- for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){
- ptr[0] = ptr[-2];
- ptr[1] = ptr[-1];
- }
- put2byte(&data[ins], idx);
- put2byte(&data[hdr+3], pPage->nCell);
- pPage->idxShift = 1;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pPage->pBt->autoVacuum ){
- /* The cell may contain a pointer to an overflow page. If so, write
- ** the entry for the overflow page into the pointer map.
- */
- CellInfo info;
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
- if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
- Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
- rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
- if( rc!=SQLITE_OK ) return rc;
- }
- }
- #endif
- }
- return SQLITE_OK;
- }
- /*
- ** Add a list of cells to a page. The page should be initially empty.
- ** The cells are guaranteed to fit on the page.
- */
- static void assemblePage(
- MemPage *pPage, /* The page to be assemblied */
- int nCell, /* The number of cells to add to this page */
- u8 **apCell, /* Pointers to cell bodies */
- u16 *aSize /* Sizes of the cells */
- ){
- int i; /* Loop counter */
- int totalSize; /* Total size of all cells */
- int hdr; /* Index of page header */
- int cellptr; /* Address of next cell pointer */
- int cellbody; /* Address of next cell body */
- u8 *data; /* Data for the page */
- assert( pPage->nOverflow==0 );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- totalSize = 0;
- for(i=0; i<nCell; i++){
- totalSize += aSize[i];
- }
- assert( totalSize+2*nCell<=pPage->nFree );
- assert( pPage->nCell==0 );
- cellptr = pPage->cellOffset;
- data = pPage->aData;
- hdr = pPage->hdrOffset;
- put2byte(&data[hdr+3], nCell);
- if( nCell ){
- cellbody = allocateSpace(pPage, totalSize);
- assert( cellbody>0 );
- assert( pPage->nFree >= 2*nCell );
- pPage->nFree -= 2*nCell;
- for(i=0; i<nCell; i++){
- put2byte(&data[cellptr], cellbody);
- memcpy(&data[cellbody], apCell[i], aSize[i]);
- cellptr += 2;
- cellbody += aSize[i];
- }
- assert( cellbody==pPage->pBt->usableSize );
- }
- pPage->nCell = nCell;
- }
- /*
- ** The following parameters determine how many adjacent pages get involved
- ** in a balancing operation. NN is the number of neighbors on either side
- ** of the page that participate in the balancing operation. NB is the
- ** total number of pages that participate, including the target page and
- ** NN neighbors on either side.
- **
- ** The minimum value of NN is 1 (of course). Increasing NN above 1
- ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
- ** in exchange for a larger degradation in INSERT and UPDATE performance.
- ** The value of NN appears to give the best results overall.
- */
- #define NN 1 /* Number of neighbors on either side of pPage */
- #define NB (NN*2+1) /* Total pages involved in the balance */
- /* Forward reference */
- static int balance(MemPage*, int);
- #ifndef SQLITE_OMIT_QUICKBALANCE
- /*
- ** This version of balance() handles the common special case where
- ** a new entry is being inserted on the extreme right-end of the
- ** tree, in other words, when the new entry will become the largest
- ** entry in the tree.
- **
- ** Instead of trying balance the 3 right-most leaf pages, just add
- ** a new page to the right-hand side and put the one new entry in
- ** that page. This leaves the right side of the tree somewhat
- ** unbalanced. But odds are that we will be inserting new entries
- ** at the end soon afterwards so the nearly empty page will quickly
- ** fill up. On average.
- **
- ** pPage is the leaf page which is the right-most page in the tree.
- ** pParent is its parent. pPage must have a single overflow entry
- ** which is also the right-most entry on the page.
- */
- static int balance_quick(MemPage *pPage, MemPage *pParent){
- int rc;
- MemPage *pNew;
- Pgno pgnoNew;
- u8 *pCell;
- u16 szCell;
- CellInfo info;
- BtShared *pBt = pPage->pBt;
- int parentIdx = pParent->nCell; /* pParent new divider cell index */
- int parentSize; /* Size of new divider cell */
- u8 parentCell[64]; /* Space for the new divider cell */
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- /* Allocate a new page. Insert the overflow cell from pPage
- ** into it. Then remove the overflow cell from pPage.
- */
- rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- pCell = pPage->aOvfl[0].pCell;
- szCell = cellSizePtr(pPage, pCell);
- zeroPage(pNew, pPage->aData[0]);
- assemblePage(pNew, 1, &pCell, &szCell);
- pPage->nOverflow = 0;
- /* Set the parent of the newly allocated page to pParent. */
- pNew->pParent = pParent;
- sqlite3PagerRef(pParent->pDbPage);
- /* pPage is currently the right-child of pParent. Change this
- ** so that the right-child is the new page allocated above and
- ** pPage is the next-to-right child.
- */
- assert( pPage->nCell>0 );
- pCell = findCell(pPage, pPage->nCell-1);
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- assert( parentSize<64 );
- rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
- put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* If this is an auto-vacuum database, update the pointer map
- ** with entries for the new page, and any pointer from the
- ** cell on the page to an overflow page.
- */
- if( pBt->autoVacuum ){
- rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
- if( rc==SQLITE_OK ){
- rc = ptrmapPutOvfl(pNew, 0);
- }
- if( rc!=SQLITE_OK ){
- releasePage(pNew);
- return rc;
- }
- }
- #endif
- /* Release the reference to the new page and balance the parent page,
- ** in case the divider cell inserted caused it to become overfull.
- */
- releasePage(pNew);
- return balance(pParent, 0);
- }
- #endif /* SQLITE_OMIT_QUICKBALANCE */
- /*
- ** This routine redistributes Cells on pPage and up to NN*2 siblings
- ** of pPage so that all pages have about the same amount of free space.
- ** Usually NN siblings on either side of pPage is used in the balancing,
- ** though more siblings might come from one side if pPage is the first
- ** or last child of its parent. If pPage has fewer than 2*NN siblings
- ** (something which can only happen if pPage is the root page or a
- ** child of root) then all available siblings participate in the balancing.
- **
- ** The number of siblings of pPage might be increased or decreased by one or
- ** two in an effort to keep pages nearly full but not over full. The root page
- ** is special and is allowed to be nearly empty. If pPage is
- ** the root page, then the depth of the tree might be increased
- ** or decreased by one, as necessary, to keep the root page from being
- ** overfull or completely empty.
- **
- ** Note that when this routine is called, some of the Cells on pPage
- ** might not actually be stored in pPage->aData[]. This can happen
- ** if the page is overfull. Part of the job of this routine is to
- ** make sure all Cells for pPage once again fit in pPage->aData[].
- **
- ** In the course of balancing the siblings of pPage, the parent of pPage
- ** might become overfull or underfull. If that happens, then this routine
- ** is called recursively on the parent.
- **
- ** If this routine fails for any reason, it might leave the database
- ** in a corrupted state. So if this routine fails, the database should
- ** be rolled back.
- */
- static int balance_nonroot(MemPage *pPage){
- MemPage *pParent; /* The parent of pPage */
- BtShared *pBt; /* The whole database */
- int nCell = 0; /* Number of cells in apCell[] */
- int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
- int nOld; /* Number of pages in apOld[] */
- int nNew; /* Number of pages in apNew[] */
- int nDiv; /* Number of cells in apDiv[] */
- int i, j, k; /* Loop counters */
- int idx; /* Index of pPage in pParent->aCell[] */
- int nxDiv; /* Next divider slot in pParent->aCell[] */
- int rc; /* The return code */
- int leafCorrection; /* 4 if pPage is a leaf. 0 if not */
- int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
- int usableSpace; /* Bytes in pPage beyond the header */
- int pageFlags; /* Value of pPage->aData[0] */
- int subtotal; /* Subtotal of bytes in cells on one page */
- int iSpace = 0; /* First unused byte of aSpace[] */
- MemPage *apOld[NB]; /* pPage and up to two siblings */
- Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
- MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
- MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
- Pgno pgnoNew[NB+2]; /* Page numbers for each page in apNew[] */
- u8 *apDiv[NB]; /* Divider cells in pParent */
- int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
- int szNew[NB+2]; /* Combined size of cells place on i-th page */
- u8 **apCell = 0; /* All cells begin balanced */
- u16 *szCell; /* Local size of all cells in apCell[] */
- u8 *aCopy[NB]; /* Space for holding data of apCopy[] */
- u8 *aSpace; /* Space to hold copies of dividers cells */
- #ifndef SQLITE_OMIT_AUTOVACUUM
- u8 *aFrom = 0;
- #endif
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- /*
- ** Find the parent page.
- */
- assert( pPage->isInit );
- assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
- pBt = pPage->pBt;
- pParent = pPage->pParent;
- assert( pParent );
- if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
- return rc;
- }
- TRACE(("BALANCE: begin page %d child of %dn", pPage->pgno, pParent->pgno));
- #ifndef SQLITE_OMIT_QUICKBALANCE
- /*
- ** A special case: If a new entry has just been inserted into a
- ** table (that is, a btree with integer keys and all data at the leaves)
- ** and the new entry is the right-most entry in the tree (it has the
- ** largest key) then use the special balance_quick() routine for
- ** balancing. balance_quick() is much faster and results in a tighter
- ** packing of data in the common case.
- */
- if( pPage->leaf &&
- pPage->intKey &&
- pPage->leafData &&
- pPage->nOverflow==1 &&
- pPage->aOvfl[0].idx==pPage->nCell &&
- pPage->pParent->pgno!=1 &&
- get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
- ){
- /*
- ** TODO: Check the siblings to the left of pPage. It may be that
- ** they are not full and no new page is required.
- */
- return balance_quick(pPage, pParent);
- }
- #endif
- if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
- return rc;
- }
- /*
- ** Find the cell in the parent page whose left child points back
- ** to pPage. The "idx" variable is the index of that cell. If pPage
- ** is the rightmost child of pParent then set idx to pParent->nCell
- */
- if( pParent->idxShift ){
- Pgno pgno;
- pgno = pPage->pgno;
- assert( pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
- for(idx=0; idx<pParent->nCell; idx++){
- if( get4byte(findCell(pParent, idx))==pgno ){
- break;
- }
- }
- assert( idx<pParent->nCell
- || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
- }else{
- idx = pPage->idxParent;
- }
- /*
- ** Initialize variables so that it will be safe to jump
- ** directly to balance_cleanup at any moment.
- */
- nOld = nNew = 0;
- sqlite3PagerRef(pParent->pDbPage);
- /*
- ** Find sibling pages to pPage and the cells in pParent that divide
- ** the siblings. An attempt is made to find NN siblings on either
- ** side of pPage. More siblings are taken from one side, however, if
- ** pPage there are fewer than NN siblings on the other side. If pParent
- ** has NB or fewer children then all children of pParent are taken.
- */
- nxDiv = idx - NN;
- if( nxDiv + NB > pParent->nCell ){
- nxDiv = pParent->nCell - NB + 1;
- }
- if( nxDiv<0 ){
- nxDiv = 0;
- }
- nDiv = 0;
- for(i=0, k=nxDiv; i<NB; i++, k++){
- if( k<pParent->nCell ){
- apDiv[i] = findCell(pParent, k);
- nDiv++;
- assert( !pParent->leaf );
- pgnoOld[i] = get4byte(apDiv[i]);
- }else if( k==pParent->nCell ){
- pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
- }else{
- break;
- }
- rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
- if( rc ) goto balance_cleanup;
- apOld[i]->idxParent = k;
- apCopy[i] = 0;
- assert( i==nOld );
- nOld++;
- nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
- }
- /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
- ** alignment */
- nMaxCells = (nMaxCells + 3)&~3;
- /*
- ** Allocate space for memory structures
- */
- apCell = sqlite3_malloc(
- nMaxCells*sizeof(u8*) /* apCell */
- + nMaxCells*sizeof(u16) /* szCell */
- + (ROUND8(sizeof(MemPage))+pBt->pageSize)*NB /* aCopy */
- + pBt->pageSize*5 /* aSpace */
- + (ISAUTOVACUUM ? nMaxCells : 0) /* aFrom */
- );
- if( apCell==0 ){
- rc = SQLITE_NOMEM;
- goto balance_cleanup;
- }
- szCell = (u16*)&apCell[nMaxCells];
- aCopy[0] = (u8*)&szCell[nMaxCells];
- assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
- for(i=1; i<NB; i++){
- aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
- assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
- }
- aSpace = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
- assert( ((aSpace - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- aFrom = &aSpace[5*pBt->pageSize];
- }
- #endif
-
- /*
- ** Make copies of the content of pPage and its siblings into aOld[].
- ** The rest of this function will use data from the copies rather
- ** that the original pages since the original pages will be in the
- ** process of being overwritten.
- */
- for(i=0; i<nOld; i++){
- MemPage *p = apCopy[i] = (MemPage*)aCopy[i];
- memcpy(p, apOld[i], sizeof(MemPage));
- p->aData = (void*)&p[1];
- memcpy(p->aData, apOld[i]->aData, pBt->pageSize);
- }
- /*
- ** Load pointers to all cells on sibling pages and the divider cells
- ** into the local apCell[] array. Make copies of the divider cells
- ** into space obtained form aSpace[] and remove the the divider Cells
- ** from pParent.
- **
- ** If the siblings are on leaf pages, then the child pointers of the
- ** divider cells are stripped from the cells before they are copied
- ** into aSpace[]. In this way, all cells in apCell[] are without
- ** child pointers. If siblings are not leaves, then all cell in
- ** apCell[] include child pointers. Either way, all cells in apCell[]
- ** are alike.
- **
- ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
- ** leafData: 1 if pPage holds key+data and pParent holds only keys.
- */
- nCell = 0;
- leafCorrection = pPage->leaf*4;
- leafData = pPage->leafData && pPage->leaf;
- for(i=0; i<nOld; i++){
- MemPage *pOld = apCopy[i];
- int limit = pOld->nCell+pOld->nOverflow;
- for(j=0; j<limit; j++){
- assert( nCell<nMaxCells );
- apCell[nCell] = findOverflowCell(pOld, j);
- szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- int a;
- aFrom[nCell] = i;
- for(a=0; a<pOld->nOverflow; a++){
- if( pOld->aOvfl[a].pCell==apCell[nCell] ){
- aFrom[nCell] = 0xFF;
- break;
- }
- }
- }
- #endif
- nCell++;
- }
- if( i<nOld-1 ){
- u16 sz = cellSizePtr(pParent, apDiv[i]);
- if( leafData ){
- /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
- ** are duplicates of keys on the child pages. We need to remove
- ** the divider cells from pParent, but the dividers cells are not
- ** added to apCell[] because they are duplicates of child cells.
- */
- dropCell(pParent, nxDiv, sz);
- }else{
- u8 *pTemp;
- assert( nCell<nMaxCells );
- szCell[nCell] = sz;
- pTemp = &aSpace[iSpace];
- iSpace += sz;
- assert( iSpace<=pBt->pageSize*5 );
- memcpy(pTemp, apDiv[i], sz);
- apCell[nCell] = pTemp+leafCorrection;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- aFrom[nCell] = 0xFF;
- }
- #endif
- dropCell(pParent, nxDiv, sz);
- szCell[nCell] -= leafCorrection;
- assert( get4byte(pTemp)==pgnoOld[i] );
- if( !pOld->leaf ){
- assert( leafCorrection==0 );
- /* The right pointer of the child page pOld becomes the left
- ** pointer of the divider cell */
- memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
- }else{
- assert( leafCorrection==4 );
- if( szCell[nCell]<4 ){
- /* Do not allow any cells smaller than 4 bytes. */
- szCell[nCell] = 4;
- }
- }
- nCell++;
- }
- }
- }
- /*
- ** Figure out the number of pages needed to hold all nCell cells.
- ** Store this number in "k". Also compute szNew[] which is the total
- ** size of all cells on the i-th page and cntNew[] which is the index
- ** in apCell[] of the cell that divides page i from page i+1.
- ** cntNew[k] should equal nCell.
- **
- ** Values computed by this block:
- **
- ** k: The total number of sibling pages
- ** szNew[i]: Spaced used on the i-th sibling page.
- ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to
- ** the right of the i-th sibling page.
- ** usableSpace: Number of bytes of space available on each sibling.
- **
- */
- usableSpace = pBt->usableSize - 12 + leafCorrection;
- for(subtotal=k=i=0; i<nCell; i++){
- assert( i<nMaxCells );
- subtotal += szCell[i] + 2;
- if( subtotal > usableSpace ){
- szNew[k] = subtotal - szCell[i];
- cntNew[k] = i;
- if( leafData ){ i--; }
- subtotal = 0;
- k++;
- }
- }
- szNew[k] = subtotal;
- cntNew[k] = nCell;
- k++;
- /*
- ** The packing computed by the previous block is biased toward the siblings
- ** on the left side. The left siblings are always nearly full, while the
- ** right-most sibling might be nearly empty. This block of code attempts
- ** to adjust the packing of siblings to get a better balance.
- **
- ** This adjustment is more than an optimization. The packing above might
- ** be so out of balance as to be illegal. For example, the right-most
- ** sibling might be completely empty. This adjustment is not optional.
- */
- for(i=k-1; i>0; i--){
- int szRight = szNew[i]; /* Size of sibling on the right */
- int szLeft = szNew[i-1]; /* Size of sibling on the left */
- int r; /* Index of right-most cell in left sibling */
- int d; /* Index of first cell to the left of right sibling */
- r = cntNew[i-1] - 1;
- d = r + 1 - leafData;
- assert( d<nMaxCells );
- assert( r<nMaxCells );
- while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
- szRight += szCell[d] + 2;
- szLeft -= szCell[r] + 2;
- cntNew[i-1]--;
- r = cntNew[i-1] - 1;
- d = r + 1 - leafData;
- }
- szNew[i] = szRight;
- szNew[i-1] = szLeft;
- }
- /* Either we found one or more cells (cntnew[0])>0) or we are the
- ** a virtual root page. A virtual root page is when the real root
- ** page is page 1 and we are the only child of that page.
- */
- assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
- /*
- ** Allocate k new pages. Reuse old pages where possible.
- */
- assert( pPage->pgno>1 );
- pageFlags = pPage->aData[0];
- for(i=0; i<k; i++){
- MemPage *pNew;
- if( i<nOld ){
- pNew = apNew[i] = apOld[i];
- pgnoNew[i] = pgnoOld[i];
- apOld[i] = 0;
- rc = sqlite3PagerWrite(pNew->pDbPage);
- nNew++;
- if( rc ) goto balance_cleanup;
- }else{
- assert( i>0 );
- rc = allocateBtreePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
- if( rc ) goto balance_cleanup;
- apNew[i] = pNew;
- nNew++;
- }
- zeroPage(pNew, pageFlags);
- }
- /* Free any old pages that were not reused as new pages.
- */
- while( i<nOld ){
- rc = freePage(apOld[i]);
- if( rc ) goto balance_cleanup;
- releasePage(apOld[i]);
- apOld[i] = 0;
- i++;
- }
- /*
- ** Put the new pages in accending order. This helps to
- ** keep entries in the disk file in order so that a scan
- ** of the table is a linear scan through the file. That
- ** in turn helps the operating system to deliver pages
- ** from the disk more rapidly.
- **
- ** An O(n^2) insertion sort algorithm is used, but since
- ** n is never more than NB (a small constant), that should
- ** not be a problem.
- **
- ** When NB==3, this one optimization makes the database
- ** about 25% faster for large insertions and deletions.
- */
- for(i=0; i<k-1; i++){
- int minV = pgnoNew[i];
- int minI = i;
- for(j=i+1; j<k; j++){
- if( pgnoNew[j]<(unsigned)minV ){
- minI = j;
- minV = pgnoNew[j];
- }
- }
- if( minI>i ){
- int t;
- MemPage *pT;
- t = pgnoNew[i];
- pT = apNew[i];
- pgnoNew[i] = pgnoNew[minI];
- apNew[i] = apNew[minI];
- pgnoNew[minI] = t;
- apNew[minI] = pT;
- }
- }
- TRACE(("BALANCE: old: %d %d %d new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)n",
- pgnoOld[0],
- nOld>=2 ? pgnoOld[1] : 0,
- nOld>=3 ? pgnoOld[2] : 0,
- pgnoNew[0], szNew[0],
- nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
- nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
- nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
- nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));
- /*
- ** Evenly distribute the data in apCell[] across the new pages.
- ** Insert divider cells into pParent as necessary.
- */
- j = 0;
- for(i=0; i<nNew; i++){
- /* Assemble the new sibling page. */
- MemPage *pNew = apNew[i];
- assert( j<nMaxCells );
- assert( pNew->pgno==pgnoNew[i] );
- assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
- assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
- assert( pNew->nOverflow==0 );
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* If this is an auto-vacuum database, update the pointer map entries
- ** that point to the siblings that were rearranged. These can be: left
- ** children of cells, the right-child of the page, or overflow pages
- ** pointed to by cells.
- */
- if( pBt->autoVacuum ){
- for(k=j; k<cntNew[i]; k++){
- assert( k<nMaxCells );
- if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
- rc = ptrmapPutOvfl(pNew, k-j);
- if( rc!=SQLITE_OK ){
- goto balance_cleanup;
- }
- }
- }
- }
- #endif
- j = cntNew[i];
- /* If the sibling page assembled above was not the right-most sibling,
- ** insert a divider cell into the parent page.
- */
- if( i<nNew-1 && j<nCell ){
- u8 *pCell;
- u8 *pTemp;
- int sz;
- assert( j<nMaxCells );
- pCell = apCell[j];
- sz = szCell[j] + leafCorrection;
- if( !pNew->leaf ){
- memcpy(&pNew->aData[8], pCell, 4);
- pTemp = 0;
- }else if( leafData ){
- /* If the tree is a leaf-data tree, and the siblings are leaves,
- ** then there is no divider cell in apCell[]. Instead, the divider
- ** cell consists of the integer key for the right-most cell of
- ** the sibling-page assembled above only.
- */
- CellInfo info;
- j--;
- sqlite3BtreeParseCellPtr(pNew, apCell[j], &info);
- pCell = &aSpace[iSpace];
- fillInCell(pParent, pCell, 0, info.nKey, 0, 0, 0, &sz);
- iSpace += sz;
- assert( iSpace<=pBt->pageSize*5 );
- pTemp = 0;
- }else{
- pCell -= 4;
- pTemp = &aSpace[iSpace];
- iSpace += sz;
- assert( iSpace<=pBt->pageSize*5 );
- /* Obscure case for non-leaf-data trees: If the cell at pCell was
- ** previously stored on a leaf node, and its reported size was 4
- ** bytes, then it may actually be smaller than this
- ** (see sqlite3BtreeParseCellPtr(), 4 bytes is the minimum size of
- ** any cell). But it is important to pass the correct size to
- ** insertCell(), so reparse the cell now.
- **
- ** Note that this can never happen in an SQLite data file, as all
- ** cells are at least 4 bytes. It only happens in b-trees used
- ** to evaluate "IN (SELECT ...)" and similar clauses.
- */
- if( szCell[j]==4 ){
- assert(leafCorrection==4);
- sz = cellSizePtr(pParent, pCell);
- }
- }
- rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
- if( rc!=SQLITE_OK ) goto balance_cleanup;
- put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* If this is an auto-vacuum database, and not a leaf-data tree,
- ** then update the pointer map with an entry for the overflow page
- ** that the cell just inserted points to (if any).
- */
- if( pBt->autoVacuum && !leafData ){
- rc = ptrmapPutOvfl(pParent, nxDiv);
- if( rc!=SQLITE_OK ){
- goto balance_cleanup;
- }
- }
- #endif
- j++;
- nxDiv++;
- }
- }
- assert( j==nCell );
- assert( nOld>0 );
- assert( nNew>0 );
- if( (pageFlags & PTF_LEAF)==0 ){
- memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4);
- }
- if( nxDiv==pParent->nCell+pParent->nOverflow ){
- /* Right-most sibling is the right-most child of pParent */
- put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
- }else{
- /* Right-most sibling is the left child of the first entry in pParent
- ** past the right-most divider entry */
- put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
- }
- /*
- ** Reparent children of all cells.
- */
- for(i=0; i<nNew; i++){
- rc = reparentChildPages(apNew[i]);
- if( rc!=SQLITE_OK ) goto balance_cleanup;
- }
- rc = reparentChildPages(pParent);
- if( rc!=SQLITE_OK ) goto balance_cleanup;
- /*
- ** Balance the parent page. Note that the current page (pPage) might
- ** have been added to the freelist so it might no longer be initialized.
- ** But the parent page will always be initialized.
- */
- assert( pParent->isInit );
- rc = balance(pParent, 0);
-
- /*
- ** Cleanup before returning.
- */
- balance_cleanup:
- sqlite3_free(apCell);
- for(i=0; i<nOld; i++){
- releasePage(apOld[i]);
- }
- for(i=0; i<nNew; i++){
- releasePage(apNew[i]);
- }
- releasePage(pParent);
- TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%dn",
- pPage->pgno, nOld, nNew, nCell));
- return rc;
- }
- /*
- ** This routine is called for the root page of a btree when the root
- ** page contains no cells. This is an opportunity to make the tree
- ** shallower by one level.
- */
- static int balance_shallower(MemPage *pPage){
- MemPage *pChild; /* The only child page of pPage */
- Pgno pgnoChild; /* Page number for pChild */
- int rc = SQLITE_OK; /* Return code from subprocedures */
- BtShared *pBt; /* The main BTree structure */
- int mxCellPerPage; /* Maximum number of cells per page */
- u8 **apCell; /* All cells from pages being balanced */
- u16 *szCell; /* Local size of all cells */
- assert( pPage->pParent==0 );
- assert( pPage->nCell==0 );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pBt = pPage->pBt;
- mxCellPerPage = MX_CELL(pBt);
- apCell = sqlite3_malloc( mxCellPerPage*(sizeof(u8*)+sizeof(u16)) );
- if( apCell==0 ) return SQLITE_NOMEM;
- szCell = (u16*)&apCell[mxCellPerPage];
- if( pPage->leaf ){
- /* The table is completely empty */
- TRACE(("BALANCE: empty table %dn", pPage->pgno));
- }else{
- /* The root page is empty but has one child. Transfer the
- ** information from that one child into the root page if it
- ** will fit. This reduces the depth of the tree by one.
- **
- ** If the root page is page 1, it has less space available than
- ** its child (due to the 100 byte header that occurs at the beginning
- ** of the database fle), so it might not be able to hold all of the
- ** information currently contained in the child. If this is the
- ** case, then do not do the transfer. Leave page 1 empty except
- ** for the right-pointer to the child page. The child page becomes
- ** the virtual root of the tree.
- */
- pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
- assert( pgnoChild>0 );
- assert( pgnoChild<=sqlite3PagerPagecount(pPage->pBt->pPager) );
- rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0);
- if( rc ) goto end_shallow_balance;
- if( pPage->pgno==1 ){
- rc = sqlite3BtreeInitPage(pChild, pPage);
- if( rc ) goto end_shallow_balance;
- assert( pChild->nOverflow==0 );
- if( pChild->nFree>=100 ){
- /* The child information will fit on the root page, so do the
- ** copy */
- int i;
- zeroPage(pPage, pChild->aData[0]);
- for(i=0; i<pChild->nCell; i++){
- apCell[i] = findCell(pChild,i);
- szCell[i] = cellSizePtr(pChild, apCell[i]);
- }
- assemblePage(pPage, pChild->nCell, apCell, szCell);
- /* Copy the right-pointer of the child to the parent. */
- put4byte(&pPage->aData[pPage->hdrOffset+8],
- get4byte(&pChild->aData[pChild->hdrOffset+8]));
- freePage(pChild);
- TRACE(("BALANCE: child %d transfer to page 1n", pChild->pgno));
- }else{
- /* The child has more information that will fit on the root.
- ** The tree is already balanced. Do nothing. */
- TRACE(("BALANCE: child %d will not fit on page 1n", pChild->pgno));
- }
- }else{
- memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
- pPage->isInit = 0;
- pPage->pParent = 0;
- rc = sqlite3BtreeInitPage(pPage, 0);
- assert( rc==SQLITE_OK );
- freePage(pChild);
- TRACE(("BALANCE: transfer child %d into root %dn",
- pChild->pgno, pPage->pgno));
- }
- rc = reparentChildPages(pPage);
- assert( pPage->nOverflow==0 );
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- int i;
- for(i=0; i<pPage->nCell; i++){
- rc = ptrmapPutOvfl(pPage, i);
- if( rc!=SQLITE_OK ){
- goto end_shallow_balance;
- }
- }
- }
- #endif
- releasePage(pChild);
- }
- end_shallow_balance:
- sqlite3_free(apCell);
- return rc;
- }
- /*
- ** The root page is overfull
- **
- ** When this happens, Create a new child page and copy the
- ** contents of the root into the child. Then make the root
- ** page an empty page with rightChild pointing to the new
- ** child. Finally, call balance_internal() on the new child
- ** to cause it to split.
- */
- static int balance_deeper(MemPage *pPage){
- int rc; /* Return value from subprocedures */
- MemPage *pChild; /* Pointer to a new child page */
- Pgno pgnoChild; /* Page number of the new child page */
- BtShared *pBt; /* The BTree */
- int usableSize; /* Total usable size of a page */
- u8 *data; /* Content of the parent page */
- u8 *cdata; /* Content of the child page */
- int hdr; /* Offset to page header in parent */
- int brk; /* Offset to content of first cell in parent */
- assert( pPage->pParent==0 );
- assert( pPage->nOverflow>0 );
- pBt = pPage->pBt;
- assert( sqlite3_mutex_held(pBt->mutex) );
- rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
- if( rc ) return rc;
- assert( sqlite3PagerIswriteable(pChild->pDbPage) );
- usableSize = pBt->usableSize;
- data = pPage->aData;
- hdr = pPage->hdrOffset;
- brk = get2byte(&data[hdr+5]);
- cdata = pChild->aData;
- memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
- memcpy(&cdata[brk], &data[brk], usableSize-brk);
- assert( pChild->isInit==0 );
- rc = sqlite3BtreeInitPage(pChild, pPage);
- if( rc ) goto balancedeeper_out;
- memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
- pChild->nOverflow = pPage->nOverflow;
- if( pChild->nOverflow ){
- pChild->nFree = 0;
- }
- assert( pChild->nCell==pPage->nCell );
- zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
- put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
- TRACE(("BALANCE: copy root %d into %dn", pPage->pgno, pChild->pgno));
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- int i;
- rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
- if( rc ) goto balancedeeper_out;
- for(i=0; i<pChild->nCell; i++){
- rc = ptrmapPutOvfl(pChild, i);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- }
- }
- #endif
- rc = balance_nonroot(pChild);
- balancedeeper_out:
- releasePage(pChild);
- return rc;
- }
- /*
- ** Decide if the page pPage needs to be balanced. If balancing is
- ** required, call the appropriate balancing routine.
- */
- static int balance(MemPage *pPage, int insert){
- int rc = SQLITE_OK;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- if( pPage->pParent==0 ){
- rc = sqlite3PagerWrite(pPage->pDbPage);
- if( rc==SQLITE_OK && pPage->nOverflow>0 ){
- rc = balance_deeper(pPage);
- }
- if( rc==SQLITE_OK && pPage->nCell==0 ){
- rc = balance_shallower(pPage);
- }
- }else{
- if( pPage->nOverflow>0 ||
- (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
- rc = balance_nonroot(pPage);
- }
- }
- return rc;
- }
- /*
- ** This routine checks all cursors that point to table pgnoRoot.
- ** If any of those cursors were opened with wrFlag==0 in a different
- ** database connection (a database connection that shares the pager
- ** cache with the current connection) and that other connection
- ** is not in the ReadUncommmitted state, then this routine returns
- ** SQLITE_LOCKED.
- **
- ** In addition to checking for read-locks (where a read-lock
- ** means a cursor opened with wrFlag==0) this routine also moves
- ** all write cursors so that they are pointing to the
- ** first Cell on the root page. This is necessary because an insert
- ** or delete might change the number of cells on a page or delete
- ** a page entirely and we do not want to leave any cursors
- ** pointing to non-existant pages or cells.
- */
- static int checkReadLocks(Btree *pBtree, Pgno pgnoRoot, BtCursor *pExclude){
- BtCursor *p;
- BtShared *pBt = pBtree->pBt;
- sqlite3 *db = pBtree->db;
- assert( sqlite3BtreeHoldsMutex(pBtree) );
- for(p=pBt->pCursor; p; p=p->pNext){
- if( p==pExclude ) continue;
- if( p->eState!=CURSOR_VALID ) continue;
- if( p->pgnoRoot!=pgnoRoot ) continue;
- if( p->wrFlag==0 ){
- sqlite3 *dbOther = p->pBtree->db;
- if( dbOther==0 ||
- (dbOther!=db && (dbOther->flags & SQLITE_ReadUncommitted)==0) ){
- return SQLITE_LOCKED;
- }
- }else if( p->pPage->pgno!=p->pgnoRoot ){
- moveToRoot(p);
- }
- }
- return SQLITE_OK;
- }
- /*
- ** Make sure pBt->pTmpSpace points to an allocation of
- ** MX_CELL_SIZE(pBt) bytes.
- */
- static void allocateTempSpace(BtShared *pBt){
- if( !pBt->pTmpSpace ){
- pBt->pTmpSpace = sqlite3_malloc(MX_CELL_SIZE(pBt));
- }
- }
- /*
- ** Insert a new record into the BTree. The key is given by (pKey,nKey)
- ** and the data is given by (pData,nData). The cursor is used only to
- ** define what table the record should be inserted into. The cursor
- ** is left pointing at a random location.
- **
- ** For an INTKEY table, only the nKey value of the key is used. pKey is
- ** ignored. For a ZERODATA table, the pData and nData are both ignored.
- */
- int sqlite3BtreeInsert(
- BtCursor *pCur, /* Insert data into the table of this cursor */
- const void *pKey, i64 nKey, /* The key of the new record */
- const void *pData, int nData, /* The data of the new record */
- int nZero, /* Number of extra 0 bytes to append to data */
- int appendBias /* True if this is likely an append */
- ){
- int rc;
- int loc;
- int szNew;
- MemPage *pPage;
- Btree *p = pCur->pBtree;
- BtShared *pBt = p->pBt;
- unsigned char *oldCell;
- unsigned char *newCell = 0;
- assert( cursorHoldsMutex(pCur) );
- if( pBt->inTransaction!=TRANS_WRITE ){
- /* Must start a transaction before doing an insert */
- rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- return rc;
- }
- assert( !pBt->readOnly );
- if( !pCur->wrFlag ){
- return SQLITE_PERM; /* Cursor not open for writing */
- }
- if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
- return SQLITE_LOCKED; /* The table pCur points to has a read lock */
- }
- if( pCur->eState==CURSOR_FAULT ){
- return pCur->skip;
- }
- /* Save the positions of any other cursors open on this table */
- clearCursorPosition(pCur);
- if(
- SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
- SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, 0, nKey, appendBias, &loc))
- ){
- return rc;
- }
- pPage = pCur->pPage;
- assert( pPage->intKey || nKey>=0 );
- assert( pPage->leaf || !pPage->leafData );
- TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %sn",
- pCur->pgnoRoot, nKey, nData, pPage->pgno,
- loc==0 ? "overwrite" : "new entry"));
- assert( pPage->isInit );
- allocateTempSpace(pBt);
- newCell = pBt->pTmpSpace;
- if( newCell==0 ) return SQLITE_NOMEM;
- rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
- if( rc ) goto end_insert;
- assert( szNew==cellSizePtr(pPage, newCell) );
- assert( szNew<=MX_CELL_SIZE(pBt) );
- if( loc==0 && CURSOR_VALID==pCur->eState ){
- u16 szOld;
- assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- rc = sqlite3PagerWrite(pPage->pDbPage);
- if( rc ){
- goto end_insert;
- }
- oldCell = findCell(pPage, pCur->idx);
- if( !pPage->leaf ){
- memcpy(newCell, oldCell, 4);
- }
- szOld = cellSizePtr(pPage, oldCell);
- rc = clearCell(pPage, oldCell);
- if( rc ) goto end_insert;
- dropCell(pPage, pCur->idx, szOld);
- }else if( loc<0 && pPage->nCell>0 ){
- assert( pPage->leaf );
- pCur->idx++;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- }else{
- assert( pPage->leaf );
- }
- rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
- if( rc!=SQLITE_OK ) goto end_insert;
- rc = balance(pPage, 1);
- /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
- /* fflush(stdout); */
- if( rc==SQLITE_OK ){
- moveToRoot(pCur);
- }
- end_insert:
- return rc;
- }
- /*
- ** Delete the entry that the cursor is pointing to. The cursor
- ** is left pointing at a random location.
- */
- int sqlite3BtreeDelete(BtCursor *pCur){
- MemPage *pPage = pCur->pPage;
- unsigned char *pCell;
- int rc;
- Pgno pgnoChild = 0;
- Btree *p = pCur->pBtree;
- BtShared *pBt = p->pBt;
- assert( cursorHoldsMutex(pCur) );
- assert( pPage->isInit );
- if( pBt->inTransaction!=TRANS_WRITE ){
- /* Must start a transaction before doing a delete */
- rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- return rc;
- }
- assert( !pBt->readOnly );
- if( pCur->eState==CURSOR_FAULT ){
- return pCur->skip;
- }
- if( pCur->idx >= pPage->nCell ){
- return SQLITE_ERROR; /* The cursor is not pointing to anything */
- }
- if( !pCur->wrFlag ){
- return SQLITE_PERM; /* Did not open this cursor for writing */
- }
- if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
- return SQLITE_LOCKED; /* The table pCur points to has a read lock */
- }
- /* Restore the current cursor position (a no-op if the cursor is not in
- ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors
- ** open on the same table. Then call sqlite3PagerWrite() on the page
- ** that the entry will be deleted from.
- */
- if(
- (rc = restoreOrClearCursorPosition(pCur))!=0 ||
- (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 ||
- (rc = sqlite3PagerWrite(pPage->pDbPage))!=0
- ){
- return rc;
- }
- /* Locate the cell within its page and leave pCell pointing to the
- ** data. The clearCell() call frees any overflow pages associated with the
- ** cell. The cell itself is still intact.
- */
- pCell = findCell(pPage, pCur->idx);
- if( !pPage->leaf ){
- pgnoChild = get4byte(pCell);
- }
- rc = clearCell(pPage, pCell);
- if( rc ){
- return rc;
- }
- if( !pPage->leaf ){
- /*
- ** The entry we are about to delete is not a leaf so if we do not
- ** do something we will leave a hole on an internal page.
- ** We have to fill the hole by moving in a cell from a leaf. The
- ** next Cell after the one to be deleted is guaranteed to exist and
- ** to be a leaf so we can use it.
- */
- BtCursor leafCur;
- unsigned char *pNext;
- int notUsed;
- unsigned char *tempCell = 0;
- assert( !pPage->leafData );
- sqlite3BtreeGetTempCursor(pCur, &leafCur);
- rc = sqlite3BtreeNext(&leafCur, ¬Used);
- if( rc==SQLITE_OK ){
- rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
- }
- if( rc==SQLITE_OK ){
- u16 szNext;
- TRACE(("DELETE: table=%d delete internal from %d replace from leaf %dn",
- pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
- dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
- pNext = findCell(leafCur.pPage, leafCur.idx);
- szNext = cellSizePtr(leafCur.pPage, pNext);
- assert( MX_CELL_SIZE(pBt)>=szNext+4 );
- allocateTempSpace(pBt);
- tempCell = pBt->pTmpSpace;
- if( tempCell==0 ){
- rc = SQLITE_NOMEM;
- }
- if( rc==SQLITE_OK ){
- rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
- }
- if( rc==SQLITE_OK ){
- put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
- rc = balance(pPage, 0);
- }
- if( rc==SQLITE_OK ){
- dropCell(leafCur.pPage, leafCur.idx, szNext);
- rc = balance(leafCur.pPage, 0);
- }
- }
- sqlite3BtreeReleaseTempCursor(&leafCur);
- }else{
- TRACE(("DELETE: table=%d delete from leaf %dn",
- pCur->pgnoRoot, pPage->pgno));
- dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
- rc = balance(pPage, 0);
- }
- if( rc==SQLITE_OK ){
- moveToRoot(pCur);
- }
- return rc;
- }
- /*
- ** Create a new BTree table. Write into *piTable the page
- ** number for the root page of the new table.
- **
- ** The type of type is determined by the flags parameter. Only the
- ** following values of flags are currently in use. Other values for
- ** flags might not work:
- **
- ** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys
- ** BTREE_ZERODATA Used for SQL indices
- */
- static int btreeCreateTable(Btree *p, int *piTable, int flags){
- BtShared *pBt = p->pBt;
- MemPage *pRoot;
- Pgno pgnoRoot;
- int rc;
- assert( sqlite3BtreeHoldsMutex(p) );
- if( pBt->inTransaction!=TRANS_WRITE ){
- /* Must start a transaction first */
- rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- return rc;
- }
- assert( !pBt->readOnly );
- #ifdef SQLITE_OMIT_AUTOVACUUM
- rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
- if( rc ){
- return rc;
- }
- #else
- if( pBt->autoVacuum ){
- Pgno pgnoMove; /* Move a page here to make room for the root-page */
- MemPage *pPageMove; /* The page to move to. */
- /* Creating a new table may probably require moving an existing database
- ** to make room for the new tables root page. In case this page turns
- ** out to be an overflow page, delete all overflow page-map caches
- ** held by open cursors.
- */
- invalidateAllOverflowCache(pBt);
- /* Read the value of meta[3] from the database to determine where the
- ** root page of the new table should go. meta[3] is the largest root-page
- ** created so far, so the new root-page is (meta[3]+1).
- */
- rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- pgnoRoot++;
- /* The new root-page may not be allocated on a pointer-map page, or the
- ** PENDING_BYTE page.
- */
- while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
- pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
- pgnoRoot++;
- }
- assert( pgnoRoot>=3 );
- /* Allocate a page. The page that currently resides at pgnoRoot will
- ** be moved to the allocated page (unless the allocated page happens
- ** to reside at pgnoRoot).
- */
- rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- if( pgnoMove!=pgnoRoot ){
- /* pgnoRoot is the page that will be used for the root-page of
- ** the new table (assuming an error did not occur). But we were
- ** allocated pgnoMove. If required (i.e. if it was not allocated
- ** by extending the file), the current page at position pgnoMove
- ** is already journaled.
- */
- u8 eType;
- Pgno iPtrPage;
- releasePage(pPageMove);
- /* Move the page currently at pgnoRoot to pgnoMove. */
- rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
- if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
- releasePage(pRoot);
- return rc;
- }
- assert( eType!=PTRMAP_ROOTPAGE );
- assert( eType!=PTRMAP_FREEPAGE );
- rc = sqlite3PagerWrite(pRoot->pDbPage);
- if( rc!=SQLITE_OK ){
- releasePage(pRoot);
- return rc;
- }
- rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
- releasePage(pRoot);
- /* Obtain the page at pgnoRoot */
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = sqlite3PagerWrite(pRoot->pDbPage);
- if( rc!=SQLITE_OK ){
- releasePage(pRoot);
- return rc;
- }
- }else{
- pRoot = pPageMove;
- }
- /* Update the pointer-map and meta-data with the new root-page number. */
- rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0);
- if( rc ){
- releasePage(pRoot);
- return rc;
- }
- rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
- if( rc ){
- releasePage(pRoot);
- return rc;
- }
- }else{
- rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
- if( rc ) return rc;
- }
- #endif
- assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
- zeroPage(pRoot, flags | PTF_LEAF);
- sqlite3PagerUnref(pRoot->pDbPage);
- *piTable = (int)pgnoRoot;
- return SQLITE_OK;
- }
- int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
- int rc;
- sqlite3BtreeEnter(p);
- p->pBt->db = p->db;
- rc = btreeCreateTable(p, piTable, flags);
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Erase the given database page and all its children. Return
- ** the page to the freelist.
- */
- static int clearDatabasePage(
- BtShared *pBt, /* The BTree that contains the table */
- Pgno pgno, /* Page number to clear */
- MemPage *pParent, /* Parent page. NULL for the root */
- int freePageFlag /* Deallocate page if true */
- ){
- MemPage *pPage = 0;
- int rc;
- unsigned char *pCell;
- int i;
- assert( sqlite3_mutex_held(pBt->mutex) );
- if( pgno>sqlite3PagerPagecount(pBt->pPager) ){
- return SQLITE_CORRUPT_BKPT;
- }
- rc = getAndInitPage(pBt, pgno, &pPage, pParent);
- if( rc ) goto cleardatabasepage_out;
- for(i=0; i<pPage->nCell; i++){
- pCell = findCell(pPage, i);
- if( !pPage->leaf ){
- rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
- if( rc ) goto cleardatabasepage_out;
- }
- rc = clearCell(pPage, pCell);
- if( rc ) goto cleardatabasepage_out;
- }
- if( !pPage->leaf ){
- rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
- if( rc ) goto cleardatabasepage_out;
- }
- if( freePageFlag ){
- rc = freePage(pPage);
- }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
- zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
- }
- cleardatabasepage_out:
- releasePage(pPage);
- return rc;
- }
- /*
- ** Delete all information from a single table in the database. iTable is
- ** the page number of the root of the table. After this routine returns,
- ** the root page is empty, but still exists.
- **
- ** This routine will fail with SQLITE_LOCKED if there are any open
- ** read cursors on the table. Open write cursors are moved to the
- ** root of the table.
- */
- int sqlite3BtreeClearTable(Btree *p, int iTable){
- int rc;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- if( p->inTrans!=TRANS_WRITE ){
- rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- }else if( (rc = checkReadLocks(p, iTable, 0))!=SQLITE_OK ){
- /* nothing to do */
- }else if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){
- /* nothing to do */
- }else{
- rc = clearDatabasePage(pBt, (Pgno)iTable, 0, 0);
- }
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Erase all information in a table and add the root of the table to
- ** the freelist. Except, the root of the principle table (the one on
- ** page 1) is never added to the freelist.
- **
- ** This routine will fail with SQLITE_LOCKED if there are any open
- ** cursors on the table.
- **
- ** If AUTOVACUUM is enabled and the page at iTable is not the last
- ** root page in the database file, then the last root page
- ** in the database file is moved into the slot formerly occupied by
- ** iTable and that last slot formerly occupied by the last root page
- ** is added to the freelist instead of iTable. In this say, all
- ** root pages are kept at the beginning of the database file, which
- ** is necessary for AUTOVACUUM to work right. *piMoved is set to the
- ** page number that used to be the last root page in the file before
- ** the move. If no page gets moved, *piMoved is set to 0.
- ** The last root page is recorded in meta[3] and the value of
- ** meta[3] is updated by this procedure.
- */
- static int btreeDropTable(Btree *p, int iTable, int *piMoved){
- int rc;
- MemPage *pPage = 0;
- BtShared *pBt = p->pBt;
- assert( sqlite3BtreeHoldsMutex(p) );
- if( p->inTrans!=TRANS_WRITE ){
- return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- }
- /* It is illegal to drop a table if any cursors are open on the
- ** database. This is because in auto-vacuum mode the backend may
- ** need to move another root-page to fill a gap left by the deleted
- ** root page. If an open cursor was using this page a problem would
- ** occur.
- */
- if( pBt->pCursor ){
- return SQLITE_LOCKED;
- }
- rc = sqlite3BtreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
- if( rc ) return rc;
- rc = sqlite3BtreeClearTable(p, iTable);
- if( rc ){
- releasePage(pPage);
- return rc;
- }
- *piMoved = 0;
- if( iTable>1 ){
- #ifdef SQLITE_OMIT_AUTOVACUUM
- rc = freePage(pPage);
- releasePage(pPage);
- #else
- if( pBt->autoVacuum ){
- Pgno maxRootPgno;
- rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno);
- if( rc!=SQLITE_OK ){
- releasePage(pPage);
- return rc;
- }
- if( iTable==maxRootPgno ){
- /* If the table being dropped is the table with the largest root-page
- ** number in the database, put the root page on the free list.
- */
- rc = freePage(pPage);
- releasePage(pPage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- }else{
- /* The table being dropped does not have the largest root-page
- ** number in the database. So move the page that does into the
- ** gap left by the deleted root-page.
- */
- MemPage *pMove;
- releasePage(pPage);
- rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
- releasePage(pMove);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = freePage(pMove);
- releasePage(pMove);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- *piMoved = maxRootPgno;
- }
- /* Set the new 'max-root-page' value in the database header. This
- ** is the old value less one, less one more if that happens to
- ** be a root-page number, less one again if that is the
- ** PENDING_BYTE_PAGE.
- */
- maxRootPgno--;
- if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){
- maxRootPgno--;
- }
- if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){
- maxRootPgno--;
- }
- assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
- rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
- }else{
- rc = freePage(pPage);
- releasePage(pPage);
- }
- #endif
- }else{
- /* If sqlite3BtreeDropTable was called on page 1. */
- zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
- releasePage(pPage);
- }
- return rc;
- }
- int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
- int rc;
- sqlite3BtreeEnter(p);
- p->pBt->db = p->db;
- rc = btreeDropTable(p, iTable, piMoved);
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Read the meta-information out of a database file. Meta[0]
- ** is the number of free pages currently in the database. Meta[1]
- ** through meta[15] are available for use by higher layers. Meta[0]
- ** is read-only, the others are read/write.
- **
- ** The schema layer numbers meta values differently. At the schema
- ** layer (and the SetCookie and ReadCookie opcodes) the number of
- ** free pages is not visible. So Cookie[0] is the same as Meta[1].
- */
- int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
- DbPage *pDbPage;
- int rc;
- unsigned char *pP1;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- /* Reading a meta-data value requires a read-lock on page 1 (and hence
- ** the sqlite_master table. We grab this lock regardless of whether or
- ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page
- ** 1 is treated as a special case by queryTableLock() and lockTable()).
- */
- rc = queryTableLock(p, 1, READ_LOCK);
- if( rc!=SQLITE_OK ){
- sqlite3BtreeLeave(p);
- return rc;
- }
- assert( idx>=0 && idx<=15 );
- rc = sqlite3PagerGet(pBt->pPager, 1, &pDbPage);
- if( rc ){
- sqlite3BtreeLeave(p);
- return rc;
- }
- pP1 = (unsigned char *)sqlite3PagerGetData(pDbPage);
- *pMeta = get4byte(&pP1[36 + idx*4]);
- sqlite3PagerUnref(pDbPage);
- /* If autovacuumed is disabled in this build but we are trying to
- ** access an autovacuumed database, then make the database readonly.
- */
- #ifdef SQLITE_OMIT_AUTOVACUUM
- if( idx==4 && *pMeta>0 ) pBt->readOnly = 1;
- #endif
- /* Grab the read-lock on page 1. */
- rc = lockTable(p, 1, READ_LOCK);
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Write meta-information back into the database. Meta[0] is
- ** read-only and may not be written.
- */
- int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
- BtShared *pBt = p->pBt;
- unsigned char *pP1;
- int rc;
- assert( idx>=1 && idx<=15 );
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- if( p->inTrans!=TRANS_WRITE ){
- rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- }else{
- assert( pBt->pPage1!=0 );
- pP1 = pBt->pPage1->aData;
- rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
- if( rc==SQLITE_OK ){
- put4byte(&pP1[36 + idx*4], iMeta);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( idx==7 ){
- assert( pBt->autoVacuum || iMeta==0 );
- assert( iMeta==0 || iMeta==1 );
- pBt->incrVacuum = iMeta;
- }
- #endif
- }
- }
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Return the flag byte at the beginning of the page that the cursor
- ** is currently pointing to.
- */
- int sqlite3BtreeFlags(BtCursor *pCur){
- /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
- ** restoreOrClearCursorPosition() here.
- */
- MemPage *pPage;
- restoreOrClearCursorPosition(pCur);
- pPage = pCur->pPage;
- assert( cursorHoldsMutex(pCur) );
- assert( pPage->pBt==pCur->pBt );
- return pPage ? pPage->aData[pPage->hdrOffset] : 0;
- }
- /*
- ** Return the pager associated with a BTree. This routine is used for
- ** testing and debugging only.
- */
- Pager *sqlite3BtreePager(Btree *p){
- return p->pBt->pPager;
- }
- #ifndef SQLITE_OMIT_INTEGRITY_CHECK
- /*
- ** Append a message to the error message string.
- */
- static void checkAppendMsg(
- IntegrityCk *pCheck,
- char *zMsg1,
- const char *zFormat,
- ...
- ){
- va_list ap;
- char *zMsg2;
- if( !pCheck->mxErr ) return;
- pCheck->mxErr--;
- pCheck->nErr++;
- va_start(ap, zFormat);
- zMsg2 = sqlite3VMPrintf(0, zFormat, ap);
- va_end(ap);
- if( zMsg1==0 ) zMsg1 = "";
- if( pCheck->zErrMsg ){
- char *zOld = pCheck->zErrMsg;
- pCheck->zErrMsg = 0;
- sqlite3SetString(&pCheck->zErrMsg, zOld, "n", zMsg1, zMsg2, (char*)0);
- sqlite3_free(zOld);
- }else{
- sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
- }
- sqlite3_free(zMsg2);
- }
- #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
- #ifndef SQLITE_OMIT_INTEGRITY_CHECK
- /*
- ** Add 1 to the reference count for page iPage. If this is the second
- ** reference to the page, add an error message to pCheck->zErrMsg.
- ** Return 1 if there are 2 ore more references to the page and 0 if
- ** if this is the first reference to the page.
- **
- ** Also check that the page number is in bounds.
- */
- static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
- if( iPage==0 ) return 1;
- if( iPage>pCheck->nPage || iPage<0 ){
- checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
- return 1;
- }
- if( pCheck->anRef[iPage]==1 ){
- checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
- return 1;
- }
- return (pCheck->anRef[iPage]++)>1;
- }
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /*
- ** Check that the entry in the pointer-map for page iChild maps to
- ** page iParent, pointer type ptrType. If not, append an error message
- ** to pCheck.
- */
- static void checkPtrmap(
- IntegrityCk *pCheck, /* Integrity check context */
- Pgno iChild, /* Child page number */
- u8 eType, /* Expected pointer map type */
- Pgno iParent, /* Expected pointer map parent page number */
- char *zContext /* Context description (used for error msg) */
- ){
- int rc;
- u8 ePtrmapType;
- Pgno iPtrmapParent;
- rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
- if( rc!=SQLITE_OK ){
- checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild);
- return;
- }
- if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
- checkAppendMsg(pCheck, zContext,
- "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)",
- iChild, eType, iParent, ePtrmapType, iPtrmapParent);
- }
- }
- #endif
- /*
- ** Check the integrity of the freelist or of an overflow page list.
- ** Verify that the number of pages on the list is N.
- */
- static void checkList(
- IntegrityCk *pCheck, /* Integrity checking context */
- int isFreeList, /* True for a freelist. False for overflow page list */
- int iPage, /* Page number for first page in the list */
- int N, /* Expected number of pages in the list */
- char *zContext /* Context for error messages */
- ){
- int i;
- int expected = N;
- int iFirst = iPage;
- while( N-- > 0 && pCheck->mxErr ){
- DbPage *pOvflPage;
- unsigned char *pOvflData;
- if( iPage<1 ){
- checkAppendMsg(pCheck, zContext,
- "%d of %d pages missing from overflow list starting at %d",
- N+1, expected, iFirst);
- break;
- }
- if( checkRef(pCheck, iPage, zContext) ) break;
- if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){
- checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
- break;
- }
- pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
- if( isFreeList ){
- int n = get4byte(&pOvflData[4]);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pCheck->pBt->autoVacuum ){
- checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext);
- }
- #endif
- if( n>pCheck->pBt->usableSize/4-8 ){
- checkAppendMsg(pCheck, zContext,
- "freelist leaf count too big on page %d", iPage);
- N--;
- }else{
- for(i=0; i<n; i++){
- Pgno iFreePage = get4byte(&pOvflData[8+i*4]);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pCheck->pBt->autoVacuum ){
- checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext);
- }
- #endif
- checkRef(pCheck, iFreePage, zContext);
- }
- N -= n;
- }
- }
- #ifndef SQLITE_OMIT_AUTOVACUUM
- else{
- /* If this database supports auto-vacuum and iPage is not the last
- ** page in this overflow list, check that the pointer-map entry for
- ** the following page matches iPage.
- */
- if( pCheck->pBt->autoVacuum && N>0 ){
- i = get4byte(pOvflData);
- checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext);
- }
- }
- #endif
- iPage = get4byte(pOvflData);
- sqlite3PagerUnref(pOvflPage);
- }
- }
- #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
- #ifndef SQLITE_OMIT_INTEGRITY_CHECK
- /*
- ** Do various sanity checks on a single page of a tree. Return
- ** the tree depth. Root pages return 0. Parents of root pages
- ** return 1, and so forth.
- **
- ** These checks are done:
- **
- ** 1. Make sure that cells and freeblocks do not overlap
- ** but combine to completely cover the page.
- ** NO 2. Make sure cell keys are in order.
- ** NO 3. Make sure no key is less than or equal to zLowerBound.
- ** NO 4. Make sure no key is greater than or equal to zUpperBound.
- ** 5. Check the integrity of overflow pages.
- ** 6. Recursively call checkTreePage on all children.
- ** 7. Verify that the depth of all children is the same.
- ** 8. Make sure this page is at least 33% full or else it is
- ** the root of the tree.
- */
- static int checkTreePage(
- IntegrityCk *pCheck, /* Context for the sanity check */
- int iPage, /* Page number of the page to check */
- MemPage *pParent, /* Parent page */
- char *zParentContext /* Parent context */
- ){
- MemPage *pPage;
- int i, rc, depth, d2, pgno, cnt;
- int hdr, cellStart;
- int nCell;
- u8 *data;
- BtShared *pBt;
- int usableSize;
- char zContext[100];
- char *hit;
- sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage);
- /* Check that the page exists
- */
- pBt = pCheck->pBt;
- usableSize = pBt->usableSize;
- if( iPage==0 ) return 0;
- if( checkRef(pCheck, iPage, zParentContext) ) return 0;
- if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
- checkAppendMsg(pCheck, zContext,
- "unable to get the page. error code=%d", rc);
- return 0;
- }
- if( (rc = sqlite3BtreeInitPage(pPage, pParent))!=0 ){
- checkAppendMsg(pCheck, zContext,
- "sqlite3BtreeInitPage() returns error code %d", rc);
- releasePage(pPage);
- return 0;
- }
- /* Check out all the cells.
- */
- depth = 0;
- for(i=0; i<pPage->nCell && pCheck->mxErr; i++){
- u8 *pCell;
- int sz;
- CellInfo info;
- /* Check payload overflow pages
- */
- sqlite3_snprintf(sizeof(zContext), zContext,
- "On tree page %d cell %d: ", iPage, i);
- pCell = findCell(pPage,i);
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- sz = info.nData;
- if( !pPage->intKey ) sz += info.nKey;
- assert( sz==info.nPayload );
- if( sz>info.nLocal ){
- int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
- Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext);
- }
- #endif
- checkList(pCheck, 0, pgnoOvfl, nPage, zContext);
- }
- /* Check sanity of left child page.
- */
- if( !pPage->leaf ){
- pgno = get4byte(pCell);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext);
- }
- #endif
- d2 = checkTreePage(pCheck,pgno,pPage,zContext);
- if( i>0 && d2!=depth ){
- checkAppendMsg(pCheck, zContext, "Child page depth differs");
- }
- depth = d2;
- }
- }
- if( !pPage->leaf ){
- pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
- sqlite3_snprintf(sizeof(zContext), zContext,
- "On page %d at right child: ", iPage);
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0);
- }
- #endif
- checkTreePage(pCheck, pgno, pPage, zContext);
- }
-
- /* Check for complete coverage of the page
- */
- data = pPage->aData;
- hdr = pPage->hdrOffset;
- hit = sqlite3MallocZero( usableSize );
- if( hit ){
- memset(hit, 1, get2byte(&data[hdr+5]));
- nCell = get2byte(&data[hdr+3]);
- cellStart = hdr + 12 - 4*pPage->leaf;
- for(i=0; i<nCell; i++){
- int pc = get2byte(&data[cellStart+i*2]);
- u16 size = cellSizePtr(pPage, &data[pc]);
- int j;
- if( (pc+size-1)>=usableSize || pc<0 ){
- checkAppendMsg(pCheck, 0,
- "Corruption detected in cell %d on page %d",i,iPage,0);
- }else{
- for(j=pc+size-1; j>=pc; j--) hit[j]++;
- }
- }
- for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000;
- cnt++){
- int size = get2byte(&data[i+2]);
- int j;
- if( (i+size-1)>=usableSize || i<0 ){
- checkAppendMsg(pCheck, 0,
- "Corruption detected in cell %d on page %d",i,iPage,0);
- }else{
- for(j=i+size-1; j>=i; j--) hit[j]++;
- }
- i = get2byte(&data[i]);
- }
- for(i=cnt=0; i<usableSize; i++){
- if( hit[i]==0 ){
- cnt++;
- }else if( hit[i]>1 ){
- checkAppendMsg(pCheck, 0,
- "Multiple uses for byte %d of page %d", i, iPage);
- break;
- }
- }
- if( cnt!=data[hdr+7] ){
- checkAppendMsg(pCheck, 0,
- "Fragmented space is %d byte reported as %d on page %d",
- cnt, data[hdr+7], iPage);
- }
- }
- sqlite3_free(hit);
- releasePage(pPage);
- return depth+1;
- }
- #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
- #ifndef SQLITE_OMIT_INTEGRITY_CHECK
- /*
- ** This routine does a complete check of the given BTree file. aRoot[] is
- ** an array of pages numbers were each page number is the root page of
- ** a table. nRoot is the number of entries in aRoot.
- **
- ** If everything checks out, this routine returns NULL. If something is
- ** amiss, an error message is written into memory obtained from malloc()
- ** and a pointer to that error message is returned. The calling function
- ** is responsible for freeing the error message when it is done.
- */
- char *sqlite3BtreeIntegrityCheck(
- Btree *p, /* The btree to be checked */
- int *aRoot, /* An array of root pages numbers for individual trees */
- int nRoot, /* Number of entries in aRoot[] */
- int mxErr, /* Stop reporting errors after this many */
- int *pnErr /* Write number of errors seen to this variable */
- ){
- int i;
- int nRef;
- IntegrityCk sCheck;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- nRef = sqlite3PagerRefcount(pBt->pPager);
- if( lockBtreeWithRetry(p)!=SQLITE_OK ){
- sqlite3BtreeLeave(p);
- return sqlite3StrDup("Unable to acquire a read lock on the database");
- }
- sCheck.pBt = pBt;
- sCheck.pPager = pBt->pPager;
- sCheck.nPage = sqlite3PagerPagecount(sCheck.pPager);
- sCheck.mxErr = mxErr;
- sCheck.nErr = 0;
- *pnErr = 0;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->nTrunc!=0 ){
- sCheck.nPage = pBt->nTrunc;
- }
- #endif
- if( sCheck.nPage==0 ){
- unlockBtreeIfUnused(pBt);
- sqlite3BtreeLeave(p);
- return 0;
- }
- sCheck.anRef = sqlite3_malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
- if( !sCheck.anRef ){
- unlockBtreeIfUnused(pBt);
- *pnErr = 1;
- sqlite3BtreeLeave(p);
- return sqlite3MPrintf(p->db, "Unable to malloc %d bytes",
- (sCheck.nPage+1)*sizeof(sCheck.anRef[0]));
- }
- for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
- i = PENDING_BYTE_PAGE(pBt);
- if( i<=sCheck.nPage ){
- sCheck.anRef[i] = 1;
- }
- sCheck.zErrMsg = 0;
- /* Check the integrity of the freelist
- */
- checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
- get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");
- /* Check all the tables.
- */
- for(i=0; i<nRoot && sCheck.mxErr; i++){
- if( aRoot[i]==0 ) continue;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum && aRoot[i]>1 ){
- checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0);
- }
- #endif
- checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ");
- }
- /* Make sure every page in the file is referenced
- */
- for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
- #ifdef SQLITE_OMIT_AUTOVACUUM
- if( sCheck.anRef[i]==0 ){
- checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
- }
- #else
- /* If the database supports auto-vacuum, make sure no tables contain
- ** references to pointer-map pages.
- */
- if( sCheck.anRef[i]==0 &&
- (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
- checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
- }
- if( sCheck.anRef[i]!=0 &&
- (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
- checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i);
- }
- #endif
- }
- /* Make sure this analysis did not leave any unref() pages
- */
- unlockBtreeIfUnused(pBt);
- if( nRef != sqlite3PagerRefcount(pBt->pPager) ){
- checkAppendMsg(&sCheck, 0,
- "Outstanding page count goes from %d to %d during this analysis",
- nRef, sqlite3PagerRefcount(pBt->pPager)
- );
- }
- /* Clean up and report errors.
- */
- sqlite3BtreeLeave(p);
- sqlite3_free(sCheck.anRef);
- *pnErr = sCheck.nErr;
- return sCheck.zErrMsg;
- }
- #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
- /*
- ** Return the full pathname of the underlying database file.
- **
- ** The pager filename is invariant as long as the pager is
- ** open so it is safe to access without the BtShared mutex.
- */
- const char *sqlite3BtreeGetFilename(Btree *p){
- assert( p->pBt->pPager!=0 );
- return sqlite3PagerFilename(p->pBt->pPager);
- }
- /*
- ** Return the pathname of the directory that contains the database file.
- **
- ** The pager directory name is invariant as long as the pager is
- ** open so it is safe to access without the BtShared mutex.
- */
- const char *sqlite3BtreeGetDirname(Btree *p){
- assert( p->pBt->pPager!=0 );
- return sqlite3PagerDirname(p->pBt->pPager);
- }
- /*
- ** Return the pathname of the journal file for this database. The return
- ** value of this routine is the same regardless of whether the journal file
- ** has been created or not.
- **
- ** The pager journal filename is invariant as long as the pager is
- ** open so it is safe to access without the BtShared mutex.
- */
- const char *sqlite3BtreeGetJournalname(Btree *p){
- assert( p->pBt->pPager!=0 );
- return sqlite3PagerJournalname(p->pBt->pPager);
- }
- #ifndef SQLITE_OMIT_VACUUM
- /*
- ** Copy the complete content of pBtFrom into pBtTo. A transaction
- ** must be active for both files.
- **
- ** The size of file pTo may be reduced by this operation.
- ** If anything goes wrong, the transaction on pTo is rolled back.
- **
- ** If successful, CommitPhaseOne() may be called on pTo before returning.
- ** The caller should finish committing the transaction on pTo by calling
- ** sqlite3BtreeCommit().
- */
- static int btreeCopyFile(Btree *pTo, Btree *pFrom){
- int rc = SQLITE_OK;
- Pgno i;
- Pgno nFromPage; /* Number of pages in pFrom */
- Pgno nToPage; /* Number of pages in pTo */
- Pgno nNewPage; /* Number of pages in pTo after the copy */
- Pgno iSkip; /* Pending byte page in pTo */
- int nToPageSize; /* Page size of pTo in bytes */
- int nFromPageSize; /* Page size of pFrom in bytes */
- BtShared *pBtTo = pTo->pBt;
- BtShared *pBtFrom = pFrom->pBt;
- pBtTo->db = pTo->db;
- pBtFrom->db = pFrom->db;
- nToPageSize = pBtTo->pageSize;
- nFromPageSize = pBtFrom->pageSize;
- if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){
- return SQLITE_ERROR;
- }
- if( pBtTo->pCursor ){
- return SQLITE_BUSY;
- }
- nToPage = sqlite3PagerPagecount(pBtTo->pPager);
- nFromPage = sqlite3PagerPagecount(pBtFrom->pPager);
- iSkip = PENDING_BYTE_PAGE(pBtTo);
- /* Variable nNewPage is the number of pages required to store the
- ** contents of pFrom using the current page-size of pTo.
- */
- nNewPage = ((i64)nFromPage * (i64)nFromPageSize + (i64)nToPageSize - 1) /
- (i64)nToPageSize;
- for(i=1; rc==SQLITE_OK && (i<=nToPage || i<=nNewPage); i++){
- /* Journal the original page.
- **
- ** iSkip is the page number of the locking page (PENDING_BYTE_PAGE)
- ** in database *pTo (before the copy). This page is never written
- ** into the journal file. Unless i==iSkip or the page was not
- ** present in pTo before the copy operation, journal page i from pTo.
- */
- if( i!=iSkip && i<=nToPage ){
- DbPage *pDbPage;
- rc = sqlite3PagerGet(pBtTo->pPager, i, &pDbPage);
- if( rc ){
- break;
- }
- rc = sqlite3PagerWrite(pDbPage);
- if( rc ){
- break;
- }
- if( i>nFromPage ){
- /* Yeah. It seems wierd to call DontWrite() right after Write(). But
- ** that is because the names of those procedures do not exactly
- ** represent what they do. Write() really means "put this page in the
- ** rollback journal and mark it as dirty so that it will be written
- ** to the database file later." DontWrite() undoes the second part of
- ** that and prevents the page from being written to the database. The
- ** page is still on the rollback journal, though. And that is the
- ** whole point of this block: to put pages on the rollback journal.
- */
- sqlite3PagerDontWrite(pDbPage);
- }
- sqlite3PagerUnref(pDbPage);
- }
- /* Overwrite the data in page i of the target database */
- if( rc==SQLITE_OK && i!=iSkip && i<=nNewPage ){
- DbPage *pToPage = 0;
- sqlite3_int64 iOff;
- rc = sqlite3PagerGet(pBtTo->pPager, i, &pToPage);
- if( rc==SQLITE_OK ){
- rc = sqlite3PagerWrite(pToPage);
- }
- for(
- iOff=(i-1)*nToPageSize;
- rc==SQLITE_OK && iOff<i*nToPageSize;
- iOff += nFromPageSize
- ){
- DbPage *pFromPage = 0;
- Pgno iFrom = (iOff/nFromPageSize)+1;
- if( iFrom==PENDING_BYTE_PAGE(pBtFrom) ){
- continue;
- }
- rc = sqlite3PagerGet(pBtFrom->pPager, iFrom, &pFromPage);
- if( rc==SQLITE_OK ){
- char *zTo = sqlite3PagerGetData(pToPage);
- char *zFrom = sqlite3PagerGetData(pFromPage);
- int nCopy;
- if( nFromPageSize>=nToPageSize ){
- zFrom += ((i-1)*nToPageSize - ((iFrom-1)*nFromPageSize));
- nCopy = nToPageSize;
- }else{
- zTo += (((iFrom-1)*nFromPageSize) - (i-1)*nToPageSize);
- nCopy = nFromPageSize;
- }
- memcpy(zTo, zFrom, nCopy);
- sqlite3PagerUnref(pFromPage);
- }
- }
- if( pToPage ) sqlite3PagerUnref(pToPage);
- }
- }
- /* If things have worked so far, the database file may need to be
- ** truncated. The complex part is that it may need to be truncated to
- ** a size that is not an integer multiple of nToPageSize - the current
- ** page size used by the pager associated with B-Tree pTo.
- **
- ** For example, say the page-size of pTo is 2048 bytes and the original
- ** number of pages is 5 (10 KB file). If pFrom has a page size of 1024
- ** bytes and 9 pages, then the file needs to be truncated to 9KB.
- */
- if( rc==SQLITE_OK ){
- if( nFromPageSize!=nToPageSize ){
- sqlite3_file *pFile = sqlite3PagerFile(pBtTo->pPager);
- i64 iSize = (i64)nFromPageSize * (i64)nFromPage;
- i64 iNow = (i64)((nToPage>nNewPage)?nToPage:nNewPage) * (i64)nToPageSize;
- i64 iPending = ((i64)PENDING_BYTE_PAGE(pBtTo)-1) *(i64)nToPageSize;
-
- assert( iSize<=iNow );
-
- /* Commit phase one syncs the journal file associated with pTo
- ** containing the original data. It does not sync the database file
- ** itself. After doing this it is safe to use OsTruncate() and other
- ** file APIs on the database file directly.
- */
- pBtTo->db = pTo->db;
- rc = sqlite3PagerCommitPhaseOne(pBtTo->pPager, 0, 0, 1);
- if( iSize<iNow && rc==SQLITE_OK ){
- rc = sqlite3OsTruncate(pFile, iSize);
- }
-
- /* The loop that copied data from database pFrom to pTo did not
- ** populate the locking page of database pTo. If the page-size of
- ** pFrom is smaller than that of pTo, this means some data will
- ** not have been copied.
- **
- ** This block copies the missing data from database pFrom to pTo
- ** using file APIs. This is safe because at this point we know that
- ** all of the original data from pTo has been synced into the
- ** journal file. At this point it would be safe to do anything at
- ** all to the database file except truncate it to zero bytes.
- */
- if( rc==SQLITE_OK && nFromPageSize<nToPageSize && iSize>iPending){
- i64 iOff;
- for(
- iOff=iPending;
- rc==SQLITE_OK && iOff<(iPending+nToPageSize);
- iOff += nFromPageSize
- ){
- DbPage *pFromPage = 0;
- Pgno iFrom = (iOff/nFromPageSize)+1;
-
- if( iFrom==PENDING_BYTE_PAGE(pBtFrom) || iFrom>nFromPage ){
- continue;
- }
-
- rc = sqlite3PagerGet(pBtFrom->pPager, iFrom, &pFromPage);
- if( rc==SQLITE_OK ){
- char *zFrom = sqlite3PagerGetData(pFromPage);
- rc = sqlite3OsWrite(pFile, zFrom, nFromPageSize, iOff);
- sqlite3PagerUnref(pFromPage);
- }
- }
- }
-
- /* Sync the database file */
- if( rc==SQLITE_OK ){
- rc = sqlite3PagerSync(pBtTo->pPager);
- }
- }else{
- rc = sqlite3PagerTruncate(pBtTo->pPager, nNewPage);
- }
- if( rc==SQLITE_OK ){
- pBtTo->pageSizeFixed = 0;
- }
- }
- if( rc ){
- sqlite3BtreeRollback(pTo);
- }
- return rc;
- }
- int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
- int rc;
- sqlite3BtreeEnter(pTo);
- sqlite3BtreeEnter(pFrom);
- rc = btreeCopyFile(pTo, pFrom);
- sqlite3BtreeLeave(pFrom);
- sqlite3BtreeLeave(pTo);
- return rc;
- }
- #endif /* SQLITE_OMIT_VACUUM */
- /*
- ** Return non-zero if a transaction is active.
- */
- int sqlite3BtreeIsInTrans(Btree *p){
- assert( p==0 || sqlite3_mutex_held(p->db->mutex) );
- return (p && (p->inTrans==TRANS_WRITE));
- }
- /*
- ** Return non-zero if a statement transaction is active.
- */
- int sqlite3BtreeIsInStmt(Btree *p){
- assert( sqlite3BtreeHoldsMutex(p) );
- return (p->pBt && p->pBt->inStmt);
- }
- /*
- ** Return non-zero if a read (or write) transaction is active.
- */
- int sqlite3BtreeIsInReadTrans(Btree *p){
- assert( sqlite3_mutex_held(p->db->mutex) );
- return (p && (p->inTrans!=TRANS_NONE));
- }
- /*
- ** This function returns a pointer to a blob of memory associated with
- ** a single shared-btree. The memory is used by client code for its own
- ** purposes (for example, to store a high-level schema associated with
- ** the shared-btree). The btree layer manages reference counting issues.
- **
- ** The first time this is called on a shared-btree, nBytes bytes of memory
- ** are allocated, zeroed, and returned to the caller. For each subsequent
- ** call the nBytes parameter is ignored and a pointer to the same blob
- ** of memory returned.
- **
- ** Just before the shared-btree is closed, the function passed as the
- ** xFree argument when the memory allocation was made is invoked on the
- ** blob of allocated memory. This function should not call sqlite3_free()
- ** on the memory, the btree layer does that.
- */
- void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- if( !pBt->pSchema ){
- pBt->pSchema = sqlite3MallocZero(nBytes);
- pBt->xFreeSchema = xFree;
- }
- sqlite3BtreeLeave(p);
- return pBt->pSchema;
- }
- /*
- ** Return true if another user of the same shared btree as the argument
- ** handle holds an exclusive lock on the sqlite_master table.
- */
- int sqlite3BtreeSchemaLocked(Btree *p){
- int rc;
- assert( sqlite3_mutex_held(p->db->mutex) );
- sqlite3BtreeEnter(p);
- rc = (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK);
- sqlite3BtreeLeave(p);
- return rc;
- }
- #ifndef SQLITE_OMIT_SHARED_CACHE
- /*
- ** Obtain a lock on the table whose root page is iTab. The
- ** lock is a write lock if isWritelock is true or a read lock
- ** if it is false.
- */
- int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
- int rc = SQLITE_OK;
- if( p->sharable ){
- u8 lockType = READ_LOCK + isWriteLock;
- assert( READ_LOCK+1==WRITE_LOCK );
- assert( isWriteLock==0 || isWriteLock==1 );
- sqlite3BtreeEnter(p);
- rc = queryTableLock(p, iTab, lockType);
- if( rc==SQLITE_OK ){
- rc = lockTable(p, iTab, lockType);
- }
- sqlite3BtreeLeave(p);
- }
- return rc;
- }
- #endif
- #ifndef SQLITE_OMIT_INCRBLOB
- /*
- ** Argument pCsr must be a cursor opened for writing on an
- ** INTKEY table currently pointing at a valid table entry.
- ** This function modifies the data stored as part of that entry.
- ** Only the data content may only be modified, it is not possible
- ** to change the length of the data stored.
- */
- int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
- assert( cursorHoldsMutex(pCsr) );
- assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) );
- assert(pCsr->isIncrblobHandle);
- if( pCsr->eState>=CURSOR_REQUIRESEEK ){
- if( pCsr->eState==CURSOR_FAULT ){
- return pCsr->skip;
- }else{
- return SQLITE_ABORT;
- }
- }
- /* Check some preconditions:
- ** (a) the cursor is open for writing,
- ** (b) there is no read-lock on the table being modified and
- ** (c) the cursor points at a valid row of an intKey table.
- */
- if( !pCsr->wrFlag ){
- return SQLITE_READONLY;
- }
- assert( !pCsr->pBt->readOnly
- && pCsr->pBt->inTransaction==TRANS_WRITE );
- if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr) ){
- return SQLITE_LOCKED; /* The table pCur points to has a read lock */
- }
- if( pCsr->eState==CURSOR_INVALID || !pCsr->pPage->intKey ){
- return SQLITE_ERROR;
- }
- return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1);
- }
- /*
- ** Set a flag on this cursor to cache the locations of pages from the
- ** overflow list for the current row. This is used by cursors opened
- ** for incremental blob IO only.
- **
- ** This function sets a flag only. The actual page location cache
- ** (stored in BtCursor.aOverflow[]) is allocated and used by function
- ** accessPayload() (the worker function for sqlite3BtreeData() and
- ** sqlite3BtreePutData()).
- */
- void sqlite3BtreeCacheOverflow(BtCursor *pCur){
- assert( cursorHoldsMutex(pCur) );
- assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
- assert(!pCur->isIncrblobHandle);
- assert(!pCur->aOverflow);
- pCur->isIncrblobHandle = 1;
- }
- #endif