btree.c
上传用户:sunhongbo
上传日期:2022-01-25
资源大小:3010k
文件大小:220k
- /*
- ** 2004 April 6
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- *************************************************************************
- ** $Id: btree.c,v 1.451 2008/04/03 21:46:57 drh Exp $
- **
- ** This file implements an external (disk-based) database using BTrees.
- ** See the header comment on "btreeInt.h" for additional information,
- ** including a description of the file format and an overview of operation.
- */
- #include "btreeInt.h"
- /*
- ** The header string that appears at the beginning of every
- ** SQLite database.
- */
- static const char zMagicHeader[] = SQLITE_FILE_HEADER;
/*
** Global switch for the TRACE macro: set it to 1 to enable tracing.
** The variable is only compiled when SQLITE_TEST evaluates to true.
*/
#if SQLITE_TEST
int sqlite3BtreeTrace = 0;       /* Non-zero turns tracing on */
#endif
- #ifndef SQLITE_OMIT_SHARED_CACHE
- /*
- ** A flag to indicate whether or not shared cache is enabled. Also,
- ** a list of BtShared objects that are eligible for participation
- ** in shared cache. The variables have file scope during normal builds,
- ** but the test harness needs to access these variables so we make them
- ** global for test builds.
- */
- #ifdef SQLITE_TEST
- BtShared *sqlite3SharedCacheList = 0;
- int sqlite3SharedCacheEnabled = 0;
- #else
- static BtShared *sqlite3SharedCacheList = 0;
- static int sqlite3SharedCacheEnabled = 0;
- #endif
- #endif /* SQLITE_OMIT_SHARED_CACHE */
- #ifndef SQLITE_OMIT_SHARED_CACHE
- /*
- ** Enable or disable the shared pager and schema features.
- **
- ** This routine has no effect on existing database connections.
- ** The shared cache setting effects only future calls to
- ** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
- */
- int sqlite3_enable_shared_cache(int enable){
- sqlite3SharedCacheEnabled = enable;
- return SQLITE_OK;
- }
- #endif
- /*
- ** Forward declaration
- */
- static int checkReadLocks(Btree*,Pgno,BtCursor*);
- #ifdef SQLITE_OMIT_SHARED_CACHE
- /*
- ** The functions queryTableLock(), lockTable() and unlockAllTables()
- ** manipulate entries in the BtShared.pLock linked list used to store
- ** shared-cache table level locks. If the library is compiled with the
- ** shared-cache feature disabled, then there is only ever one user
- ** of each BtShared structure and so this locking is not necessary.
- ** So define the lock related functions as no-ops.
- */
- #define queryTableLock(a,b,c) SQLITE_OK
- #define lockTable(a,b,c) SQLITE_OK
- #define unlockAllTables(a)
- #endif
- #ifndef SQLITE_OMIT_SHARED_CACHE
- /*
- ** Query to see if btree handle p may obtain a lock of type eLock
- ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
- ** SQLITE_OK if the lock may be obtained (by calling lockTable()), or
- ** SQLITE_LOCKED if not.
- */
- static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){
- BtShared *pBt = p->pBt;
- BtLock *pIter;
- assert( sqlite3BtreeHoldsMutex(p) );
-
- /* This is a no-op if the shared-cache is not enabled */
- if( !p->sharable ){
- return SQLITE_OK;
- }
- /* If some other connection is holding an exclusive lock, the
- ** requested lock may not be obtained.
- */
- if( pBt->pExclusive && pBt->pExclusive!=p ){
- return SQLITE_LOCKED;
- }
- /* This (along with lockTable()) is where the ReadUncommitted flag is
- ** dealt with. If the caller is querying for a read-lock and the flag is
- ** set, it is unconditionally granted - even if there are write-locks
- ** on the table. If a write-lock is requested, the ReadUncommitted flag
- ** is not considered.
- **
- ** In function lockTable(), if a read-lock is demanded and the
- ** ReadUncommitted flag is set, no entry is added to the locks list
- ** (BtShared.pLock).
- **
- ** To summarize: If the ReadUncommitted flag is set, then read cursors do
- ** not create or respect table locks. The locking procedure for a
- ** write-cursor does not change.
- */
- if(
- !p->db ||
- 0==(p->db->flags&SQLITE_ReadUncommitted) ||
- eLock==WRITE_LOCK ||
- iTab==MASTER_ROOT
- ){
- for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
- if( pIter->pBtree!=p && pIter->iTable==iTab &&
- (pIter->eLock!=eLock || eLock!=READ_LOCK) ){
- return SQLITE_LOCKED;
- }
- }
- }
- return SQLITE_OK;
- }
- #endif /* !SQLITE_OMIT_SHARED_CACHE */
- #ifndef SQLITE_OMIT_SHARED_CACHE
- /*
- ** Add a lock on the table with root-page iTable to the shared-btree used
- ** by Btree handle p. Parameter eLock must be either READ_LOCK or
- ** WRITE_LOCK.
- **
- ** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and
- ** SQLITE_NOMEM may also be returned.
- */
- static int lockTable(Btree *p, Pgno iTable, u8 eLock){
- BtShared *pBt = p->pBt;
- BtLock *pLock = 0;
- BtLock *pIter;
- assert( sqlite3BtreeHoldsMutex(p) );
- /* This is a no-op if the shared-cache is not enabled */
- if( !p->sharable ){
- return SQLITE_OK;
- }
- assert( SQLITE_OK==queryTableLock(p, iTable, eLock) );
- /* If the read-uncommitted flag is set and a read-lock is requested,
- ** return early without adding an entry to the BtShared.pLock list. See
- ** comment in function queryTableLock() for more info on handling
- ** the ReadUncommitted flag.
- */
- if(
- (p->db) &&
- (p->db->flags&SQLITE_ReadUncommitted) &&
- (eLock==READ_LOCK) &&
- iTable!=MASTER_ROOT
- ){
- return SQLITE_OK;
- }
- /* First search the list for an existing lock on this table. */
- for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
- if( pIter->iTable==iTable && pIter->pBtree==p ){
- pLock = pIter;
- break;
- }
- }
- /* If the above search did not find a BtLock struct associating Btree p
- ** with table iTable, allocate one and link it into the list.
- */
- if( !pLock ){
- pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
- if( !pLock ){
- return SQLITE_NOMEM;
- }
- pLock->iTable = iTable;
- pLock->pBtree = p;
- pLock->pNext = pBt->pLock;
- pBt->pLock = pLock;
- }
- /* Set the BtLock.eLock variable to the maximum of the current lock
- ** and the requested lock. This means if a write-lock was already held
- ** and a read-lock requested, we don't incorrectly downgrade the lock.
- */
- assert( WRITE_LOCK>READ_LOCK );
- if( eLock>pLock->eLock ){
- pLock->eLock = eLock;
- }
- return SQLITE_OK;
- }
- #endif /* !SQLITE_OMIT_SHARED_CACHE */
- #ifndef SQLITE_OMIT_SHARED_CACHE
- /*
- ** Release all the table locks (locks obtained via calls to the lockTable()
- ** procedure) held by Btree handle p.
- */
- static void unlockAllTables(Btree *p){
- BtShared *pBt = p->pBt;
- BtLock **ppIter = &pBt->pLock;
- assert( sqlite3BtreeHoldsMutex(p) );
- assert( p->sharable || 0==*ppIter );
- while( *ppIter ){
- BtLock *pLock = *ppIter;
- assert( pBt->pExclusive==0 || pBt->pExclusive==pLock->pBtree );
- if( pLock->pBtree==p ){
- *ppIter = pLock->pNext;
- sqlite3_free(pLock);
- }else{
- ppIter = &pLock->pNext;
- }
- }
- if( pBt->pExclusive==p ){
- pBt->pExclusive = 0;
- }
- }
- #endif /* SQLITE_OMIT_SHARED_CACHE */
- static void releasePage(MemPage *pPage); /* Forward reference */
- /*
- ** Verify that the cursor holds a mutex on the BtShared
- */
- #ifndef NDEBUG
- static int cursorHoldsMutex(BtCursor *p){
- return sqlite3_mutex_held(p->pBt->mutex);
- }
- #endif
- #ifndef SQLITE_OMIT_INCRBLOB
- /*
- ** Invalidate the overflow page-list cache for cursor pCur, if any.
- */
- static void invalidateOverflowCache(BtCursor *pCur){
- assert( cursorHoldsMutex(pCur) );
- sqlite3_free(pCur->aOverflow);
- pCur->aOverflow = 0;
- }
- /*
- ** Invalidate the overflow page-list cache for all cursors opened
- ** on the shared btree structure pBt.
- */
- static void invalidateAllOverflowCache(BtShared *pBt){
- BtCursor *p;
- assert( sqlite3_mutex_held(pBt->mutex) );
- for(p=pBt->pCursor; p; p=p->pNext){
- invalidateOverflowCache(p);
- }
- }
- #else
- #define invalidateOverflowCache(x)
- #define invalidateAllOverflowCache(x)
- #endif
- /*
- ** Save the current cursor position in the variables BtCursor.nKey
- ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
- */
- static int saveCursorPosition(BtCursor *pCur){
- int rc;
- assert( CURSOR_VALID==pCur->eState );
- assert( 0==pCur->pKey );
- assert( cursorHoldsMutex(pCur) );
- rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
- /* If this is an intKey table, then the above call to BtreeKeySize()
- ** stores the integer key in pCur->nKey. In this case this value is
- ** all that is required. Otherwise, if pCur is not open on an intKey
- ** table, then malloc space for and store the pCur->nKey bytes of key
- ** data.
- */
- if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
- void *pKey = sqlite3_malloc(pCur->nKey);
- if( pKey ){
- rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
- if( rc==SQLITE_OK ){
- pCur->pKey = pKey;
- }else{
- sqlite3_free(pKey);
- }
- }else{
- rc = SQLITE_NOMEM;
- }
- }
- assert( !pCur->pPage->intKey || !pCur->pKey );
- if( rc==SQLITE_OK ){
- releasePage(pCur->pPage);
- pCur->pPage = 0;
- pCur->eState = CURSOR_REQUIRESEEK;
- }
- invalidateOverflowCache(pCur);
- return rc;
- }
- /*
- ** Save the positions of all cursors except pExcept open on the table
- ** with root-page iRoot. Usually, this is called just before cursor
- ** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()).
- */
- static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
- BtCursor *p;
- assert( sqlite3_mutex_held(pBt->mutex) );
- assert( pExcept==0 || pExcept->pBt==pBt );
- for(p=pBt->pCursor; p; p=p->pNext){
- if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) &&
- p->eState==CURSOR_VALID ){
- int rc = saveCursorPosition(p);
- if( SQLITE_OK!=rc ){
- return rc;
- }
- }
- }
- return SQLITE_OK;
- }
- /*
- ** Clear the current cursor position.
- */
- static void clearCursorPosition(BtCursor *pCur){
- assert( cursorHoldsMutex(pCur) );
- sqlite3_free(pCur->pKey);
- pCur->pKey = 0;
- pCur->eState = CURSOR_INVALID;
- }
- /*
- ** Restore the cursor to the position it was in (or as close to as possible)
- ** when saveCursorPosition() was called. Note that this call deletes the
- ** saved position info stored by saveCursorPosition(), so there can be
- ** at most one effective restoreOrClearCursorPosition() call after each
- ** saveCursorPosition().
- **
- ** If the second argument argument - doSeek - is false, then instead of
- ** returning the cursor to its saved position, any saved position is deleted
- ** and the cursor state set to CURSOR_INVALID.
- */
- int sqlite3BtreeRestoreOrClearCursorPosition(BtCursor *pCur){
- int rc;
- assert( cursorHoldsMutex(pCur) );
- assert( pCur->eState>=CURSOR_REQUIRESEEK );
- if( pCur->eState==CURSOR_FAULT ){
- return pCur->skip;
- }
- #ifndef SQLITE_OMIT_INCRBLOB
- if( pCur->isIncrblobHandle ){
- return SQLITE_ABORT;
- }
- #endif
- pCur->eState = CURSOR_INVALID;
- rc = sqlite3BtreeMoveto(pCur, pCur->pKey, 0, pCur->nKey, 0, &pCur->skip);
- if( rc==SQLITE_OK ){
- sqlite3_free(pCur->pKey);
- pCur->pKey = 0;
- assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
- }
- return rc;
- }
/*
** Restore cursor p only if it needs it.  When p->eState is
** CURSOR_REQUIRESEEK or greater the macro evaluates to the result of
** sqlite3BtreeRestoreOrClearCursorPosition(p); otherwise it evaluates
** to SQLITE_OK without making a function call.
**
** The argument is expanded more than once, so it must be free of side
** effects.
*/
#define restoreOrClearCursorPosition(p) \
  ((p)->eState>=CURSOR_REQUIRESEEK ? \
         sqlite3BtreeRestoreOrClearCursorPosition(p) : \
         SQLITE_OK)
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /*
- ** Given a page number of a regular database page, return the page
- ** number for the pointer-map page that contains the entry for the
- ** input page number.
- */
- static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
- int nPagesPerMapPage, iPtrMap, ret;
- assert( sqlite3_mutex_held(pBt->mutex) );
- nPagesPerMapPage = (pBt->usableSize/5)+1;
- iPtrMap = (pgno-2)/nPagesPerMapPage;
- ret = (iPtrMap*nPagesPerMapPage) + 2;
- if( ret==PENDING_BYTE_PAGE(pBt) ){
- ret++;
- }
- return ret;
- }
- /*
- ** Write an entry into the pointer map.
- **
- ** This routine updates the pointer map entry for page number 'key'
- ** so that it maps to type 'eType' and parent page number 'pgno'.
- ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
- */
- static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){
- DbPage *pDbPage; /* The pointer map page */
- u8 *pPtrmap; /* The pointer map data */
- Pgno iPtrmap; /* The pointer map page number */
- int offset; /* Offset in pointer map page */
- int rc;
- assert( sqlite3_mutex_held(pBt->mutex) );
- /* The master-journal page number must never be used as a pointer map page */
- assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
- assert( pBt->autoVacuum );
- if( key==0 ){
- return SQLITE_CORRUPT_BKPT;
- }
- iPtrmap = PTRMAP_PAGENO(pBt, key);
- rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- offset = PTRMAP_PTROFFSET(pBt, key);
- pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
- if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
- TRACE(("PTRMAP_UPDATE: %d->(%d,%d)n", key, eType, parent));
- rc = sqlite3PagerWrite(pDbPage);
- if( rc==SQLITE_OK ){
- pPtrmap[offset] = eType;
- put4byte(&pPtrmap[offset+1], parent);
- }
- }
- sqlite3PagerUnref(pDbPage);
- return rc;
- }
- /*
- ** Read an entry from the pointer map.
- **
- ** This routine retrieves the pointer map entry for page 'key', writing
- ** the type and parent page number to *pEType and *pPgno respectively.
- ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
- */
- static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
- DbPage *pDbPage; /* The pointer map page */
- int iPtrmap; /* Pointer map page index */
- u8 *pPtrmap; /* Pointer map page data */
- int offset; /* Offset of entry in pointer map */
- int rc;
- assert( sqlite3_mutex_held(pBt->mutex) );
- iPtrmap = PTRMAP_PAGENO(pBt, key);
- rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
- if( rc!=0 ){
- return rc;
- }
- pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
- offset = PTRMAP_PTROFFSET(pBt, key);
- assert( pEType!=0 );
- *pEType = pPtrmap[offset];
- if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
- sqlite3PagerUnref(pDbPage);
- if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
- return SQLITE_OK;
- }
- #endif /* SQLITE_OMIT_AUTOVACUUM */
/*
** Given a btree page and a cell index (0 means the first cell on the
** page, 1 means the second cell, and so forth), evaluate to a pointer to
** the cell content.  The 2-byte entry at index iCell of the cell pointer
** array gives the offset of the cell within pPage->aData.
**
** This macro works only for pages that do not contain overflow cells.
** Both arguments are expanded more than once, so they must be free of
** side effects.
*/
#define findCell(pPage, iCell) \
  ((pPage)->aData + get2byte(&(pPage)->aData[(pPage)->cellOffset+2*(iCell)]))
#ifdef SQLITE_TEST
/*
** Function form of the findCell() macro, compiled only for test builds.
** iCell must be non-negative and less than the cell count stored in the
** page header; both conditions are asserted.
*/
u8 *sqlite3BtreeFindCell(MemPage *pPage, int iCell){
  u8 *pCell;
  assert( iCell>=0 );
  assert( iCell<get2byte(&pPage->aData[pPage->hdrOffset+3]) );
  pCell = findCell(pPage, iCell);
  return pCell;
}
#endif
- /*
- ** This a more complex version of sqlite3BtreeFindCell() that works for
- ** pages that do contain overflow cells. See insert
- */
- static u8 *findOverflowCell(MemPage *pPage, int iCell){
- int i;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- for(i=pPage->nOverflow-1; i>=0; i--){
- int k;
- struct _OvflCell *pOvfl;
- pOvfl = &pPage->aOvfl[i];
- k = pOvfl->idx;
- if( k<=iCell ){
- if( k==iCell ){
- return pOvfl->pCell;
- }
- iCell--;
- }
- }
- return findCell(pPage, iCell);
- }
- /*
- ** Parse a cell content block and fill in the CellInfo structure. There
- ** are two versions of this function. sqlite3BtreeParseCell() takes a
- ** cell index as the second argument and sqlite3BtreeParseCellPtr()
- ** takes a pointer to the body of the cell as its second argument.
- **
- ** Within this file, the parseCell() macro can be called instead of
- ** sqlite3BtreeParseCellPtr(). Using some compilers, this will be faster.
- */
- void sqlite3BtreeParseCellPtr(
- MemPage *pPage, /* Page containing the cell */
- u8 *pCell, /* Pointer to the cell text. */
- CellInfo *pInfo /* Fill in this structure */
- ){
- int n; /* Number bytes in cell content header */
- u32 nPayload; /* Number of bytes of cell payload */
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pInfo->pCell = pCell;
- assert( pPage->leaf==0 || pPage->leaf==1 );
- n = pPage->childPtrSize;
- assert( n==4-4*pPage->leaf );
- if( pPage->hasData ){
- n += getVarint32(&pCell[n], &nPayload);
- }else{
- nPayload = 0;
- }
- pInfo->nData = nPayload;
- if( pPage->intKey ){
- n += getVarint(&pCell[n], (u64 *)&pInfo->nKey);
- }else{
- u32 x;
- n += getVarint32(&pCell[n], &x);
- pInfo->nKey = x;
- nPayload += x;
- }
- pInfo->nPayload = nPayload;
- pInfo->nHeader = n;
- if( nPayload<=pPage->maxLocal ){
- /* This is the (easy) common case where the entire payload fits
- ** on the local page. No overflow is required.
- */
- int nSize; /* Total size of cell content in bytes */
- pInfo->nLocal = nPayload;
- pInfo->iOverflow = 0;
- nSize = nPayload + n;
- if( nSize<4 ){
- nSize = 4; /* Minimum cell size is 4 */
- }
- pInfo->nSize = nSize;
- }else{
- /* If the payload will not fit completely on the local page, we have
- ** to decide how much to store locally and how much to spill onto
- ** overflow pages. The strategy is to minimize the amount of unused
- ** space on overflow pages while keeping the amount of local storage
- ** in between minLocal and maxLocal.
- **
- ** Warning: changing the way overflow payload is distributed in any
- ** way will result in an incompatible file format.
- */
- int minLocal; /* Minimum amount of payload held locally */
- int maxLocal; /* Maximum amount of payload held locally */
- int surplus; /* Overflow payload available for local storage */
- minLocal = pPage->minLocal;
- maxLocal = pPage->maxLocal;
- surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
- if( surplus <= maxLocal ){
- pInfo->nLocal = surplus;
- }else{
- pInfo->nLocal = minLocal;
- }
- pInfo->iOverflow = pInfo->nLocal + n;
- pInfo->nSize = pInfo->iOverflow + 4;
- }
- }
/*
** Parse the cell at index iCell of pPage into *pInfo.  This expands to a
** direct call of sqlite3BtreeParseCellPtr() on the pointer produced by
** findCell(), so it is limited to pages without overflow cells, just as
** findCell() is.
*/
#define parseCell(pPage, iCell, pInfo) \
  sqlite3BtreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo))
- void sqlite3BtreeParseCell(
- MemPage *pPage, /* Page containing the cell */
- int iCell, /* The cell index. First cell is 0 */
- CellInfo *pInfo /* Fill in this structure */
- ){
- parseCell(pPage, iCell, pInfo);
- }
- /*
- ** Compute the total number of bytes that a Cell needs in the cell
- ** data area of the btree-page. The return number includes the cell
- ** data header and the local payload, but not any overflow page or
- ** the space used by the cell pointer.
- */
- #ifndef NDEBUG
- static u16 cellSize(MemPage *pPage, int iCell){
- CellInfo info;
- sqlite3BtreeParseCell(pPage, iCell, &info);
- return info.nSize;
- }
- #endif
- static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
- CellInfo info;
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- return info.nSize;
- }
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /*
- ** If the cell pCell, part of page pPage contains a pointer
- ** to an overflow page, insert an entry into the pointer-map
- ** for the overflow page.
- */
- static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){
- if( pCell ){
- CellInfo info;
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
- if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
- Pgno ovfl = get4byte(&pCell[info.iOverflow]);
- return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno);
- }
- }
- return SQLITE_OK;
- }
- /*
- ** If the cell with index iCell on page pPage contains a pointer
- ** to an overflow page, insert an entry into the pointer-map
- ** for the overflow page.
- */
- static int ptrmapPutOvfl(MemPage *pPage, int iCell){
- u8 *pCell;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pCell = findOverflowCell(pPage, iCell);
- return ptrmapPutOvflPtr(pPage, pCell);
- }
- #endif
- /*
- ** Defragment the page given. All Cells are moved to the
- ** end of the page and all free space is collected into one
- ** big FreeBlk that occurs in between the header and cell
- ** pointer array and the cell content area.
- */
- static int defragmentPage(MemPage *pPage){
- int i; /* Loop counter */
- int pc; /* Address of a i-th cell */
- int addr; /* Offset of first byte after cell pointer array */
- int hdr; /* Offset to the page header */
- int size; /* Size of a cell */
- int usableSize; /* Number of usable bytes on a page */
- int cellOffset; /* Offset to the cell pointer array */
- int brk; /* Offset to the cell content area */
- int nCell; /* Number of cells on the page */
- unsigned char *data; /* The page data */
- unsigned char *temp; /* Temp area for cell content */
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
- assert( pPage->pBt!=0 );
- assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
- assert( pPage->nOverflow==0 );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
- data = pPage->aData;
- hdr = pPage->hdrOffset;
- cellOffset = pPage->cellOffset;
- nCell = pPage->nCell;
- assert( nCell==get2byte(&data[hdr+3]) );
- usableSize = pPage->pBt->usableSize;
- brk = get2byte(&data[hdr+5]);
- memcpy(&temp[brk], &data[brk], usableSize - brk);
- brk = usableSize;
- for(i=0; i<nCell; i++){
- u8 *pAddr; /* The i-th cell pointer */
- pAddr = &data[cellOffset + i*2];
- pc = get2byte(pAddr);
- assert( pc<pPage->pBt->usableSize );
- size = cellSizePtr(pPage, &temp[pc]);
- brk -= size;
- memcpy(&data[brk], &temp[pc], size);
- put2byte(pAddr, brk);
- }
- assert( brk>=cellOffset+2*nCell );
- put2byte(&data[hdr+5], brk);
- data[hdr+1] = 0;
- data[hdr+2] = 0;
- data[hdr+7] = 0;
- addr = cellOffset+2*nCell;
- memset(&data[addr], 0, brk-addr);
- return SQLITE_OK;
- }
- /*
- ** Allocate nByte bytes of space on a page.
- **
- ** Return the index into pPage->aData[] of the first byte of
- ** the new allocation. Or return 0 if there is not enough free
- ** space on the page to satisfy the allocation request.
- **
- ** If the page contains nBytes of free space but does not contain
- ** nBytes of contiguous free space, then this routine automatically
- ** calls defragementPage() to consolidate all free space before
- ** allocating the new chunk.
- */
- static int allocateSpace(MemPage *pPage, int nByte){
- int addr, pc, hdr;
- int size;
- int nFrag;
- int top;
- int nCell;
- int cellOffset;
- unsigned char *data;
-
- data = pPage->aData;
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
- assert( pPage->pBt );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- if( nByte<4 ) nByte = 4;
- if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0;
- pPage->nFree -= nByte;
- hdr = pPage->hdrOffset;
- nFrag = data[hdr+7];
- if( nFrag<60 ){
- /* Search the freelist looking for a slot big enough to satisfy the
- ** space request. */
- addr = hdr+1;
- while( (pc = get2byte(&data[addr]))>0 ){
- size = get2byte(&data[pc+2]);
- if( size>=nByte ){
- if( size<nByte+4 ){
- memcpy(&data[addr], &data[pc], 2);
- data[hdr+7] = nFrag + size - nByte;
- return pc;
- }else{
- put2byte(&data[pc+2], size-nByte);
- return pc + size - nByte;
- }
- }
- addr = pc;
- }
- }
- /* Allocate memory from the gap in between the cell pointer array
- ** and the cell content area.
- */
- top = get2byte(&data[hdr+5]);
- nCell = get2byte(&data[hdr+3]);
- cellOffset = pPage->cellOffset;
- if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
- if( defragmentPage(pPage) ) return 0;
- top = get2byte(&data[hdr+5]);
- }
- top -= nByte;
- assert( cellOffset + 2*nCell <= top );
- put2byte(&data[hdr+5], top);
- return top;
- }
- /*
- ** Return a section of the pPage->aData to the freelist.
- ** The first byte of the new free block is pPage->aDisk[start]
- ** and the size of the block is "size" bytes.
- **
- ** Most of the effort here is involved in coalesing adjacent
- ** free blocks into a single big free block.
- */
- static void freeSpace(MemPage *pPage, int start, int size){
- int addr, pbegin, hdr;
- unsigned char *data = pPage->aData;
- assert( pPage->pBt!=0 );
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
- assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
- assert( (start + size)<=pPage->pBt->usableSize );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- if( size<4 ) size = 4;
- #ifdef SQLITE_SECURE_DELETE
- /* Overwrite deleted information with zeros when the SECURE_DELETE
- ** option is enabled at compile-time */
- memset(&data[start], 0, size);
- #endif
- /* Add the space back into the linked list of freeblocks */
- hdr = pPage->hdrOffset;
- addr = hdr + 1;
- while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
- assert( pbegin<=pPage->pBt->usableSize-4 );
- assert( pbegin>addr );
- addr = pbegin;
- }
- assert( pbegin<=pPage->pBt->usableSize-4 );
- assert( pbegin>addr || pbegin==0 );
- put2byte(&data[addr], start);
- put2byte(&data[start], pbegin);
- put2byte(&data[start+2], size);
- pPage->nFree += size;
- /* Coalesce adjacent free blocks */
- addr = pPage->hdrOffset + 1;
- while( (pbegin = get2byte(&data[addr]))>0 ){
- int pnext, psize;
- assert( pbegin>addr );
- assert( pbegin<=pPage->pBt->usableSize-4 );
- pnext = get2byte(&data[pbegin]);
- psize = get2byte(&data[pbegin+2]);
- if( pbegin + psize + 3 >= pnext && pnext>0 ){
- int frag = pnext - (pbegin+psize);
- assert( frag<=data[pPage->hdrOffset+7] );
- data[pPage->hdrOffset+7] -= frag;
- put2byte(&data[pbegin], get2byte(&data[pnext]));
- put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
- }else{
- addr = pbegin;
- }
- }
- /* If the cell content area begins with a freeblock, remove it. */
- if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
- int top;
- pbegin = get2byte(&data[hdr+1]);
- memcpy(&data[hdr+1], &data[pbegin], 2);
- top = get2byte(&data[hdr+5]);
- put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
- }
- }
- /*
- ** Decode the flags byte (the first byte of the header) for a page
- ** and initialize fields of the MemPage structure accordingly.
- */
- static void decodeFlags(MemPage *pPage, int flagByte){
- BtShared *pBt; /* A copy of pPage->pBt */
- assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0;
- pPage->zeroData = (flagByte & PTF_ZERODATA)!=0;
- pPage->leaf = (flagByte & PTF_LEAF)!=0;
- pPage->childPtrSize = 4*(pPage->leaf==0);
- pBt = pPage->pBt;
- if( flagByte & PTF_LEAFDATA ){
- pPage->leafData = 1;
- pPage->maxLocal = pBt->maxLeaf;
- pPage->minLocal = pBt->minLeaf;
- }else{
- pPage->leafData = 0;
- pPage->maxLocal = pBt->maxLocal;
- pPage->minLocal = pBt->minLocal;
- }
- pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
- }
- /*
- ** Initialize the auxiliary information for a disk block.
- **
- ** The pParent parameter must be a pointer to the MemPage which
- ** is the parent of the page being initialized. The root of a
- ** BTree has no parent and so for that page, pParent==NULL.
- **
- ** Return SQLITE_OK on success. If we see that the page does
- ** not contain a well-formed database page, then return
- ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
- ** guarantee that the page is well-formed. It only shows that
- ** we failed to detect any corruption.
- */
- int sqlite3BtreeInitPage(
- MemPage *pPage, /* The page to be initialized */
- MemPage *pParent /* The parent. Might be NULL */
- ){
- int pc; /* Address of a freeblock within pPage->aData[] */
- int hdr; /* Offset to beginning of page header */
- u8 *data; /* Equal to pPage->aData */
- BtShared *pBt; /* The main btree structure */
- int usableSize; /* Amount of usable space on each page */
- int cellOffset; /* Offset from start of page to first cell pointer */
- int nFree; /* Number of unused bytes on the page */
- int top; /* First byte of the cell content area */
- pBt = pPage->pBt;
- assert( pBt!=0 );
- assert( pParent==0 || pParent->pBt==pBt );
- assert( sqlite3_mutex_held(pBt->mutex) );
- assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
- assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
- assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
- if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
- /* The parent page should never change unless the file is corrupt */
- return SQLITE_CORRUPT_BKPT;
- }
- if( pPage->isInit ) return SQLITE_OK;
- if( pPage->pParent==0 && pParent!=0 ){
- pPage->pParent = pParent;
- sqlite3PagerRef(pParent->pDbPage);
- }
- hdr = pPage->hdrOffset;
- data = pPage->aData;
- decodeFlags(pPage, data[hdr]);
- pPage->nOverflow = 0;
- pPage->idxShift = 0;
- usableSize = pBt->usableSize;
- pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
- top = get2byte(&data[hdr+5]);
- pPage->nCell = get2byte(&data[hdr+3]);
- if( pPage->nCell>MX_CELL(pBt) ){
- /* To many cells for a single page. The page must be corrupt */
- return SQLITE_CORRUPT_BKPT;
- }
- if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
- /* All pages must have at least one cell, except for root pages */
- return SQLITE_CORRUPT_BKPT;
- }
- /* Compute the total free space on the page */
- pc = get2byte(&data[hdr+1]);
- nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
- while( pc>0 ){
- int next, size;
- if( pc>usableSize-4 ){
- /* Free block is off the page */
- return SQLITE_CORRUPT_BKPT;
- }
- next = get2byte(&data[pc]);
- size = get2byte(&data[pc+2]);
- if( next>0 && next<=pc+size+3 ){
- /* Free blocks must be in accending order */
- return SQLITE_CORRUPT_BKPT;
- }
- nFree += size;
- pc = next;
- }
- pPage->nFree = nFree;
- if( nFree>=usableSize ){
- /* Free space cannot exceed total page size */
- return SQLITE_CORRUPT_BKPT;
- }
- pPage->isInit = 1;
- return SQLITE_OK;
- }
- /*
- ** Set up a raw page so that it looks like a database page holding
- ** no entries.
- */
- static void zeroPage(MemPage *pPage, int flags){
- unsigned char *data = pPage->aData;
- BtShared *pBt = pPage->pBt;
- int hdr = pPage->hdrOffset;
- int first;
- assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
- assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
- assert( sqlite3PagerGetData(pPage->pDbPage) == data );
- assert( sqlite3PagerIswriteable(pPage->pDbPage) );
- assert( sqlite3_mutex_held(pBt->mutex) );
- memset(&data[hdr], 0, pBt->usableSize - hdr);
- data[hdr] = flags;
- first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
- memset(&data[hdr+1], 0, 4);
- data[hdr+7] = 0;
- put2byte(&data[hdr+5], pBt->usableSize);
- pPage->nFree = pBt->usableSize - first;
- decodeFlags(pPage, flags);
- pPage->hdrOffset = hdr;
- pPage->cellOffset = first;
- pPage->nOverflow = 0;
- pPage->idxShift = 0;
- pPage->nCell = 0;
- pPage->isInit = 1;
- }
- /*
- ** Get a page from the pager. Initialize the MemPage.pBt and
- ** MemPage.aData elements if needed.
- **
- ** If the noContent flag is set, it means that we do not care about
- ** the content of the page at this time. So do not go to the disk
- ** to fetch the content. Just fill in the content with zeros for now.
- ** If in the future we call sqlite3PagerWrite() on this page, that
- ** means we have started to be concerned about content and the disk
- ** read should occur at that point.
- */
- int sqlite3BtreeGetPage(
- BtShared *pBt, /* The btree */
- Pgno pgno, /* Number of the page to fetch */
- MemPage **ppPage, /* Return the page in this parameter */
- int noContent /* Do not load page content if true */
- ){
- int rc;
- MemPage *pPage;
- DbPage *pDbPage;
- assert( sqlite3_mutex_held(pBt->mutex) );
- rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
- if( rc ) return rc;
- pPage = (MemPage *)sqlite3PagerGetExtra(pDbPage);
- pPage->aData = sqlite3PagerGetData(pDbPage);
- pPage->pDbPage = pDbPage;
- pPage->pBt = pBt;
- pPage->pgno = pgno;
- pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
- *ppPage = pPage;
- return SQLITE_OK;
- }
- /*
- ** Get a page from the pager and initialize it. This routine
- ** is just a convenience wrapper around separate calls to
- ** sqlite3BtreeGetPage() and sqlite3BtreeInitPage().
- */
- static int getAndInitPage(
- BtShared *pBt, /* The database file */
- Pgno pgno, /* Number of the page to get */
- MemPage **ppPage, /* Write the page pointer here */
- MemPage *pParent /* Parent of the page */
- ){
- int rc;
- assert( sqlite3_mutex_held(pBt->mutex) );
- if( pgno==0 ){
- return SQLITE_CORRUPT_BKPT;
- }
- rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
- if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
- rc = sqlite3BtreeInitPage(*ppPage, pParent);
- }
- return rc;
- }
- /*
- ** Release a MemPage. This should be called once for each prior
- ** call to sqlite3BtreeGetPage.
- */
- static void releasePage(MemPage *pPage){
- if( pPage ){
- assert( pPage->aData );
- assert( pPage->pBt );
- assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
- assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- sqlite3PagerUnref(pPage->pDbPage);
- }
- }
- /*
- ** This routine is called when the reference count for a page
- ** reaches zero. We need to unref the pParent pointer when that
- ** happens.
- */
- static void pageDestructor(DbPage *pData, int pageSize){
- MemPage *pPage;
- assert( (pageSize & 7)==0 );
- pPage = (MemPage *)sqlite3PagerGetExtra(pData);
- assert( pPage->isInit==0 || sqlite3_mutex_held(pPage->pBt->mutex) );
- if( pPage->pParent ){
- MemPage *pParent = pPage->pParent;
- assert( pParent->pBt==pPage->pBt );
- pPage->pParent = 0;
- releasePage(pParent);
- }
- pPage->isInit = 0;
- }
- /*
- ** During a rollback, when the pager reloads information into the cache
- ** so that the cache is restored to its original state at the start of
- ** the transaction, for each page restored this routine is called.
- **
- ** This routine needs to reset the extra data section at the end of the
- ** page to agree with the restored data.
- */
- static void pageReinit(DbPage *pData, int pageSize){
- MemPage *pPage;
- assert( (pageSize & 7)==0 );
- pPage = (MemPage *)sqlite3PagerGetExtra(pData);
- if( pPage->isInit ){
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pPage->isInit = 0;
- sqlite3BtreeInitPage(pPage, pPage->pParent);
- }
- }
- /*
- ** Invoke the busy handler for a btree.
- */
- static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){
- BtShared *pBt = (BtShared*)pArg;
- assert( pBt->db );
- assert( sqlite3_mutex_held(pBt->db->mutex) );
- return sqlite3InvokeBusyHandler(&pBt->db->busyHandler);
- }
- /*
- ** Open a database file.
- **
- ** zFilename is the name of the database file. If zFilename is NULL
- ** a new database with a random name is created. This randomly named
- ** database file will be deleted when sqlite3BtreeClose() is called.
- ** If zFilename is ":memory:" then an in-memory database is created
- ** that is automatically destroyed when it is closed.
- */
- int sqlite3BtreeOpen(
- const char *zFilename, /* Name of the file containing the BTree database */
- sqlite3 *db, /* Associated database handle */
- Btree **ppBtree, /* Pointer to new Btree object written here */
- int flags, /* Options */
- int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */
- ){
- sqlite3_vfs *pVfs; /* The VFS to use for this btree */
- BtShared *pBt = 0; /* Shared part of btree structure */
- Btree *p; /* Handle to return */
- int rc = SQLITE_OK;
- int nReserve;
- unsigned char zDbHeader[100];
- /* Set the variable isMemdb to true for an in-memory database, or
- ** false for a file-based database. This symbol is only required if
- ** either of the shared-data or autovacuum features are compiled
- ** into the library.
- */
- #if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM)
- #ifdef SQLITE_OMIT_MEMORYDB
- const int isMemdb = 0;
- #else
- const int isMemdb = zFilename && !strcmp(zFilename, ":memory:");
- #endif
- #endif
- assert( db!=0 );
- assert( sqlite3_mutex_held(db->mutex) );
- pVfs = db->pVfs;
- p = sqlite3MallocZero(sizeof(Btree));
- if( !p ){
- return SQLITE_NOMEM;
- }
- p->inTrans = TRANS_NONE;
- p->db = db;
- #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
- /*
- ** If this Btree is a candidate for shared cache, try to find an
- ** existing BtShared object that we can share with
- */
- if( (flags & BTREE_PRIVATE)==0
- && isMemdb==0
- && (db->flags & SQLITE_Vtab)==0
- && zFilename && zFilename[0]
- ){
- if( sqlite3SharedCacheEnabled ){
- int nFullPathname = pVfs->mxPathname+1;
- char *zFullPathname = (char *)sqlite3_malloc(nFullPathname);
- sqlite3_mutex *mutexShared;
- p->sharable = 1;
- if( db ){
- db->flags |= SQLITE_SharedCache;
- }
- if( !zFullPathname ){
- sqlite3_free(p);
- return SQLITE_NOMEM;
- }
- sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname);
- mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
- sqlite3_mutex_enter(mutexShared);
- for(pBt=sqlite3SharedCacheList; pBt; pBt=pBt->pNext){
- assert( pBt->nRef>0 );
- if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
- && sqlite3PagerVfs(pBt->pPager)==pVfs ){
- p->pBt = pBt;
- pBt->nRef++;
- break;
- }
- }
- sqlite3_mutex_leave(mutexShared);
- sqlite3_free(zFullPathname);
- }
- #ifdef SQLITE_DEBUG
- else{
- /* In debug mode, we mark all persistent databases as sharable
- ** even when they are not. This exercises the locking code and
- ** gives more opportunity for asserts(sqlite3_mutex_held())
- ** statements to find locking problems.
- */
- p->sharable = 1;
- }
- #endif
- }
- #endif
- if( pBt==0 ){
- /*
- ** The following asserts make sure that structures used by the btree are
- ** the right size. This is to guard against size changes that result
- ** when compiling on a different architecture.
- */
- assert( sizeof(i64)==8 || sizeof(i64)==4 );
- assert( sizeof(u64)==8 || sizeof(u64)==4 );
- assert( sizeof(u32)==4 );
- assert( sizeof(u16)==2 );
- assert( sizeof(Pgno)==4 );
-
- pBt = sqlite3MallocZero( sizeof(*pBt) );
- if( pBt==0 ){
- rc = SQLITE_NOMEM;
- goto btree_open_out;
- }
- pBt->busyHdr.xFunc = sqlite3BtreeInvokeBusyHandler;
- pBt->busyHdr.pArg = pBt;
- rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
- EXTRA_SIZE, flags, vfsFlags);
- if( rc==SQLITE_OK ){
- rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
- }
- if( rc!=SQLITE_OK ){
- goto btree_open_out;
- }
- sqlite3PagerSetBusyhandler(pBt->pPager, &pBt->busyHdr);
- p->pBt = pBt;
-
- sqlite3PagerSetDestructor(pBt->pPager, pageDestructor);
- sqlite3PagerSetReiniter(pBt->pPager, pageReinit);
- pBt->pCursor = 0;
- pBt->pPage1 = 0;
- pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager);
- pBt->pageSize = get2byte(&zDbHeader[16]);
- if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
- || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
- pBt->pageSize = 0;
- sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
- pBt->maxEmbedFrac = 64; /* 25% */
- pBt->minEmbedFrac = 32; /* 12.5% */
- pBt->minLeafFrac = 32; /* 12.5% */
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* If the magic name ":memory:" will create an in-memory database, then
- ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
- ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
- ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
- ** regular file-name. In this case the auto-vacuum applies as per normal.
- */
- if( zFilename && !isMemdb ){
- pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
- pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
- }
- #endif
- nReserve = 0;
- }else{
- nReserve = zDbHeader[20];
- pBt->maxEmbedFrac = zDbHeader[21];
- pBt->minEmbedFrac = zDbHeader[22];
- pBt->minLeafFrac = zDbHeader[23];
- pBt->pageSizeFixed = 1;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
- pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
- #endif
- }
- pBt->usableSize = pBt->pageSize - nReserve;
- assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
- sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
-
- #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
- /* Add the new BtShared object to the linked list sharable BtShareds.
- */
- if( p->sharable ){
- sqlite3_mutex *mutexShared;
- pBt->nRef = 1;
- mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
- if( SQLITE_THREADSAFE ){
- pBt->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
- if( pBt->mutex==0 ){
- rc = SQLITE_NOMEM;
- db->mallocFailed = 0;
- goto btree_open_out;
- }
- }
- sqlite3_mutex_enter(mutexShared);
- pBt->pNext = sqlite3SharedCacheList;
- sqlite3SharedCacheList = pBt;
- sqlite3_mutex_leave(mutexShared);
- }
- #endif
- }
- #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
- /* If the new Btree uses a sharable pBtShared, then link the new
- ** Btree into the list of all sharable Btrees for the same connection.
- ** The list is kept in ascending order by pBt address.
- */
- if( p->sharable ){
- int i;
- Btree *pSib;
- for(i=0; i<db->nDb; i++){
- if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
- while( pSib->pPrev ){ pSib = pSib->pPrev; }
- if( p->pBt<pSib->pBt ){
- p->pNext = pSib;
- p->pPrev = 0;
- pSib->pPrev = p;
- }else{
- while( pSib->pNext && pSib->pNext->pBt<p->pBt ){
- pSib = pSib->pNext;
- }
- p->pNext = pSib->pNext;
- p->pPrev = pSib;
- if( p->pNext ){
- p->pNext->pPrev = p;
- }
- pSib->pNext = p;
- }
- break;
- }
- }
- }
- #endif
- *ppBtree = p;
- btree_open_out:
- if( rc!=SQLITE_OK ){
- if( pBt && pBt->pPager ){
- sqlite3PagerClose(pBt->pPager);
- }
- sqlite3_free(pBt);
- sqlite3_free(p);
- *ppBtree = 0;
- }
- return rc;
- }
- /*
- ** Decrement the BtShared.nRef counter. When it reaches zero,
- ** remove the BtShared structure from the sharing list. Return
- ** true if the BtShared.nRef counter reaches zero and return
- ** false if it is still positive.
- */
- static int removeFromSharingList(BtShared *pBt){
- #ifndef SQLITE_OMIT_SHARED_CACHE
- sqlite3_mutex *pMaster;
- BtShared *pList;
- int removed = 0;
- assert( sqlite3_mutex_notheld(pBt->mutex) );
- pMaster = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
- sqlite3_mutex_enter(pMaster);
- pBt->nRef--;
- if( pBt->nRef<=0 ){
- if( sqlite3SharedCacheList==pBt ){
- sqlite3SharedCacheList = pBt->pNext;
- }else{
- pList = sqlite3SharedCacheList;
- while( pList && pList->pNext!=pBt ){
- pList=pList->pNext;
- }
- if( pList ){
- pList->pNext = pBt->pNext;
- }
- }
- if( SQLITE_THREADSAFE ){
- sqlite3_mutex_free(pBt->mutex);
- }
- removed = 1;
- }
- sqlite3_mutex_leave(pMaster);
- return removed;
- #else
- return 1;
- #endif
- }
- /*
- ** Close an open database and invalidate all cursors.
- */
- int sqlite3BtreeClose(Btree *p){
- BtShared *pBt = p->pBt;
- BtCursor *pCur;
- /* Close all cursors opened via this handle. */
- assert( sqlite3_mutex_held(p->db->mutex) );
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- pCur = pBt->pCursor;
- while( pCur ){
- BtCursor *pTmp = pCur;
- pCur = pCur->pNext;
- if( pTmp->pBtree==p ){
- sqlite3BtreeCloseCursor(pTmp);
- }
- }
- /* Rollback any active transaction and free the handle structure.
- ** The call to sqlite3BtreeRollback() drops any table-locks held by
- ** this handle.
- */
- sqlite3BtreeRollback(p);
- sqlite3BtreeLeave(p);
- /* If there are still other outstanding references to the shared-btree
- ** structure, return now. The remainder of this procedure cleans
- ** up the shared-btree.
- */
- assert( p->wantToLock==0 && p->locked==0 );
- if( !p->sharable || removeFromSharingList(pBt) ){
- /* The pBt is no longer on the sharing list, so we can access
- ** it without having to hold the mutex.
- **
- ** Clean out and delete the BtShared object.
- */
- assert( !pBt->pCursor );
- sqlite3PagerClose(pBt->pPager);
- if( pBt->xFreeSchema && pBt->pSchema ){
- pBt->xFreeSchema(pBt->pSchema);
- }
- sqlite3_free(pBt->pSchema);
- sqlite3_free(pBt->pTmpSpace);
- sqlite3_free(pBt);
- }
- #ifndef SQLITE_OMIT_SHARED_CACHE
- assert( p->wantToLock==0 );
- assert( p->locked==0 );
- if( p->pPrev ) p->pPrev->pNext = p->pNext;
- if( p->pNext ) p->pNext->pPrev = p->pPrev;
- #endif
- sqlite3_free(p);
- return SQLITE_OK;
- }
- /*
- ** Change the limit on the number of pages allowed in the cache.
- **
- ** The maximum number of cache pages is set to the absolute
- ** value of mxPage. If mxPage is negative, the pager will
- ** operate asynchronously - it will not stop to do fsync()s
- ** to insure data is written to the disk surface before
- ** continuing. Transactions still work if synchronous is off,
- ** and the database cannot be corrupted if this program
- ** crashes. But if the operating system crashes or there is
- ** an abrupt power failure when synchronous is off, the database
- ** could be left in an inconsistent and unrecoverable state.
- ** Synchronous is on by default so database corruption is not
- ** normally a worry.
- */
- int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
- BtShared *pBt = p->pBt;
- assert( sqlite3_mutex_held(p->db->mutex) );
- sqlite3BtreeEnter(p);
- sqlite3PagerSetCachesize(pBt->pPager, mxPage);
- sqlite3BtreeLeave(p);
- return SQLITE_OK;
- }
- /*
- ** Change the way data is synced to disk in order to increase or decrease
- ** how well the database resists damage due to OS crashes and power
- ** failures. Level 1 is the same as asynchronous (no syncs() occur and
- ** there is a high probability of damage) Level 2 is the default. There
- ** is a very low but non-zero probability of damage. Level 3 reduces the
- ** probability of damage to near zero but with a write performance reduction.
- */
- #ifndef SQLITE_OMIT_PAGER_PRAGMAS
- int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){
- BtShared *pBt = p->pBt;
- assert( sqlite3_mutex_held(p->db->mutex) );
- sqlite3BtreeEnter(p);
- sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync);
- sqlite3BtreeLeave(p);
- return SQLITE_OK;
- }
- #endif
- /*
- ** Return TRUE if the given btree is set to safety level 1. In other
- ** words, return TRUE if no sync() occurs on the disk files.
- */
- int sqlite3BtreeSyncDisabled(Btree *p){
- BtShared *pBt = p->pBt;
- int rc;
- assert( sqlite3_mutex_held(p->db->mutex) );
- sqlite3BtreeEnter(p);
- assert( pBt && pBt->pPager );
- rc = sqlite3PagerNosync(pBt->pPager);
- sqlite3BtreeLeave(p);
- return rc;
- }
- #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
- /*
- ** Change the default pages size and the number of reserved bytes per page.
- **
- ** The page size must be a power of 2 between 512 and 65536. If the page
- ** size supplied does not meet this constraint then the page size is not
- ** changed.
- **
- ** Page sizes are constrained to be a power of two so that the region
- ** of the database file used for locking (beginning at PENDING_BYTE,
- ** the first byte past the 1GB boundary, 0x40000000) needs to occur
- ** at the beginning of a page.
- **
- ** If parameter nReserve is less than zero, then the number of reserved
- ** bytes per page is left unchanged.
- */
- int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){
- int rc = SQLITE_OK;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- if( pBt->pageSizeFixed ){
- sqlite3BtreeLeave(p);
- return SQLITE_READONLY;
- }
- if( nReserve<0 ){
- nReserve = pBt->pageSize - pBt->usableSize;
- }
- if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
- ((pageSize-1)&pageSize)==0 ){
- assert( (pageSize & 7)==0 );
- assert( !pBt->pPage1 && !pBt->pCursor );
- pBt->pageSize = pageSize;
- sqlite3_free(pBt->pTmpSpace);
- pBt->pTmpSpace = 0;
- rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
- }
- pBt->usableSize = pBt->pageSize - nReserve;
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Return the currently defined page size
- */
- int sqlite3BtreeGetPageSize(Btree *p){
- return p->pBt->pageSize;
- }
- int sqlite3BtreeGetReserve(Btree *p){
- int n;
- sqlite3BtreeEnter(p);
- n = p->pBt->pageSize - p->pBt->usableSize;
- sqlite3BtreeLeave(p);
- return n;
- }
- /*
- ** Set the maximum page count for a database if mxPage is positive.
- ** No changes are made if mxPage is 0 or negative.
- ** Regardless of the value of mxPage, return the maximum page count.
- */
- int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
- int n;
- sqlite3BtreeEnter(p);
- n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
- sqlite3BtreeLeave(p);
- return n;
- }
- #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
- /*
- ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
- ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
- ** is disabled. The default value for the auto-vacuum property is
- ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
- */
- int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
- #ifdef SQLITE_OMIT_AUTOVACUUM
- return SQLITE_READONLY;
- #else
- BtShared *pBt = p->pBt;
- int rc = SQLITE_OK;
- int av = (autoVacuum?1:0);
- sqlite3BtreeEnter(p);
- if( pBt->pageSizeFixed && av!=pBt->autoVacuum ){
- rc = SQLITE_READONLY;
- }else{
- pBt->autoVacuum = av;
- }
- sqlite3BtreeLeave(p);
- return rc;
- #endif
- }
- /*
- ** Return the value of the 'auto-vacuum' property. If auto-vacuum is
- ** enabled 1 is returned. Otherwise 0.
- */
- int sqlite3BtreeGetAutoVacuum(Btree *p){
- #ifdef SQLITE_OMIT_AUTOVACUUM
- return BTREE_AUTOVACUUM_NONE;
- #else
- int rc;
- sqlite3BtreeEnter(p);
- rc = (
- (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE:
- (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL:
- BTREE_AUTOVACUUM_INCR
- );
- sqlite3BtreeLeave(p);
- return rc;
- #endif
- }
- /*
- ** Get a reference to pPage1 of the database file. This will
- ** also acquire a readlock on that file.
- **
- ** SQLITE_OK is returned on success. If the file is not a
- ** well-formed database file, then SQLITE_CORRUPT is returned.
- ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
- ** is returned if we run out of memory.
- */
- static int lockBtree(BtShared *pBt){
- int rc;
- MemPage *pPage1;
- assert( sqlite3_mutex_held(pBt->mutex) );
- if( pBt->pPage1 ) return SQLITE_OK;
- rc = sqlite3BtreeGetPage(pBt, 1, &pPage1, 0);
- if( rc!=SQLITE_OK ) return rc;
- /* Do some checking to help insure the file we opened really is
- ** a valid database file.
- */
- rc = SQLITE_NOTADB;
- if( sqlite3PagerPagecount(pBt->pPager)>0 ){
- int pageSize;
- int usableSize;
- u8 *page1 = pPage1->aData;
- if( memcmp(page1, zMagicHeader, 16)!=0 ){
- goto page1_init_failed;
- }
- if( page1[18]>1 ){
- pBt->readOnly = 1;
- }
- if( page1[19]>1 ){
- goto page1_init_failed;
- }
- pageSize = get2byte(&page1[16]);
- if( ((pageSize-1)&pageSize)!=0 || pageSize<512 ||
- (SQLITE_MAX_PAGE_SIZE<32768 && pageSize>SQLITE_MAX_PAGE_SIZE)
- ){
- goto page1_init_failed;
- }
- assert( (pageSize & 7)==0 );
- usableSize = pageSize - page1[20];
- if( pageSize!=pBt->pageSize ){
- /* After reading the first page of the database assuming a page size
- ** of BtShared.pageSize, we have discovered that the page-size is
- ** actually pageSize. Unlock the database, leave pBt->pPage1 at
- ** zero and return SQLITE_OK. The caller will call this function
- ** again with the correct page-size.
- */
- releasePage(pPage1);
- pBt->usableSize = usableSize;
- pBt->pageSize = pageSize;
- sqlite3_free(pBt->pTmpSpace);
- pBt->pTmpSpace = 0;
- sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
- return SQLITE_OK;
- }
- if( usableSize<500 ){
- goto page1_init_failed;
- }
- pBt->pageSize = pageSize;
- pBt->usableSize = usableSize;
- pBt->maxEmbedFrac = page1[21];
- pBt->minEmbedFrac = page1[22];
- pBt->minLeafFrac = page1[23];
- #ifndef SQLITE_OMIT_AUTOVACUUM
- pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
- pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
- #endif
- }
- /* maxLocal is the maximum amount of payload to store locally for
- ** a cell. Make sure it is small enough so that at least minFanout
- ** cells can will fit on one page. We assume a 10-byte page header.
- ** Besides the payload, the cell must store:
- ** 2-byte pointer to the cell
- ** 4-byte child pointer
- ** 9-byte nKey value
- ** 4-byte nData value
- ** 4-byte overflow page pointer
- ** So a cell consists of a 2-byte poiner, a header which is as much as
- ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
- ** page pointer.
- */
- pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
- pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
- pBt->maxLeaf = pBt->usableSize - 35;
- pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
- if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
- goto page1_init_failed;
- }
- assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
- pBt->pPage1 = pPage1;
- return SQLITE_OK;
- page1_init_failed:
- releasePage(pPage1);
- pBt->pPage1 = 0;
- return rc;
- }
- /*
- ** This routine works like lockBtree() except that it also invokes the
- ** busy callback if there is lock contention.
- */
- static int lockBtreeWithRetry(Btree *pRef){
- int rc = SQLITE_OK;
- assert( sqlite3BtreeHoldsMutex(pRef) );
- if( pRef->inTrans==TRANS_NONE ){
- u8 inTransaction = pRef->pBt->inTransaction;
- btreeIntegrity(pRef);
- rc = sqlite3BtreeBeginTrans(pRef, 0);
- pRef->pBt->inTransaction = inTransaction;
- pRef->inTrans = TRANS_NONE;
- if( rc==SQLITE_OK ){
- pRef->pBt->nTransaction--;
- }
- btreeIntegrity(pRef);
- }
- return rc;
- }
-
- /*
- ** If there are no outstanding cursors and we are not in the middle
- ** of a transaction but there is a read lock on the database, then
- ** this routine unrefs the first page of the database file which
- ** has the effect of releasing the read lock.
- **
- ** If there are any outstanding cursors, this routine is a no-op.
- **
- ** If there is a transaction in progress, this routine is a no-op.
- */
- static void unlockBtreeIfUnused(BtShared *pBt){
- assert( sqlite3_mutex_held(pBt->mutex) );
- if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
- if( sqlite3PagerRefcount(pBt->pPager)>=1 ){
- assert( pBt->pPage1->aData );
- #if 0
- if( pBt->pPage1->aData==0 ){
- MemPage *pPage = pBt->pPage1;
- pPage->aData = sqlite3PagerGetData(pPage->pDbPage);
- pPage->pBt = pBt;
- pPage->pgno = 1;
- }
- #endif
- releasePage(pBt->pPage1);
- }
- pBt->pPage1 = 0;
- pBt->inStmt = 0;
- }
- }
- /*
- ** Create a new database by initializing the first page of the
- ** file.
- */
- static int newDatabase(BtShared *pBt){
- MemPage *pP1;
- unsigned char *data;
- int rc;
- assert( sqlite3_mutex_held(pBt->mutex) );
- if( sqlite3PagerPagecount(pBt->pPager)>0 ) return SQLITE_OK;
- pP1 = pBt->pPage1;
- assert( pP1!=0 );
- data = pP1->aData;
- rc = sqlite3PagerWrite(pP1->pDbPage);
- if( rc ) return rc;
- memcpy(data, zMagicHeader, sizeof(zMagicHeader));
- assert( sizeof(zMagicHeader)==16 );
- put2byte(&data[16], pBt->pageSize);
- data[18] = 1;
- data[19] = 1;
- data[20] = pBt->pageSize - pBt->usableSize;
- data[21] = pBt->maxEmbedFrac;
- data[22] = pBt->minEmbedFrac;
- data[23] = pBt->minLeafFrac;
- memset(&data[24], 0, 100-24);
- zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
- pBt->pageSizeFixed = 1;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 );
- assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 );
- put4byte(&data[36 + 4*4], pBt->autoVacuum);
- put4byte(&data[36 + 7*4], pBt->incrVacuum);
- #endif
- return SQLITE_OK;
- }
- /*
- ** Attempt to start a new transaction. A write-transaction
- ** is started if the second argument is nonzero, otherwise a read-
- ** transaction. If the second argument is 2 or more and exclusive
- ** transaction is started, meaning that no other process is allowed
- ** to access the database. A preexisting transaction may not be
- ** upgraded to exclusive by calling this routine a second time - the
- ** exclusivity flag only works for a new transaction.
- **
- ** A write-transaction must be started before attempting any
- ** changes to the database. None of the following routines
- ** will work unless a transaction is started first:
- **
- ** sqlite3BtreeCreateTable()
- ** sqlite3BtreeCreateIndex()
- ** sqlite3BtreeClearTable()
- ** sqlite3BtreeDropTable()
- ** sqlite3BtreeInsert()
- ** sqlite3BtreeDelete()
- ** sqlite3BtreeUpdateMeta()
- **
- ** If an initial attempt to acquire the lock fails because of lock contention
- ** and the database was previously unlocked, then invoke the busy handler
- ** if there is one. But if there was previously a read-lock, do not
- ** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
- ** returned when there is already a read-lock in order to avoid a deadlock.
- **
- ** Suppose there are two processes A and B. A has a read lock and B has
- ** a reserved lock. B tries to promote to exclusive but is blocked because
- ** of A's read lock. A tries to promote to reserved but is blocked by B.
- ** One or the other of the two processes must give way or there can be
- ** no progress. By returning SQLITE_BUSY and not invoking the busy callback
- ** when A already has a read lock, we encourage A to give up and let B
- ** proceed.
- */
- int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
- BtShared *pBt = p->pBt;
- int rc = SQLITE_OK;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- btreeIntegrity(p);
- /* If the btree is already in a write-transaction, or it
- ** is already in a read-transaction and a read-transaction
- ** is requested, this is a no-op.
- */
- if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
- goto trans_begun;
- }
- /* Write transactions are not possible on a read-only database */
- if( pBt->readOnly && wrflag ){
- rc = SQLITE_READONLY;
- goto trans_begun;
- }
- /* If another database handle has already opened a write transaction
- ** on this shared-btree structure and a second write transaction is
- ** requested, return SQLITE_BUSY.
- */
- if( pBt->inTransaction==TRANS_WRITE && wrflag ){
- rc = SQLITE_BUSY;
- goto trans_begun;
- }
- #ifndef SQLITE_OMIT_SHARED_CACHE
- if( wrflag>1 ){
- BtLock *pIter;
- for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
- if( pIter->pBtree!=p ){
- rc = SQLITE_BUSY;
- goto trans_begun;
- }
- }
- }
- #endif
- do {
- while( rc==SQLITE_OK && pBt->pPage1==0 ){
- rc = lockBtree(pBt);
- }
- if( rc==SQLITE_OK && wrflag ){
- if( pBt->readOnly ){
- rc = SQLITE_READONLY;
- }else{
- rc = sqlite3PagerBegin(pBt->pPage1->pDbPage, wrflag>1);
- if( rc==SQLITE_OK ){
- rc = newDatabase(pBt);
- }
- }
- }
-
- if( rc==SQLITE_OK ){
- if( wrflag ) pBt->inStmt = 0;
- }else{
- unlockBtreeIfUnused(pBt);
- }
- }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
- sqlite3BtreeInvokeBusyHandler(pBt, 0) );
- if( rc==SQLITE_OK ){
- if( p->inTrans==TRANS_NONE ){
- pBt->nTransaction++;
- }
- p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
- if( p->inTrans>pBt->inTransaction ){
- pBt->inTransaction = p->inTrans;
- }
- #ifndef SQLITE_OMIT_SHARED_CACHE
- if( wrflag>1 ){
- assert( !pBt->pExclusive );
- pBt->pExclusive = p;
- }
- #endif
- }
- trans_begun:
- btreeIntegrity(p);
- sqlite3BtreeLeave(p);
- return rc;
- }
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /*
- ** Set the pointer-map entries for all children of page pPage. Also, if
- ** pPage contains cells that point to overflow pages, set the pointer
- ** map entries for the overflow pages as well.
- */
- static int setChildPtrmaps(MemPage *pPage){
- int i; /* Counter variable */
- int nCell; /* Number of cells in page pPage */
- int rc; /* Return code */
- BtShared *pBt = pPage->pBt;
- int isInitOrig = pPage->isInit;
- Pgno pgno = pPage->pgno;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- rc = sqlite3BtreeInitPage(pPage, pPage->pParent);
- if( rc!=SQLITE_OK ){
- goto set_child_ptrmaps_out;
- }
- nCell = pPage->nCell;
- for(i=0; i<nCell; i++){
- u8 *pCell = findCell(pPage, i);
- rc = ptrmapPutOvflPtr(pPage, pCell);
- if( rc!=SQLITE_OK ){
- goto set_child_ptrmaps_out;
- }
- if( !pPage->leaf ){
- Pgno childPgno = get4byte(pCell);
- rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
- if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out;
- }
- }
- if( !pPage->leaf ){
- Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
- rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
- }
- set_child_ptrmaps_out:
- pPage->isInit = isInitOrig;
- return rc;
- }
- /*
- ** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow
- ** page, is a pointer to page iFrom. Modify this pointer so that it points to
- ** iTo. Parameter eType describes the type of pointer to be modified, as
- ** follows:
- **
- ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
- ** page of pPage.
- **
- ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
- ** page pointed to by one of the cells on pPage.
- **
- ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
- ** overflow page in the list.
- */
- static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- if( eType==PTRMAP_OVERFLOW2 ){
- /* The pointer is always the first 4 bytes of the page in this case. */
- if( get4byte(pPage->aData)!=iFrom ){
- return SQLITE_CORRUPT_BKPT;
- }
- put4byte(pPage->aData, iTo);
- }else{
- int isInitOrig = pPage->isInit;
- int i;
- int nCell;
- sqlite3BtreeInitPage(pPage, 0);
- nCell = pPage->nCell;
- for(i=0; i<nCell; i++){
- u8 *pCell = findCell(pPage, i);
- if( eType==PTRMAP_OVERFLOW1 ){
- CellInfo info;
- sqlite3BtreeParseCellPtr(pPage, pCell, &info);
- if( info.iOverflow ){
- if( iFrom==get4byte(&pCell[info.iOverflow]) ){
- put4byte(&pCell[info.iOverflow], iTo);
- break;
- }
- }
- }else{
- if( get4byte(pCell)==iFrom ){
- put4byte(pCell, iTo);
- break;
- }
- }
- }
-
- if( i==nCell ){
- if( eType!=PTRMAP_BTREE ||
- get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
- return SQLITE_CORRUPT_BKPT;
- }
- put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
- }
- pPage->isInit = isInitOrig;
- }
- return SQLITE_OK;
- }
- /*
- ** Move the open database page pDbPage to location iFreePage in the
- ** database. The pDbPage reference remains valid.
- */
- static int relocatePage(
- BtShared *pBt, /* Btree */
- MemPage *pDbPage, /* Open page to move */
- u8 eType, /* Pointer map 'type' entry for pDbPage */
- Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */
- Pgno iFreePage /* The location to move pDbPage to */
- ){
- MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */
- Pgno iDbPage = pDbPage->pgno;
- Pager *pPager = pBt->pPager;
- int rc;
- assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
- eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
- assert( sqlite3_mutex_held(pBt->mutex) );
- assert( pDbPage->pBt==pBt );
- /* Move page iDbPage from its current location to page number iFreePage */
- TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)n",
- iDbPage, iFreePage, iPtrPage, eType));
- rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- pDbPage->pgno = iFreePage;
- /* If pDbPage was a btree-page, then it may have child pages and/or cells
- ** that point to overflow pages. The pointer map entries for all these
- ** pages need to be changed.
- **
- ** If pDbPage is an overflow page, then the first 4 bytes may store a
- ** pointer to a subsequent overflow page. If this is the case, then
- ** the pointer map needs to be updated for the subsequent overflow page.
- */
- if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
- rc = setChildPtrmaps(pDbPage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- }else{
- Pgno nextOvfl = get4byte(pDbPage->aData);
- if( nextOvfl!=0 ){
- rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- }
- }
- /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
- ** that it points at iFreePage. Also fix the pointer map entry for
- ** iPtrPage.
- */
- if( eType!=PTRMAP_ROOTPAGE ){
- rc = sqlite3BtreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- rc = sqlite3PagerWrite(pPtrPage->pDbPage);
- if( rc!=SQLITE_OK ){
- releasePage(pPtrPage);
- return rc;
- }
- rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
- releasePage(pPtrPage);
- if( rc==SQLITE_OK ){
- rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage);
- }
- }
- return rc;
- }
- /* Forward declaration required by incrVacuumStep(). */
- static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
- /*
- ** Perform a single step of an incremental-vacuum. If successful,
- ** return SQLITE_OK. If there is no work to do (and therefore no
- ** point in calling this function again), return SQLITE_DONE.
- **
- ** More specificly, this function attempts to re-organize the
- ** database so that the last page of the file currently in use
- ** is no longer in use.
- **
- ** If the nFin parameter is non-zero, the implementation assumes
- ** that the caller will keep calling incrVacuumStep() until
- ** it returns SQLITE_DONE or an error, and that nFin is the
- ** number of pages the database file will contain after this
- ** process is complete.
- */
- static int incrVacuumStep(BtShared *pBt, Pgno nFin){
- Pgno iLastPg; /* Last page in the database */
- Pgno nFreeList; /* Number of pages still on the free-list */
- assert( sqlite3_mutex_held(pBt->mutex) );
- iLastPg = pBt->nTrunc;
- if( iLastPg==0 ){
- iLastPg = sqlite3PagerPagecount(pBt->pPager);
- }
- if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
- int rc;
- u8 eType;
- Pgno iPtrPage;
- nFreeList = get4byte(&pBt->pPage1->aData[36]);
- if( nFreeList==0 || nFin==iLastPg ){
- return SQLITE_DONE;
- }
- rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- if( eType==PTRMAP_ROOTPAGE ){
- return SQLITE_CORRUPT_BKPT;
- }
- if( eType==PTRMAP_FREEPAGE ){
- if( nFin==0 ){
- /* Remove the page from the files free-list. This is not required
- ** if nFin is non-zero. In that case, the free-list will be
- ** truncated to zero after this function returns, so it doesn't
- ** matter if it still contains some garbage entries.
- */
- Pgno iFreePg;
- MemPage *pFreePg;
- rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- assert( iFreePg==iLastPg );
- releasePage(pFreePg);
- }
- } else {
- Pgno iFreePg; /* Index of free page to move pLastPg to */
- MemPage *pLastPg;
- rc = sqlite3BtreeGetPage(pBt, iLastPg, &pLastPg, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- /* If nFin is zero, this loop runs exactly once and page pLastPg
- ** is swapped with the first free page pulled off the free list.
- **
- ** On the other hand, if nFin is greater than zero, then keep
- ** looping until a free-page located within the first nFin pages
- ** of the file is found.
- */
- do {
- MemPage *pFreePg;
- rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0);
- if( rc!=SQLITE_OK ){
- releasePage(pLastPg);
- return rc;
- }
- releasePage(pFreePg);
- }while( nFin!=0 && iFreePg>nFin );
- assert( iFreePg<iLastPg );
-
- rc = sqlite3PagerWrite(pLastPg->pDbPage);
- if( rc==SQLITE_OK ){
- rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg);
- }
- releasePage(pLastPg);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- }
- }
- pBt->nTrunc = iLastPg - 1;
- while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){
- pBt->nTrunc--;
- }
- return SQLITE_OK;
- }
- /*
- ** A write-transaction must be opened before calling this function.
- ** It performs a single unit of work towards an incremental vacuum.
- **
- ** If the incremental vacuum is finished after this function has run,
- ** SQLITE_DONE is returned. If it is not finished, but no error occured,
- ** SQLITE_OK is returned. Otherwise an SQLite error code.
- */
- int sqlite3BtreeIncrVacuum(Btree *p){
- int rc;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
- if( !pBt->autoVacuum ){
- rc = SQLITE_DONE;
- }else{
- invalidateAllOverflowCache(pBt);
- rc = incrVacuumStep(pBt, 0);
- }
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** This routine is called prior to sqlite3PagerCommit when a transaction
- ** is commited for an auto-vacuum database.
- **
- ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
- ** the database file should be truncated to during the commit process.
- ** i.e. the database has been reorganized so that only the first *pnTrunc
- ** pages are in use.
- */
- static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){
- int rc = SQLITE_OK;
- Pager *pPager = pBt->pPager;
- #ifndef NDEBUG
- int nRef = sqlite3PagerRefcount(pPager);
- #endif
- assert( sqlite3_mutex_held(pBt->mutex) );
- invalidateAllOverflowCache(pBt);
- assert(pBt->autoVacuum);
- if( !pBt->incrVacuum ){
- Pgno nFin = 0;
- if( pBt->nTrunc==0 ){
- Pgno nFree;
- Pgno nPtrmap;
- const int pgsz = pBt->pageSize;
- Pgno nOrig = sqlite3PagerPagecount(pBt->pPager);
- if( PTRMAP_ISPAGE(pBt, nOrig) ){
- return SQLITE_CORRUPT_BKPT;
- }
- if( nOrig==PENDING_BYTE_PAGE(pBt) ){
- nOrig--;
- }
- nFree = get4byte(&pBt->pPage1->aData[36]);
- nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5);
- nFin = nOrig - nFree - nPtrmap;
- if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){
- nFin--;
- }
- while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
- nFin--;
- }
- }
- while( rc==SQLITE_OK ){
- rc = incrVacuumStep(pBt, nFin);
- }
- if( rc==SQLITE_DONE ){
- assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc);
- rc = SQLITE_OK;
- if( pBt->nTrunc ){
- rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
- put4byte(&pBt->pPage1->aData[32], 0);
- put4byte(&pBt->pPage1->aData[36], 0);
- pBt->nTrunc = nFin;
- }
- }
- if( rc!=SQLITE_OK ){
- sqlite3PagerRollback(pPager);
- }
- }
- if( rc==SQLITE_OK ){
- *pnTrunc = pBt->nTrunc;
- pBt->nTrunc = 0;
- }
- assert( nRef==sqlite3PagerRefcount(pPager) );
- return rc;
- }
- #endif
- /*
- ** This routine does the first phase of a two-phase commit. This routine
- ** causes a rollback journal to be created (if it does not already exist)
- ** and populated with enough information so that if a power loss occurs
- ** the database can be restored to its original state by playing back
- ** the journal. Then the contents of the journal are flushed out to
- ** the disk. After the journal is safely on oxide, the changes to the
- ** database are written into the database file and flushed to oxide.
- ** At the end of this call, the rollback journal still exists on the
- ** disk and we are still holding all locks, so the transaction has not
- ** committed. See sqlite3BtreeCommit() for the second phase of the
- ** commit process.
- **
- ** This call is a no-op if no write-transaction is currently active on pBt.
- **
- ** Otherwise, sync the database file for the btree pBt. zMaster points to
- ** the name of a master journal file that should be written into the
- ** individual journal file, or is NULL, indicating no master journal file
- ** (single database transaction).
- **
- ** When this is called, the master journal should already have been
- ** created, populated with this journal pointer and synced to disk.
- **
- ** Once this is routine has returned, the only thing required to commit
- ** the write-transaction for this database file is to delete the journal.
- */
- int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){
- int rc = SQLITE_OK;
- if( p->inTrans==TRANS_WRITE ){
- BtShared *pBt = p->pBt;
- Pgno nTrunc = 0;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- if( pBt->autoVacuum ){
- rc = autoVacuumCommit(pBt, &nTrunc);
- if( rc!=SQLITE_OK ){
- sqlite3BtreeLeave(p);
- return rc;
- }
- }
- #endif
- rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc, 0);
- sqlite3BtreeLeave(p);
- }
- return rc;
- }
- /*
- ** Commit the transaction currently in progress.
- **
- ** This routine implements the second phase of a 2-phase commit. The
- ** sqlite3BtreeSync() routine does the first phase and should be invoked
- ** prior to calling this routine. The sqlite3BtreeSync() routine did
- ** all the work of writing information out to disk and flushing the
- ** contents so that they are written onto the disk platter. All this
- ** routine has to do is delete or truncate the rollback journal
- ** (which causes the transaction to commit) and drop locks.
- **
- ** This will release the write lock on the database file. If there
- ** are no active cursors, it also releases the read lock.
- */
- int sqlite3BtreeCommitPhaseTwo(Btree *p){
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- btreeIntegrity(p);
- /* If the handle has a write-transaction open, commit the shared-btrees
- ** transaction and set the shared state to TRANS_READ.
- */
- if( p->inTrans==TRANS_WRITE ){
- int rc;
- assert( pBt->inTransaction==TRANS_WRITE );
- assert( pBt->nTransaction>0 );
- rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
- if( rc!=SQLITE_OK ){
- sqlite3BtreeLeave(p);
- return rc;
- }
- pBt->inTransaction = TRANS_READ;
- pBt->inStmt = 0;
- }
- unlockAllTables(p);
- /* If the handle has any kind of transaction open, decrement the transaction
- ** count of the shared btree. If the transaction count reaches 0, set
- ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
- ** will unlock the pager.
- */
- if( p->inTrans!=TRANS_NONE ){
- pBt->nTransaction--;
- if( 0==pBt->nTransaction ){
- pBt->inTransaction = TRANS_NONE;
- }
- }
- /* Set the handles current transaction state to TRANS_NONE and unlock
- ** the pager if this call closed the only read or write transaction.
- */
- p->inTrans = TRANS_NONE;
- unlockBtreeIfUnused(pBt);
- btreeIntegrity(p);
- sqlite3BtreeLeave(p);
- return SQLITE_OK;
- }
- /*
- ** Do both phases of a commit.
- */
- int sqlite3BtreeCommit(Btree *p){
- int rc;
- sqlite3BtreeEnter(p);
- rc = sqlite3BtreeCommitPhaseOne(p, 0);
- if( rc==SQLITE_OK ){
- rc = sqlite3BtreeCommitPhaseTwo(p);
- }
- sqlite3BtreeLeave(p);
- return rc;
- }
- #ifndef NDEBUG
- /*
- ** Return the number of write-cursors open on this handle. This is for use
- ** in assert() expressions, so it is only compiled if NDEBUG is not
- ** defined.
- **
- ** For the purposes of this routine, a write-cursor is any cursor that
- ** is capable of writing to the databse. That means the cursor was
- ** originally opened for writing and the cursor has not be disabled
- ** by having its state changed to CURSOR_FAULT.
- */
- static int countWriteCursors(BtShared *pBt){
- BtCursor *pCur;
- int r = 0;
- for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
- if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++;
- }
- return r;
- }
- #endif
- /*
- ** This routine sets the state to CURSOR_FAULT and the error
- ** code to errCode for every cursor on BtShared that pBtree
- ** references.
- **
- ** Every cursor is tripped, including cursors that belong
- ** to other database connections that happen to be sharing
- ** the cache with pBtree.
- **
- ** This routine gets called when a rollback occurs.
- ** All cursors using the same cache must be tripped
- ** to prevent them from trying to use the btree after
- ** the rollback. The rollback may have deleted tables
- ** or moved root pages, so it is not sufficient to
- ** save the state of the cursor. The cursor must be
- ** invalidated.
- */
- void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){
- BtCursor *p;
- sqlite3BtreeEnter(pBtree);
- for(p=pBtree->pBt->pCursor; p; p=p->pNext){
- clearCursorPosition(p);
- p->eState = CURSOR_FAULT;
- p->skip = errCode;
- }
- sqlite3BtreeLeave(pBtree);
- }
- /*
- ** Rollback the transaction in progress. All cursors will be
- ** invalided by this operation. Any attempt to use a cursor
- ** that was open at the beginning of this operation will result
- ** in an error.
- **
- ** This will release the write lock on the database file. If there
- ** are no active cursors, it also releases the read lock.
- */
- int sqlite3BtreeRollback(Btree *p){
- int rc;
- BtShared *pBt = p->pBt;
- MemPage *pPage1;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- rc = saveAllCursors(pBt, 0, 0);
- #ifndef SQLITE_OMIT_SHARED_CACHE
- if( rc!=SQLITE_OK ){
- /* This is a horrible situation. An IO or malloc() error occured whilst
- ** trying to save cursor positions. If this is an automatic rollback (as
- ** the result of a constraint, malloc() failure or IO error) then
- ** the cache may be internally inconsistent (not contain valid trees) so
- ** we cannot simply return the error to the caller. Instead, abort
- ** all queries that may be using any of the cursors that failed to save.
- */
- sqlite3BtreeTripAllCursors(p, rc);
- }
- #endif
- btreeIntegrity(p);
- unlockAllTables(p);
- if( p->inTrans==TRANS_WRITE ){
- int rc2;
- #ifndef SQLITE_OMIT_AUTOVACUUM
- pBt->nTrunc = 0;
- #endif
- assert( TRANS_WRITE==pBt->inTransaction );
- rc2 = sqlite3PagerRollback(pBt->pPager);
- if( rc2!=SQLITE_OK ){
- rc = rc2;
- }
- /* The rollback may have destroyed the pPage1->aData value. So
- ** call sqlite3BtreeGetPage() on page 1 again to make
- ** sure pPage1->aData is set correctly. */
- if( sqlite3BtreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
- releasePage(pPage1);
- }
- assert( countWriteCursors(pBt)==0 );
- pBt->inTransaction = TRANS_READ;
- }
- if( p->inTrans!=TRANS_NONE ){
- assert( pBt->nTransaction>0 );
- pBt->nTransaction--;
- if( 0==pBt->nTransaction ){
- pBt->inTransaction = TRANS_NONE;
- }
- }
- p->inTrans = TRANS_NONE;
- pBt->inStmt = 0;
- unlockBtreeIfUnused(pBt);
- btreeIntegrity(p);
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Start a statement subtransaction. The subtransaction can
- ** can be rolled back independently of the main transaction.
- ** You must start a transaction before starting a subtransaction.
- ** The subtransaction is ended automatically if the main transaction
- ** commits or rolls back.
- **
- ** Only one subtransaction may be active at a time. It is an error to try
- ** to start a new subtransaction if another subtransaction is already active.
- **
- ** Statement subtransactions are used around individual SQL statements
- ** that are contained within a BEGIN...COMMIT block. If a constraint
- ** error occurs within the statement, the effect of that one statement
- ** can be rolled back without having to rollback the entire transaction.
- */
- int sqlite3BtreeBeginStmt(Btree *p){
- int rc;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){
- rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
- }else{
- assert( pBt->inTransaction==TRANS_WRITE );
- rc = pBt->readOnly ? SQLITE_OK : sqlite3PagerStmtBegin(pBt->pPager);
- pBt->inStmt = 1;
- }
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Commit the statment subtransaction currently in progress. If no
- ** subtransaction is active, this is a no-op.
- */
- int sqlite3BtreeCommitStmt(Btree *p){
- int rc;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- if( pBt->inStmt && !pBt->readOnly ){
- rc = sqlite3PagerStmtCommit(pBt->pPager);
- }else{
- rc = SQLITE_OK;
- }
- pBt->inStmt = 0;
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Rollback the active statement subtransaction. If no subtransaction
- ** is active this routine is a no-op.
- **
- ** All cursors will be invalidated by this operation. Any attempt
- ** to use a cursor that was open at the beginning of this operation
- ** will result in an error.
- */
- int sqlite3BtreeRollbackStmt(Btree *p){
- int rc = SQLITE_OK;
- BtShared *pBt = p->pBt;
- sqlite3BtreeEnter(p);
- pBt->db = p->db;
- if( pBt->inStmt && !pBt->readOnly ){
- rc = sqlite3PagerStmtRollback(pBt->pPager);
- assert( countWriteCursors(pBt)==0 );
- pBt->inStmt = 0;
- }
- sqlite3BtreeLeave(p);
- return rc;
- }
- /*
- ** Create a new cursor for the BTree whose root is on the page
- ** iTable. The act of acquiring a cursor gets a read lock on
- ** the database file.
- **
- ** If wrFlag==0, then the cursor can only be used for reading.
- ** If wrFlag==1, then the cursor can be used for reading or for
- ** writing if other conditions for writing are also met. These
- ** are the conditions that must be met in order for writing to
- ** be allowed:
- **
- ** 1: The cursor must have been opened with wrFlag==1
- **
- ** 2: Other database connections that share the same pager cache
- ** but which are not in the READ_UNCOMMITTED state may not have
- ** cursors open with wrFlag==0 on the same table. Otherwise
- ** the changes made by this write cursor would be visible to
- ** the read cursors in the other database connection.
- **
- ** 3: The database must be writable (not on read-only media)
- **
- ** 4: There must be an active transaction.
- **
- ** No checking is done to make sure that page iTable really is the
- ** root page of a b-tree. If it is not, then the cursor acquired
- ** will not work correctly.
- */
- static int btreeCursor(
- Btree *p, /* The btree */
- int iTable, /* Root page of table to open */
- int wrFlag, /* 1 to write. 0 read-only */
- struct KeyInfo *pKeyInfo, /* First arg to comparison function */
- BtCursor *pCur /* Space for new cursor */
- ){
- int rc;
- BtShared *pBt = p->pBt;
- assert( sqlite3BtreeHoldsMutex(p) );
- if( wrFlag ){
- if( pBt->readOnly ){
- return SQLITE_READONLY;
- }
- if( checkReadLocks(p, iTable, 0) ){
- return SQLITE_LOCKED;
- }
- }
- if( pBt->pPage1==0 ){
- rc = lockBtreeWithRetry(p);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- if( pBt->readOnly && wrFlag ){
- return SQLITE_READONLY;
- }
- }
- pCur->pgnoRoot = (Pgno)iTable;
- if( iTable==1 && sqlite3PagerPagecount(pBt->pPager)==0 ){
- rc = SQLITE_EMPTY;
- goto create_cursor_exception;
- }
- rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
- if( rc!=SQLITE_OK ){
- goto create_cursor_exception;
- }
- /* Now that no other errors can occur, finish filling in the BtCursor
- ** variables, link the cursor into the BtShared list and set *ppCur (the
- ** output argument to this function).
- */
- pCur->pKeyInfo = pKeyInfo;
- pCur->pBtree = p;
- pCur->pBt = pBt;
- pCur->wrFlag = wrFlag;
- pCur->pNext = pBt->pCursor;
- if( pCur->pNext ){
- pCur->pNext->pPrev = pCur;
- }
- pBt->pCursor = pCur;
- pCur->eState = CURSOR_INVALID;
- return SQLITE_OK;
- create_cursor_exception:
- if( pCur ){
- releasePage(pCur->pPage);
- }
- unlockBtreeIfUnused(pBt);
- return rc;
- }
- int sqlite3BtreeCursor(
- Btree *p, /* The btree */
- int iTable, /* Root page of table to open */
- int wrFlag, /* 1 to write. 0 read-only */
- struct KeyInfo *pKeyInfo, /* First arg to xCompare() */
- BtCursor *pCur /* Write new cursor here */
- ){
- int rc;
- sqlite3BtreeEnter(p);
- p->pBt->db = p->db;
- rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur);
- sqlite3BtreeLeave(p);
- return rc;
- }
- int sqlite3BtreeCursorSize(){
- return sizeof(BtCursor);
- }
- /*
- ** Close a cursor. The read lock on the database file is released
- ** when the last cursor is closed.
- */
- int sqlite3BtreeCloseCursor(BtCursor *pCur){
- Btree *pBtree = pCur->pBtree;
- if( pBtree ){
- BtShared *pBt = pCur->pBt;
- sqlite3BtreeEnter(pBtree);
- pBt->db = pBtree->db;
- clearCursorPosition(pCur);
- if( pCur->pPrev ){
- pCur->pPrev->pNext = pCur->pNext;
- }else{
- pBt->pCursor = pCur->pNext;
- }
- if( pCur->pNext ){
- pCur->pNext->pPrev = pCur->pPrev;
- }
- releasePage(pCur->pPage);
- unlockBtreeIfUnused(pBt);
- invalidateOverflowCache(pCur);
- /* sqlite3_free(pCur); */
- sqlite3BtreeLeave(pBtree);
- }
- return SQLITE_OK;
- }
- /*
- ** Make a temporary cursor by filling in the fields of pTempCur.
- ** The temporary cursor is not on the cursor list for the Btree.
- */
- void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){
- assert( cursorHoldsMutex(pCur) );
- memcpy(pTempCur, pCur, sizeof(*pCur));
- pTempCur->pNext = 0;
- pTempCur->pPrev = 0;
- if( pTempCur->pPage ){
- sqlite3PagerRef(pTempCur->pPage->pDbPage);
- }
- }
- /*
- ** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
- ** function above.
- */
- void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){
- assert( cursorHoldsMutex(pCur) );
- if( pCur->pPage ){
- sqlite3PagerUnref(pCur->pPage->pDbPage);
- }
- }
- /*
- ** Make sure the BtCursor* given in the argument has a valid
- ** BtCursor.info structure. If it is not already valid, call
- ** sqlite3BtreeParseCell() to fill it in.
- **
- ** BtCursor.info is a cache of the information in the current cell.
- ** Using this cache reduces the number of calls to sqlite3BtreeParseCell().
- **
- ** 2007-06-25: There is a bug in some versions of MSVC that cause the
- ** compiler to crash when getCellInfo() is implemented as a macro.
- ** But there is a measureable speed advantage to using the macro on gcc
- ** (when less compiler optimizations like -Os or -O0 are used and the
- ** compiler is not doing agressive inlining.) So we use a real function
- ** for MSVC and a macro for everything else. Ticket #2457.
- */
- #ifndef NDEBUG
- static void assertCellInfo(BtCursor *pCur){
- CellInfo info;
- memset(&info, 0, sizeof(info));
- sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &info);
- assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
- }
- #else
- #define assertCellInfo(x)
- #endif
- #ifdef _MSC_VER
- /* Use a real function in MSVC to work around bugs in that compiler. */
- static void getCellInfo(BtCursor *pCur){
- if( pCur->info.nSize==0 ){
- sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);
- pCur->validNKey = 1;
- }else{
- assertCellInfo(pCur);
- }
- }
- #else /* if not _MSC_VER */
- /* Use a macro in all other compilers so that the function is inlined */
- #define getCellInfo(pCur)
- if( pCur->info.nSize==0 ){
- sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);
- pCur->validNKey = 1;
- }else{
- assertCellInfo(pCur);
- }
- #endif /* _MSC_VER */
- /*
- ** Set *pSize to the size of the buffer needed to hold the value of
- ** the key for the current entry. If the cursor is not pointing
- ** to a valid entry, *pSize is set to 0.
- **
- ** For a table with the INTKEY flag set, this routine returns the key
- ** itself, not the number of bytes in the key.
- */
- int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
- int rc;
- assert( cursorHoldsMutex(pCur) );
- rc = restoreOrClearCursorPosition(pCur);
- if( rc==SQLITE_OK ){
- assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
- if( pCur->eState==CURSOR_INVALID ){
- *pSize = 0;
- }else{
- getCellInfo(pCur);
- *pSize = pCur->info.nKey;
- }
- }
- return rc;
- }
- /*
- ** Set *pSize to the number of bytes of data in the entry the
- ** cursor currently points to. Always return SQLITE_OK.
- ** Failure is not possible. If the cursor is not currently
- ** pointing to an entry (which can happen, for example, if
- ** the database is empty) then *pSize is set to 0.
- */
- int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
- int rc;
- assert( cursorHoldsMutex(pCur) );
- rc = restoreOrClearCursorPosition(pCur);
- if( rc==SQLITE_OK ){
- assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
- if( pCur->eState==CURSOR_INVALID ){
- /* Not pointing at a valid entry - set *pSize to 0. */
- *pSize = 0;
- }else{
- getCellInfo(pCur);
- *pSize = pCur->info.nData;
- }
- }
- return rc;
- }
- /*
- ** Given the page number of an overflow page in the database (parameter
- ** ovfl), this function finds the page number of the next page in the
- ** linked list of overflow pages. If possible, it uses the auto-vacuum
- ** pointer-map data instead of reading the content of page ovfl to do so.
- **
- ** If an error occurs an SQLite error code is returned. Otherwise:
- **
- ** Unless pPgnoNext is NULL, the page number of the next overflow
- ** page in the linked list is written to *pPgnoNext. If page ovfl
- ** is the last page in its linked list, *pPgnoNext is set to zero.
- **
- ** If ppPage is not NULL, *ppPage is set to the MemPage* handle
- ** for page ovfl. The underlying pager page may have been requested
- ** with the noContent flag set, so the page data accessable via
- ** this handle may not be trusted.
- */
- static int getOverflowPage(
- BtShared *pBt,
- Pgno ovfl, /* Overflow page */
- MemPage **ppPage, /* OUT: MemPage handle */
- Pgno *pPgnoNext /* OUT: Next overflow page number */
- ){
- Pgno next = 0;
- int rc;
- assert( sqlite3_mutex_held(pBt->mutex) );
- /* One of these must not be NULL. Otherwise, why call this function? */
- assert(ppPage || pPgnoNext);
- /* If pPgnoNext is NULL, then this function is being called to obtain
- ** a MemPage* reference only. No page-data is required in this case.
- */
- if( !pPgnoNext ){
- return sqlite3BtreeGetPage(pBt, ovfl, ppPage, 1);
- }
- #ifndef SQLITE_OMIT_AUTOVACUUM
- /* Try to find the next page in the overflow list using the
- ** autovacuum pointer-map pages. Guess that the next page in
- ** the overflow list is page number (ovfl+1). If that guess turns
- ** out to be wrong, fall back to loading the data of page
- ** number ovfl to determine the next page number.
- */
- if( pBt->autoVacuum ){
- Pgno pgno;
- Pgno iGuess = ovfl+1;
- u8 eType;
- while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){
- iGuess++;
- }
- if( iGuess<=sqlite3PagerPagecount(pBt->pPager) ){
- rc = ptrmapGet(pBt, iGuess, &eType, &pgno);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- if( eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){
- next = iGuess;
- }
- }
- }
- #endif
- if( next==0 || ppPage ){
- MemPage *pPage = 0;
- rc = sqlite3BtreeGetPage(pBt, ovfl, &pPage, next!=0);
- assert(rc==SQLITE_OK || pPage==0);
- if( next==0 && rc==SQLITE_OK ){
- next = get4byte(pPage->aData);
- }
- if( ppPage ){
- *ppPage = pPage;
- }else{
- releasePage(pPage);
- }
- }
- *pPgnoNext = next;
- return rc;
- }
- /*
- ** Copy data from a buffer to a page, or from a page to a buffer.
- **
- ** pPayload is a pointer to data stored on database page pDbPage.
- ** If argument eOp is false, then nByte bytes of data are copied
- ** from pPayload to the buffer pointed at by pBuf. If eOp is true,
- ** then sqlite3PagerWrite() is called on pDbPage and nByte bytes
- ** of data are copied from the buffer pBuf to pPayload.
- **
- ** SQLITE_OK is returned on success, otherwise an error code.
- */
- static int copyPayload(
- void *pPayload, /* Pointer to page data */
- void *pBuf, /* Pointer to buffer */
- int nByte, /* Number of bytes to copy */
- int eOp, /* 0 -> copy from page, 1 -> copy to page */
- DbPage *pDbPage /* Page containing pPayload */
- ){
- if( eOp ){
- /* Copy data from buffer to page (a write operation) */
- int rc = sqlite3PagerWrite(pDbPage);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- memcpy(pPayload, pBuf, nByte);
- }else{
- /* Copy data from page to buffer (a read operation) */
- memcpy(pBuf, pPayload, nByte);
- }
- return SQLITE_OK;
- }
- /*
- ** This function is used to read or overwrite payload information
- ** for the entry that the pCur cursor is pointing to. If the eOp
- ** parameter is 0, this is a read operation (data copied into
- ** buffer pBuf). If it is non-zero, a write (data copied from
- ** buffer pBuf).
- **
- ** A total of "amt" bytes are read or written beginning at "offset".
- ** Data is read to or from the buffer pBuf.
- **
- ** This routine does not make a distinction between key and data.
- ** It just reads or writes bytes from the payload area. Data might
- ** appear on the main page or be scattered out on multiple overflow
- ** pages.
- **
- ** If the BtCursor.isIncrblobHandle flag is set, and the current
- ** cursor entry uses one or more overflow pages, this function
- ** allocates space for and lazily popluates the overflow page-list
- ** cache array (BtCursor.aOverflow). Subsequent calls use this
- ** cache to make seeking to the supplied offset more efficient.
- **
- ** Once an overflow page-list cache has been allocated, it may be
- ** invalidated if some other cursor writes to the same table, or if
- ** the cursor is moved to a different row. Additionally, in auto-vacuum
- ** mode, the following events may invalidate an overflow page-list cache.
- **
- ** * An incremental vacuum,
- ** * A commit in auto_vacuum="full" mode,
- ** * Creating a table (may require moving an overflow page).
- */
- static int accessPayload(
- BtCursor *pCur, /* Cursor pointing to entry to read from */
- int offset, /* Begin reading this far into payload */
- int amt, /* Read this many bytes */
- unsigned char *pBuf, /* Write the bytes into this buffer */
- int skipKey, /* offset begins at data if this is true */
- int eOp /* zero to read. non-zero to write. */
- ){
- unsigned char *aPayload;
- int rc = SQLITE_OK;
- u32 nKey;
- int iIdx = 0;
- MemPage *pPage = pCur->pPage; /* Btree page of current cursor entry */
- BtShared *pBt; /* Btree this cursor belongs to */
- assert( pPage );
- assert( pCur->eState==CURSOR_VALID );
- assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- assert( offset>=0 );
- assert( cursorHoldsMutex(pCur) );
- getCellInfo(pCur);
- aPayload = pCur->info.pCell + pCur->info.nHeader;
- nKey = (pPage->intKey ? 0 : pCur->info.nKey);
- if( skipKey ){
- offset += nKey;
- }
- if( offset+amt > nKey+pCur->info.nData ){
- /* Trying to read or write past the end of the data is an error */
- return SQLITE_ERROR;
- }
- /* Check if data must be read/written to/from the btree page itself. */
- if( offset<pCur->info.nLocal ){
- int a = amt;
- if( a+offset>pCur->info.nLocal ){
- a = pCur->info.nLocal - offset;
- }
- rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
- offset = 0;
- pBuf += a;
- amt -= a;
- }else{
- offset -= pCur->info.nLocal;
- }
- pBt = pCur->pBt;
- if( rc==SQLITE_OK && amt>0 ){
- const int ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
- Pgno nextPage;
- nextPage = get4byte(&aPayload[pCur->info.nLocal]);
- #ifndef SQLITE_OMIT_INCRBLOB
- /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[]
- ** has not been allocated, allocate it now. The array is sized at
- ** one entry for each overflow page in the overflow chain. The
- ** page number of the first overflow page is stored in aOverflow[0],
- ** etc. A value of 0 in the aOverflow[] array means "not yet known"
- ** (the cache is lazily populated).
- */
- if( pCur->isIncrblobHandle && !pCur->aOverflow ){
- int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
- pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl);
- if( nOvfl && !pCur->aOverflow ){
- rc = SQLITE_NOMEM;
- }
- }
- /* If the overflow page-list cache has been allocated and the
- ** entry for the first required overflow page is valid, skip
- ** directly to it.
- */
- if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){
- iIdx = (offset/ovflSize);
- nextPage = pCur->aOverflow[iIdx];
- offset = (offset%ovflSize);
- }
- #endif
- for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
- #ifndef SQLITE_OMIT_INCRBLOB
- /* If required, populate the overflow page-list cache. */
- if( pCur->aOverflow ){
- assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
- pCur->aOverflow[iIdx] = nextPage;
- }
- #endif
- if( offset>=ovflSize ){
- /* The only reason to read this page is to obtain the page
- ** number for the next page in the overflow chain. The page
- ** data is not required. So first try to lookup the overflow
- ** page-list cache, if any, then fall back to the getOverflowPage()
- ** function.
- */
- #ifndef SQLITE_OMIT_INCRBLOB
- if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){
- nextPage = pCur->aOverflow[iIdx+1];
- } else
- #endif
- rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
- offset -= ovflSize;
- }else{
- /* Need to read this page properly. It contains some of the
- ** range of data that is being read (eOp==0) or written (eOp!=0).
- */
- DbPage *pDbPage;
- int a = amt;
- rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
- if( rc==SQLITE_OK ){
- aPayload = sqlite3PagerGetData(pDbPage);
- nextPage = get4byte(aPayload);
- if( a + offset > ovflSize ){
- a = ovflSize - offset;
- }
- rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
- sqlite3PagerUnref(pDbPage);
- offset = 0;
- amt -= a;
- pBuf += a;
- }
- }
- }
- }
- if( rc==SQLITE_OK && amt>0 ){
- return SQLITE_CORRUPT_BKPT;
- }
- return rc;
- }
- /*
- ** Read part of the key associated with cursor pCur. Exactly
- ** "amt" bytes will be transfered into pBuf[]. The transfer
- ** begins at "offset".
- **
- ** Return SQLITE_OK on success or an error code if anything goes
- ** wrong. An error is returned if "offset+amt" is larger than
- ** the available payload.
- */
- int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
- int rc;
- assert( cursorHoldsMutex(pCur) );
- rc = restoreOrClearCursorPosition(pCur);
- if( rc==SQLITE_OK ){
- assert( pCur->eState==CURSOR_VALID );
- assert( pCur->pPage!=0 );
- if( pCur->pPage->intKey ){
- return SQLITE_CORRUPT_BKPT;
- }
- assert( pCur->pPage->intKey==0 );
- assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
- rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0);
- }
- return rc;
- }
- /*
- ** Read part of the data associated with cursor pCur. Exactly
- ** "amt" bytes will be transfered into pBuf[]. The transfer
- ** begins at "offset".
- **
- ** Return SQLITE_OK on success or an error code if anything goes
- ** wrong. An error is returned if "offset+amt" is larger than
- ** the available payload.
- */
- int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
- int rc;
- assert( cursorHoldsMutex(pCur) );
- rc = restoreOrClearCursorPosition(pCur);
- if( rc==SQLITE_OK ){
- assert( pCur->eState==CURSOR_VALID );
- assert( pCur->pPage!=0 );
- assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
- rc = accessPayload(pCur, offset, amt, pBuf, 1, 0);
- }
- return rc;
- }
- /*
- ** Return a pointer to payload information from the entry that the
- ** pCur cursor is pointing to. The pointer is to the beginning of
- ** the key if skipKey==0 and it points to the beginning of data if
- ** skipKey==1. The number of bytes of available key/data is written
- ** into *pAmt. If *pAmt==0, then the value returned will not be
- ** a valid pointer.
- **
- ** This routine is an optimization. It is common for the entire key
- ** and data to fit on the local page and for there to be no overflow
- ** pages. When that is so, this routine can be used to access the
- ** key and data without making a copy. If the key and/or data spills
- ** onto overflow pages, then accessPayload() must be used to reassembly
- ** the key/data and copy it into a preallocated buffer.
- **
- ** The pointer returned by this routine looks directly into the cached
- ** page of the database. The data might change or move the next time
- ** any btree routine is called.
- */
- static const unsigned char *fetchPayload(
- BtCursor *pCur, /* Cursor pointing to entry to read from */
- int *pAmt, /* Write the number of available bytes here */
- int skipKey /* read beginning at data if this is true */
- ){
- unsigned char *aPayload;
- MemPage *pPage;
- u32 nKey;
- int nLocal;
- assert( pCur!=0 && pCur->pPage!=0 );
- assert( pCur->eState==CURSOR_VALID );
- assert( cursorHoldsMutex(pCur) );
- pPage = pCur->pPage;
- assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- getCellInfo(pCur);
- aPayload = pCur->info.pCell;
- aPayload += pCur->info.nHeader;
- if( pPage->intKey ){
- nKey = 0;
- }else{
- nKey = pCur->info.nKey;
- }
- if( skipKey ){
- aPayload += nKey;
- nLocal = pCur->info.nLocal - nKey;
- }else{
- nLocal = pCur->info.nLocal;
- if( nLocal>nKey ){
- nLocal = nKey;
- }
- }
- *pAmt = nLocal;
- return aPayload;
- }
- /*
- ** For the entry that cursor pCur is point to, return as
- ** many bytes of the key or data as are available on the local
- ** b-tree page. Write the number of available bytes into *pAmt.
- **
- ** The pointer returned is ephemeral. The key/data may move
- ** or be destroyed on the next call to any Btree routine,
- ** including calls from other threads against the same cache.
- ** Hence, a mutex on the BtShared should be held prior to calling
- ** this routine.
- **
- ** These routines is used to get quick access to key and data
- ** in the common case where no overflow pages are used.
- */
- const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){
- assert( cursorHoldsMutex(pCur) );
- if( pCur->eState==CURSOR_VALID ){
- return (const void*)fetchPayload(pCur, pAmt, 0);
- }
- return 0;
- }
- const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){
- assert( cursorHoldsMutex(pCur) );
- if( pCur->eState==CURSOR_VALID ){
- return (const void*)fetchPayload(pCur, pAmt, 1);
- }
- return 0;
- }
- /*
- ** Move the cursor down to a new child page. The newPgno argument is the
- ** page number of the child page to move to.
- */
- static int moveToChild(BtCursor *pCur, u32 newPgno){
- int rc;
- MemPage *pNewPage;
- MemPage *pOldPage;
- BtShared *pBt = pCur->pBt;
- assert( cursorHoldsMutex(pCur) );
- assert( pCur->eState==CURSOR_VALID );
- rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
- if( rc ) return rc;
- pNewPage->idxParent = pCur->idx;
- pOldPage = pCur->pPage;
- pOldPage->idxShift = 0;
- releasePage(pOldPage);
- pCur->pPage = pNewPage;
- pCur->idx = 0;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- if( pNewPage->nCell<1 ){
- return SQLITE_CORRUPT_BKPT;
- }
- return SQLITE_OK;
- }
- /*
- ** Return true if the page is the virtual root of its table.
- **
- ** The virtual root page is the root page for most tables. But
- ** for the table rooted on page 1, sometime the real root page
- ** is empty except for the right-pointer. In such cases the
- ** virtual root page is the page that the right-pointer of page
- ** 1 is pointing to.
- */
- int sqlite3BtreeIsRootPage(MemPage *pPage){
- MemPage *pParent;
- assert( sqlite3_mutex_held(pPage->pBt->mutex) );
- pParent = pPage->pParent;
- if( pParent==0 ) return 1;
- if( pParent->pgno>1 ) return 0;
- if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
- return 0;
- }
- /*
- ** Move the cursor up to the parent page.
- **
- ** pCur->idx is set to the cell index that contains the pointer
- ** to the page we are coming from. If we are coming from the
- ** right-most child page then pCur->idx is set to one more than
- ** the largest cell index.
- */
- void sqlite3BtreeMoveToParent(BtCursor *pCur){
- MemPage *pParent;
- MemPage *pPage;
- int idxParent;
- assert( cursorHoldsMutex(pCur) );
- assert( pCur->eState==CURSOR_VALID );
- pPage = pCur->pPage;
- assert( pPage!=0 );
- assert( !sqlite3BtreeIsRootPage(pPage) );
- pParent = pPage->pParent;
- assert( pParent!=0 );
- idxParent = pPage->idxParent;
- sqlite3PagerRef(pParent->pDbPage);
- releasePage(pPage);
- pCur->pPage = pParent;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- assert( pParent->idxShift==0 );
- pCur->idx = idxParent;
- }
- /*
- ** Move the cursor to the root page
- */
- static int moveToRoot(BtCursor *pCur){
- MemPage *pRoot;
- int rc = SQLITE_OK;
- Btree *p = pCur->pBtree;
- BtShared *pBt = p->pBt;
- assert( cursorHoldsMutex(pCur) );
- assert( CURSOR_INVALID < CURSOR_REQUIRESEEK );
- assert( CURSOR_VALID < CURSOR_REQUIRESEEK );
- assert( CURSOR_FAULT > CURSOR_REQUIRESEEK );
- if( pCur->eState>=CURSOR_REQUIRESEEK ){
- if( pCur->eState==CURSOR_FAULT ){
- return pCur->skip;
- }
- clearCursorPosition(pCur);
- }
- pRoot = pCur->pPage;
- if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
- assert( pRoot->isInit );
- }else{
- if(
- SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
- ){
- pCur->eState = CURSOR_INVALID;
- return rc;
- }
- releasePage(pCur->pPage);
- pCur->pPage = pRoot;
- }
- pCur->idx = 0;
- pCur->info.nSize = 0;
- pCur->atLast = 0;
- pCur->validNKey = 0;
- if( pRoot->nCell==0 && !pRoot->leaf ){
- Pgno subpage;
- assert( pRoot->pgno==1 );
- subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
- assert( subpage>0 );
- pCur->eState = CURSOR_VALID;
- rc = moveToChild(pCur, subpage);
- }
- pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
- return rc;
- }
- /*
- ** Move the cursor down to the left-most leaf entry beneath the
- ** entry to which it is currently pointing.
- **
- ** The left-most leaf is the one with the smallest key - the first
- ** in ascending order.
- */
- static int moveToLeftmost(BtCursor *pCur){
- Pgno pgno;
- int rc = SQLITE_OK;
- MemPage *pPage;
- assert( cursorHoldsMutex(pCur) );
- assert( pCur->eState==CURSOR_VALID );
- while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
- assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
- pgno = get4byte(findCell(pPage, pCur->idx));
- rc = moveToChild(pCur, pgno);
- }
- return rc;
- }
- /*
- ** Move the cursor down to the right-most leaf entry beneath the
- ** page to which it is currently pointing. Notice the difference
- ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
- ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
- ** finds the right-most entry beneath the *page*.
- **
- ** The right-most entry is the one with the largest key - the last
- ** key in ascending order.
- */
- static int moveToRightmost(BtCursor *pCur){
- Pgno pgno;
- int rc = SQLITE_OK;
- MemPage *pPage;
- assert( cursorHoldsMutex(pCur) );
- assert( pCur->eState==CURSOR_VALID );
- while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
- pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
- pCur->idx = pPage->nCell;
- rc = moveToChild(pCur, pgno);
- }
- if( rc==SQLITE_OK ){
- pCur->idx = pPage->nCell - 1;
- pCur->info.nSize = 0;
- pCur->validNKey = 0;
- }
- return SQLITE_OK;
- }
- /* Move the cursor to the first entry in the table. Return SQLITE_OK
- ** on success. Set *pRes to 0 if the cursor actually points to something
- ** or set *pRes to 1 if the table is empty.
- */
- int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
- int rc;
- assert( cursorHoldsMutex(pCur) );
- assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
- rc = moveToRoot(pCur);
- if( rc==SQLITE_OK ){
- if( pCur->eState==CURSOR_INVALID ){
- assert( pCur->pPage->nCell==0 );
- *pRes = 1;
- rc = SQLITE_OK;
- }else{
- assert( pCur->pPage->nCell>0 );
- *pRes = 0;
- rc = moveToLeftmost(pCur);
- }
- }
- return rc;
- }
- /* Move the cursor to the last entry in the table. Return SQLITE_OK
- ** on success. Set *pRes to 0 if the cursor actually points to something
- ** or set *pRes to 1 if the table is empty.
- */
- int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
- int rc;
-
- assert( cursorHoldsMutex(pCur) );
- assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
- rc = moveToRoot(pCur);
- if( rc==SQLITE_OK ){
- if( CURSOR_INVALID==pCur->eState ){
- assert( pCur->pPage->nCell==0 );
- *pRes = 1;
- }else{
- assert( pCur->eState==CURSOR_VALID );
- *pRes = 0;
- rc = moveToRightmost(pCur);
- getCellInfo(pCur);
- pCur->atLast = rc==SQLITE_OK;
- }
- }
- return rc;
- }
- /* Move the cursor so that it points to an entry near the key
- ** specified by pKey/nKey/pUnKey. Return a success code.
- **
- ** For INTKEY tables, only the nKey parameter is used. pKey
- ** and pUnKey must be NULL. For index tables, either pUnKey
- ** must point to a key that has already been unpacked, or else
- ** pKey/nKey describes a blob containing the key.
- **
- ** If an exact match is not found, then the cursor is always
- ** left pointing at a leaf page which would hold the entry if it
- ** were present. The cursor might point to an entry that comes