pager.c.svn-base
上传用户:sunhongbo
上传日期:2022-01-25
资源大小:3010k
文件大小:172k
- /*
- ** 2001 September 15
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- *************************************************************************
- ** This is the implementation of the page cache subsystem or "pager".
- **
- ** The pager is used to access a database disk file. It implements
- ** atomic commit and rollback through the use of a journal file that
- ** is separate from the database file. The pager also implements file
- ** locking to prevent two processes from writing the same database
- ** file simultaneously, or one process from reading the database while
- ** another is writing.
- **
- ** @(#) $Id: pager.c,v 1.426 2008/04/14 23:13:46 drh Exp $
- */
- #ifndef SQLITE_OMIT_DISKIO
- #include "sqliteInt.h"
- #include <assert.h>
- #include <string.h>
/*
** Tracing macros used for troubleshooting.  They are normally disabled:
** with the "#if 0" below left as-is, every PAGERTRACEn() invocation
** expands to nothing, so its arguments are never evaluated.  Change the
** "#if 0" to "#if 1" to route the trace output through printf().
*/
#if 0
# define sqlite3DebugPrintf          printf
# define PAGERTRACE1(X)              sqlite3DebugPrintf(X)
# define PAGERTRACE2(X,Y)            sqlite3DebugPrintf(X,Y)
# define PAGERTRACE3(X,Y,Z)          sqlite3DebugPrintf(X,Y,Z)
# define PAGERTRACE4(X,Y,Z,W)        sqlite3DebugPrintf(X,Y,Z,W)
# define PAGERTRACE5(X,Y,Z,W,V)      sqlite3DebugPrintf(X,Y,Z,W,V)
#else
# define PAGERTRACE1(X)
# define PAGERTRACE2(X,Y)
# define PAGERTRACE3(X,Y,Z)
# define PAGERTRACE4(X,Y,Z,W)
# define PAGERTRACE5(X,Y,Z,W,V)
#endif
/*
** The following two macros are used within the PAGERTRACEX() macros above
** to print out file-descriptors.
**
** PAGERID() takes a pointer to a Pager struct as its argument and yields
** the associated file-descriptor (the Pager.fd pointer) cast to int.
** FILEHANDLEID() takes an sqlite3_file pointer as its argument and yields
** that pointer cast to int.
**
** Both macro arguments are parenthesized so that an arbitrary pointer
** expression (for example "aPager+1") can be passed safely.
*/
#define PAGERID(p)        ((int)((p)->fd))
#define FILEHANDLEID(fd)  ((int)(fd))
- /*
- ** The page cache as a whole is always in one of the following
- ** states:
- **
- ** PAGER_UNLOCK The page cache is not currently reading or
- ** writing the database file. There is no
- ** data held in memory. This is the initial
- ** state.
- **
- ** PAGER_SHARED The page cache is reading the database.
- ** Writing is not permitted. There can be
- ** multiple readers accessing the same database
- ** file at the same time.
- **
- ** PAGER_RESERVED This process has reserved the database for writing
- ** but has not yet made any changes. Only one process
- ** at a time can reserve the database. The original
- ** database file has not been modified so other
- ** processes may still be reading the on-disk
- ** database file.
- **
- ** PAGER_EXCLUSIVE The page cache is writing the database.
- ** Access is exclusive. No other processes or
- ** threads can be reading or writing while one
- ** process is writing.
- **
- ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
- ** after all dirty pages have been written to the
- ** database file and the file has been synced to
- ** disk. All that remains to do is to remove or
- ** truncate the journal file and the transaction
- ** will be committed.
- **
- ** The page cache comes up in PAGER_UNLOCK. The first time a
- ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
- ** After all pages have been released using sqlite_page_unref(),
- ** the state transitions back to PAGER_UNLOCK. The first time
- ** that sqlite3PagerWrite() is called, the state transitions to
- ** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be
- ** called on an outstanding page which means that the pager must
- ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
- ** PAGER_RESERVED means that there is an open rollback journal.
- ** The transition to PAGER_EXCLUSIVE occurs before any changes
- ** are made to the database file, though writes to the rollback
- ** journal occurs with just PAGER_RESERVED. After an sqlite3PagerRollback()
- ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
- ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
- */
#define PAGER_UNLOCK      0   /* not reading or writing; the initial state */
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5   /* dirty pages written and database file synced */
- /*
- ** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
- ** then failed attempts to get a reserved lock will invoke the busy callback.
- ** This is off by default. To see why, consider the following scenario:
- **
- ** Suppose thread A already has a shared lock and wants a reserved lock.
- ** Thread B already has a reserved lock and wants an exclusive lock. If
- ** both threads are using their busy callbacks, it might be a long time
- ** before one of the threads gives up and allows the other to proceed.
- ** But if the thread trying to get the reserved lock gives up quickly
- ** (if it never invokes its busy callback) then the contention will be
- ** resolved quickly.
- */
/* Default: a failed reserved-lock attempt does not invoke the busy callback. */
#if !defined(SQLITE_BUSY_RESERVED_LOCK)
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
/*
** Round X up to the next multiple of 8.  If X is an address, the result
** is therefore an address aligned to an 8-byte boundary.
*/
#define FORCE_ALIGNMENT(X)   (((X) + 7) & ~7)
typedef struct PgHdr PgHdr;

/*
** Every pager keeps its currently unreferenced pages on a list ordered
** from least-recently-used to most-recently-used: the first entry has not
** been referenced in a long time, the last entry was used recently.  One
** instance of this structure is embedded in each pager for that purpose
** (variable Pager.lru).
**
** When memory-management is enabled, all unreferenced pages are also kept
** on a global LRU list (global variable sqlite3LruPageList).
**
** For both lists, pFirstSynced points to the first page on the list that
** does not require an fsync() operation before its memory can be
** reclaimed.  pFirstSynced is NULL if no such page exists.
*/
typedef struct PagerLruList PagerLruList;
struct PagerLruList {
  PgHdr *pFirst;        /* Head of the list: the least recently used page */
  PgHdr *pLast;         /* Tail of the list: the most recently used page */
  PgHdr *pFirstSynced;  /* First page in list with PgHdr.needSync==0 */
};
- /*
- ** The following structure contains the next and previous pointers used
- ** to link a PgHdr structure into a PagerLruList linked list.
- */
- typedef struct PagerLruLink PagerLruLink;
- struct PagerLruLink {
- PgHdr *pNext;
- PgHdr *pPrev;
- };
- /*
- ** Each in-memory image of a page begins with the following header.
- ** This header is only visible to this pager module. The client
- ** code that calls pager sees only the data that follows the header.
- **
- ** Client code should call sqlite3PagerWrite() on a page prior to making
- ** any modifications to that page. The first time sqlite3PagerWrite()
- ** is called, the original page contents are written into the rollback
- ** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
- ** the journal page has made it onto the disk surface, PgHdr.needSync
- ** is cleared. The modified page cannot be written back into the original
- ** database file until the journal pages have been synced to disk and the
- ** PgHdr.needSync has been cleared.
- **
- ** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and
- ** is cleared again when the page content is written back to the original
- ** database file.
- **
- ** Details of important structure elements:
- **
- ** needSync
- **
- ** If this is true, this means that it is not safe to write the page
- ** content to the database because the original content needed
- ** for rollback has not been synced to the main rollback journal.
- ** The original content may have been written to the rollback journal
- ** but it has not yet been synced. So we cannot write to the database
- ** file because power failure might cause the page in the journal file
- ** to never reach the disk. It is as if the write to the journal file
- ** does not occur until the journal file is synced.
- **
- ** This flag is false if the page content exactly matches what
- ** currently exists in the database file. The needSync flag is also
- ** false if the original content has been written to the main rollback
- ** journal and synced. If the page represents a new page that has
- ** been added onto the end of the database during the current
- ** transaction, the needSync flag is true until the original database
- ** size in the journal header has been synced to disk.
- **
- ** inJournal
- **
- ** This is true if the original page has been written into the main
- ** rollback journal. This is always false for new pages added to
- ** the end of the database file during the current transaction.
- ** And this flag says nothing about whether or not the journal
- ** has been synced to disk. For pages that are in the original
- ** database file, the following expression should always be true:
- **
- ** inJournal = sqlite3BitvecTest(pPager->pInJournal, pgno)
- **
- ** The pPager->pInJournal object is only valid for the original
- ** pages of the database, not new pages that are added to the end
- ** of the database, so obviously the above expression cannot be
- ** valid for new pages. For new pages inJournal is always 0.
- **
- ** dirty
- **
- ** When true, this means that the content of the page has been
- ** modified and needs to be written back to the database file.
- ** If false, it means that either the content of the page is
- ** unchanged or else the content is unimportant and we do not
- ** care whether or not it is preserved.
- **
- ** alwaysRollback
- **
- ** This means that the sqlite3PagerDontRollback() API should be
- ** ignored for this page. The DontRollback() API attempts to say
- ** that the content of the page on disk is unimportant (it is an
- ** unused page on the freelist) so that it is unnecessary to
- ** rollback changes to this page because the content of the page
- ** can change without changing the meaning of the database. This
- ** flag overrides any DontRollback() attempt. This flag is set
- ** when a page that originally contained valid data is added to
- ** the freelist. Later in the same transaction, this page might
- ** be pulled from the freelist and reused for something different
- ** and at that point the DontRollback() API will be called because
- ** pages taken from the freelist do not need to be protected by
- ** the rollback journal. But this flag says that the page was
- ** not originally part of the freelist so that it still needs to
- ** be rolled back in spite of any subsequent DontRollback() calls.
- **
- ** needRead
- **
- ** This flag means (when true) that the content of the page has
- ** not yet been loaded from disk. The in-memory content is just
- ** garbage. (Actually, we zero the content, but you should not
- ** make any assumptions about the content nevertheless.) If the
- ** content is needed in the future, it should be read from the
- ** original database file.
- */
- struct PgHdr {
- Pager *pPager; /* The pager to which this page belongs */
- Pgno pgno; /* The page number for this page */
- PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
- PagerLruLink free; /* Next and previous free pages */
- PgHdr *pNextAll; /* A list of all pages */
- u8 inJournal; /* TRUE if has been written to journal */
- u8 dirty; /* TRUE if we need to write back changes */
- u8 needSync; /* Sync journal before writing this page */
- u8 alwaysRollback; /* Disable DontRollback() for this page */
- u8 needRead; /* Read content if PagerWrite() is called */
- short int nRef; /* Number of users of this page */
- PgHdr *pDirty, *pPrevDirty; /* Dirty pages */
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- PagerLruLink gfree; /* Global list of nRef==0 pages */
- #endif
- #ifdef SQLITE_CHECK_PAGES
- u32 pageHash;
- #endif
- void *pData; /* Page data */
- /* Pager.nExtra bytes of local data appended to this header */
- };
- /*
- ** For an in-memory only database, some extra information is recorded about
- ** each page so that changes can be rolled back. (Journal files are not
- ** used for in-memory databases.) The following information is added to
- ** the end of every EXTRA block for in-memory databases.
- **
- ** This information could have been added directly to the PgHdr structure.
- ** But then it would take up an extra 8 bytes of storage on every PgHdr
- ** even for disk-based databases. Splitting it out saves 8 bytes. This
- ** is only a savings of 0.8% but those percentages add up.
- */
- typedef struct PgHistory PgHistory;
- struct PgHistory {
- u8 *pOrig; /* Original page text. Restore to this on a full rollback */
- u8 *pStmt; /* Text as it was at the beginning of the current statement */
- PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */
- u8 inStmt; /* TRUE if in the statement subjournal */
- };
/*
** Macros used for invoking the codec if there is one.
**
** CODEC1(P,D,N,X) is a statement: when pager P has an xCodec routine it
** calls P->xCodec(P->pCodecArg, D, N, X) and discards the result.
** CODEC2(P,D,N,X) is an expression: it yields the value returned by the
** codec, or D itself when there is no codec, cast to (char*).
**
** When SQLITE_HAS_CODEC is not defined, CODEC1 expands to nothing and
** CODEC2 simply yields (char*)(D).
**
** Every macro argument is parenthesized so that expression arguments such
** as "pBuf+1" are cast and dereferenced as a whole.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) \
    if( (P)->xCodec!=0 ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
# define CODEC2(P,D,N,X) \
    ((char*)((P)->xCodec!=0?(P)->xCodec((P)->pCodecArg,(D),(N),(X)):(D)))
#else
# define CODEC1(P,D,N,X)   /* NO-OP */
# define CODEC2(P,D,N,X)   ((char*)(D))
#endif
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
** PGHDR_TO_DATA(P)      yields the pData pointer stored in header P.
** PGHDR_TO_EXTRA(G,P)   yields the address immediately following header G
**                       (the second argument is not used).
** PGHDR_TO_HIST(P,PGR)  yields the PgHistory object located (PGR)->nExtra
**                       bytes past the end of header P.
**
** The definition of PGHDR_TO_HIST spans two lines, so the first line must
** end with a backslash line-continuation.
*/
#define PGHDR_TO_DATA(P)      ((P)->pData)
#define PGHDR_TO_EXTRA(G,P)   ((void*)&((G)[1]))
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->nExtra])
- /*
- ** A open page cache is an instance of the following structure.
- **
- ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
- ** or SQLITE_FULL. Once one of the first three errors occurs, it persists
- ** and is returned as the result of every major pager API call. The
- ** SQLITE_FULL return code is slightly different. It persists only until the
- ** next successful rollback is performed on the pager cache. Also,
- ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
- ** APIs, they may still be used successfully.
- */
- struct Pager {
- sqlite3_vfs *pVfs; /* OS functions to use for IO */
- u8 journalOpen; /* True if journal file descriptors is valid */
- u8 journalStarted; /* True if header of journal is synced */
- u8 useJournal; /* Use a rollback journal on this file */
- u8 noReadlock; /* Do not bother to obtain readlocks */
- u8 stmtOpen; /* True if the statement subjournal is open */
- u8 stmtInUse; /* True we are in a statement subtransaction */
- u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
- u8 noSync; /* Do not sync the journal if true */
- u8 fullSync; /* Do extra syncs of the journal for robustness */
- u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */
- u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
- u8 tempFile; /* zFilename is a temporary file */
- u8 readOnly; /* True for a read-only database */
- u8 needSync; /* True if an fsync() is needed on the journal */
- u8 dirtyCache; /* True if cached pages have changed */
- u8 alwaysRollback; /* Disable DontRollback() for all pages */
- u8 memDb; /* True to inhibit all file I/O */
- u8 setMaster; /* True if a m-j name has been written to jrnl */
- u8 doNotSync; /* Boolean. While true, do not spill the cache */
- u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
- u8 changeCountDone; /* Set after incrementing the change-counter */
- u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
- int errCode; /* One of several kinds of errors */
- int dbSize; /* Number of pages in the file */
- int origDbSize; /* dbSize before the current change */
- int stmtSize; /* Size of database (in pages) at stmt_begin() */
- int nRec; /* Number of pages written to the journal */
- u32 cksumInit; /* Quasi-random value added to every checksum */
- int stmtNRec; /* Number of records in stmt subjournal */
- int nExtra; /* Add this many bytes to each in-memory page */
- int pageSize; /* Number of bytes in a page */
- int nPage; /* Total number of in-memory pages */
- int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
- int mxPage; /* Maximum number of pages to hold in cache */
- Pgno mxPgno; /* Maximum allowed size of the database */
- Bitvec *pInJournal; /* One bit for each page in the database file */
- Bitvec *pInStmt; /* One bit for each page in the database */
- char *zFilename; /* Name of the database file */
- char *zJournal; /* Name of the journal file */
- char *zDirectory; /* Directory hold database and journal files */
- char *zStmtJrnl; /* Name of the statement journal file */
- sqlite3_file *fd, *jfd; /* File descriptors for database and journal */
- sqlite3_file *stfd; /* File descriptor for the statement subjournal*/
- BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
- PagerLruList lru; /* LRU list of free pages */
- PgHdr *pAll; /* List of all pages */
- PgHdr *pStmt; /* List of pages in the statement subjournal */
- PgHdr *pDirty; /* List of all dirty pages */
- i64 journalOff; /* Current byte offset in the journal file */
- i64 journalHdr; /* Byte offset to previous journal header */
- i64 stmtHdrOff; /* First journal header written this statement */
- i64 stmtCksum; /* cksumInit when statement was started */
- i64 stmtJSize; /* Size of journal at stmt_begin() */
- int sectorSize; /* Assumed sector size during rollback */
- #ifdef SQLITE_TEST
- int nHit, nMiss; /* Cache hits and missing */
- int nRead, nWrite; /* Database pages read/written */
- #endif
- void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
- void (*xReiniter)(DbPage*,int); /* Call this routine when reloading pages */
- #ifdef SQLITE_HAS_CODEC
- void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
- void *pCodecArg; /* First argument to xCodec() */
- #endif
- int nHash; /* Size of the pager hash table */
- PgHdr **aHash; /* Hash table to map page number to PgHdr */
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- Pager *pNext; /* Doubly linked list of pagers on which */
- Pager *pPrev; /* sqlite3_release_memory() will work */
- int iInUseMM; /* Non-zero if unavailable to MM */
- int iInUseDB; /* Non-zero if in sqlite3_release_memory() */
- #endif
- char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
- char dbFileVers[16]; /* Changes whenever database file changes */
- };
/*
** Global counters used for testing purposes only.  They exist only when
** SQLITE_TEST is defined and they are not thread-safe.
**
** PAGER_INCR(v) increments counter v in a testing build and expands to
** nothing otherwise.
*/
#ifdef SQLITE_TEST
int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
int sqlite3_pager_pgfree_count = 0;    /* Number of cache pages freed */
# define PAGER_INCR(v)  v++
#else
# define PAGER_INCR(v)
#endif
/*
** Head of a double-linked list of all pagers that are eligible for page
** stealing by the sqlite3_release_memory() interface.  Access to this
** list is protected by the SQLITE_MUTEX_STATIC_MEM2 mutex.
**
** sqlite3LruPageList is the global LRU list of unreferenced pages; it
** starts out empty.  Both variables exist only when memory-management is
** enabled.
*/
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
static Pager *sqlite3PagerList = 0;
static PagerLruList sqlite3LruPageList = { 0, 0, 0 };
#endif
- /*
- ** Journal files begin with the following magic string. The data
- ** was obtained from /dev/random. It is used only as a sanity check.
- **
- ** Since version 2.8.0, the journal format contains additional sanity
- ** checking information. If the power fails while the journal is being
- ** written, semi-random garbage data might appear in the journal
- ** file after power is restored. If an attempt is then made
- ** to roll the journal back, the database could be corrupted. The additional
- ** sanity checking data is an attempt to discover the garbage in the
- ** journal and ignore it.
- **
- ** The sanity checking information for the new journal format consists
- ** of a 32-bit checksum on each page of data. The checksum covers both
- ** the page number and the pPager->pageSize bytes of data for the page.
- ** This cksum is initialized to a 32-bit random value that appears in the
- ** journal file right after the header. The random initializer is important,
- ** because garbage data that appears at the end of a journal is likely
- ** data that was once in other files that have now been deleted. If the
- ** garbage data came from an obsolete journal file, the checksums might
- ** be correct. But by initializing the checksum to random value which
- ** is different for every journal, we minimize that risk.
- */
/* The 8-byte magic string found at the start of every journal file. */
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd7,
};
/*
** The size of the header and of each page in the journal is determined
** by the following macros.
**
** JOURNAL_PG_SZ(pPager) is the pager's page size plus 8 bytes.
**
** The macro argument is parenthesized so that any pointer expression
** (for example "aPager+1") may be passed.
*/
#define JOURNAL_PG_SZ(pPager)   (((pPager)->pageSize) + 8)

/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager)  ((pPager)->sectorSize)
/*
** MEMDB is true when the pager is handling an in-memory database.  It
** reads the memDb flag of the variable named "pPager" that is in scope
** where the macro is used.
**
** It is a macro so that, when SQLITE_OMIT_MEMORYDB is defined, MEMDB is
** the constant 0 and the compiler can optimize out code that would never
** execute.
*/
#ifndef SQLITE_OMIT_MEMORYDB
# define MEMDB pPager->memDb
#else
# define MEMDB 0
#endif
/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility).  In the
** journal it signifies that the remainder of the journal file is devoted
** to storing a master journal name - there are no more pages to roll
** back.  See comments for function writeMasterJournal() for details.
**
** The value is one more than PENDING_BYTE divided by the page size of
** pager x.
*/
/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
#define PAGER_MJ_PGNO(x)   ((PENDING_BYTE / ((x)->pageSize)) + 1)
/*
** The largest legal page number: (2^31 - 1).
*/
#define PAGER_MAX_PGNO 0x7fffffff
/*
** The pagerEnter() and pagerLeave() routines acquire and release
** a mutex on each pager.  The mutex is recursive.
**
** This is a special-purpose mutex.  It only provides mutual exclusion
** between the Btree and the Memory Management sqlite3_release_memory()
** function.  It does not prevent, for example, two Btrees from accessing
** the same pager at the same time.  Other general-purpose mutexes in
** the btree layer handle that chore.
**
** When memory-management is not enabled both routines are no-op macros.
*/
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
static void pagerEnter(Pager *p){
  p->iInUseDB++;
  if( p->iInUseMM && p->iInUseDB==1 ){
    /* First entry while iInUseMM is set: withdraw the claim, then
    ** re-establish it while holding the MEM2 mutex. */
    sqlite3_mutex *pMem2 = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
    p->iInUseDB = 0;
    sqlite3_mutex_enter(pMem2);
    p->iInUseDB = 1;
    sqlite3_mutex_leave(pMem2);
  }
  assert( p->iInUseMM==0 );
}
static void pagerLeave(Pager *p){
  p->iInUseDB--;
  assert( p->iInUseDB>=0 );
}
#else
# define pagerEnter(X)
# define pagerLeave(X)
#endif
- /*
- ** Add page pPg to the end of the linked list managed by structure
- ** pList (pPg becomes the last entry in the list - the most recently
- ** used). Argument pLink should point to either pPg->free or pPg->gfree,
- ** depending on whether pPg is being added to the pager-specific or
- ** global LRU list.
- */
- static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
- pLink->pNext = 0;
- pLink->pPrev = pList->pLast;
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- assert(pLink==&pPg->free || pLink==&pPg->gfree);
- assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
- #endif
- if( pList->pLast ){
- int iOff = (char *)pLink - (char *)pPg;
- PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]);
- pLastLink->pNext = pPg;
- }else{
- assert(!pList->pFirst);
- pList->pFirst = pPg;
- }
- pList->pLast = pPg;
- if( !pList->pFirstSynced && pPg->needSync==0 ){
- pList->pFirstSynced = pPg;
- }
- }
- /*
- ** Remove pPg from the list managed by the structure pointed to by pList.
- **
- ** Argument pLink should point to either pPg->free or pPg->gfree, depending
- ** on whether pPg is being added to the pager-specific or global LRU list.
- */
- static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
- int iOff = (char *)pLink - (char *)pPg;
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- assert(pLink==&pPg->free || pLink==&pPg->gfree);
- assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
- #endif
- if( pPg==pList->pFirst ){
- pList->pFirst = pLink->pNext;
- }
- if( pPg==pList->pLast ){
- pList->pLast = pLink->pPrev;
- }
- if( pLink->pPrev ){
- PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]);
- pPrevLink->pNext = pLink->pNext;
- }
- if( pLink->pNext ){
- PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]);
- pNextLink->pPrev = pLink->pPrev;
- }
- if( pPg==pList->pFirstSynced ){
- PgHdr *p = pLink->pNext;
- while( p && p->needSync ){
- PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]);
- p = pL->pNext;
- }
- pList->pFirstSynced = p;
- }
- pLink->pNext = pLink->pPrev = 0;
- }
- /*
- ** Add page pPg to the list of free pages for the pager. If
- ** memory-management is enabled, also add the page to the global
- ** list of free pages.
- */
- static void lruListAdd(PgHdr *pPg){
- listAdd(&pPg->pPager->lru, &pPg->free, pPg);
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- if( !pPg->pPager->memDb ){
- sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
- listAdd(&sqlite3LruPageList, &pPg->gfree, pPg);
- sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
- }
- #endif
- }
- /*
- ** Remove page pPg from the list of free pages for the associated pager.
- ** If memory-management is enabled, also remove pPg from the global list
- ** of free pages.
- */
- static void lruListRemove(PgHdr *pPg){
- listRemove(&pPg->pPager->lru, &pPg->free, pPg);
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- if( !pPg->pPager->memDb ){
- sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
- listRemove(&sqlite3LruPageList, &pPg->gfree, pPg);
- sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
- }
- #endif
- }
- /*
- ** This function is called just after the needSync flag has been cleared
- ** from all pages managed by pPager (usually because the journal file
- ** has just been synced). It updates the pPager->lru.pFirstSynced variable
- ** and, if memory-management is enabled, the sqlite3LruPageList.pFirstSynced
- ** variable also.
- */
- static void lruListSetFirstSynced(Pager *pPager){
- pPager->lru.pFirstSynced = pPager->lru.pFirst;
- #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
- if( !pPager->memDb ){
- PgHdr *p;
- sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
- for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext);
- assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced);
- sqlite3LruPageList.pFirstSynced = p;
- sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
- }
- #endif
- }
- /*
- ** Return true if page *pPg has already been written to the statement
- ** journal (or statement snapshot has been created, if *pPg is part
- ** of an in-memory database).
- */
- static int pageInStatement(PgHdr *pPg){
- Pager *pPager = pPg->pPager;
- if( MEMDB ){
- return PGHDR_TO_HIST(pPg, pPager)->inStmt;
- }else{
- return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno);
- }
- }
- /*
- ** Change the size of the pager hash table to N. N must be a power
- ** of two.
- */
- static void pager_resize_hash_table(Pager *pPager, int N){
- PgHdr **aHash, *pPg;
- assert( N>0 && (N&(N-1))==0 );
- #ifdef SQLITE_MALLOC_SOFT_LIMIT
- if( N*sizeof(aHash[0])>SQLITE_MALLOC_SOFT_LIMIT ){
- N = SQLITE_MALLOC_SOFT_LIMIT/sizeof(aHash[0]);
- }
- if( N==pPager->nHash ) return;
- #endif
- pagerLeave(pPager);
- sqlite3FaultBenign(SQLITE_FAULTINJECTOR_MALLOC, pPager->aHash!=0);
- aHash = sqlite3MallocZero( sizeof(aHash[0])*N );
- sqlite3FaultBenign(SQLITE_FAULTINJECTOR_MALLOC, 0);
- pagerEnter(pPager);
- if( aHash==0 ){
- /* Failure to rehash is not an error. It is only a performance hit. */
- return;
- }
- sqlite3_free(pPager->aHash);
- pPager->nHash = N;
- pPager->aHash = aHash;
- for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
- int h;
- if( pPg->pgno==0 ){
- assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
- continue;
- }
- h = pPg->pgno & (N-1);
- pPg->pNextHash = aHash[h];
- if( aHash[h] ){
- aHash[h]->pPrevHash = pPg;
- }
- aHash[h] = pPg;
- pPg->pPrevHash = 0;
- }
- }
- /*
- ** Read a 32-bit integer from the given file descriptor. Store the integer
- ** that is read in *pRes. Return SQLITE_OK if everything worked, or an
- ** error code is something goes wrong.
- **
- ** All values are stored on disk as big-endian.
- */
- static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
- unsigned char ac[4];
- int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
- if( rc==SQLITE_OK ){
- *pRes = sqlite3Get4byte(ac);
- }
- return rc;
- }
/*
** Write the 32-bit integer B into the buffer at address A in big-endian
** byte order.
**
** Both arguments are parenthesized so that the (u8*) cast applies to the
** whole pointer expression passed as A (e.g. "aWord+1"), not just to its
** first operand.
*/
#define put32bits(A,B)  sqlite3Put4byte((u8*)(A),(B))
- /*
- ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
- ** on success or an error code is something goes wrong.
- */
- static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
- char ac[4];
- put32bits(ac, val);
- return sqlite3OsWrite(fd, ac, 4, offset);
- }
- /*
- ** If file pFd is open, call sqlite3OsUnlock() on it.
- */
- static int osUnlock(sqlite3_file *pFd, int eLock){
- if( !pFd->pMethods ){
- return SQLITE_OK;
- }
- return sqlite3OsUnlock(pFd, eLock);
- }
/*
** This function determines whether or not the atomic-write optimization
** can be used with this pager.  The optimization can be used if:
**
**   (a) the value returned by OsDeviceCharacteristics() indicates that
**       a database page may be written atomically, and
**   (b) the value returned by OsSectorSize() is less than or equal
**       to the page size.
**
** If the optimization cannot be used, 0 is returned.  If it can be used,
** then the value returned is the size of the journal file when it
** contains rollback data for exactly one page.  That size is also
** returned when the database file is not open.
*/
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
static int jrnlBufferSize(Pager *pPager){
  sqlite3_file *pDb = pPager->fd;

  /* The test below relies on the ATOMICnnn bits being equal to nnn>>8 */
  assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

  if( pDb->pMethods ){
    int dc = sqlite3OsDeviceCharacteristics(pDb);   /* Device characteristics */
    int nSector = sqlite3OsSectorSize(pDb);         /* Sector size */
    int nPage = pPager->pageSize;                   /* Page size */
    int isAtomic = (dc & (SQLITE_IOCAP_ATOMIC|(nPage>>8)))!=0;
    if( !isAtomic || nSector>nPage ){
      return 0;
    }
  }
  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
}
#endif
- /*
- ** This function should be called when an error occurs within the pager
- ** code. The first argument is a pointer to the pager structure, the
- ** second the error-code about to be returned by a pager API function.
- ** The value returned is a copy of the second argument to this function.
- **
- ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
- ** the error becomes persistent. Until the persisten error is cleared,
- ** subsequent API calls on this Pager will immediately return the same
- ** error code.
- **
- ** A persistent error indicates that the contents of the pager-cache
- ** cannot be trusted. This state can be cleared by completely discarding
- ** the contents of the pager-cache. If a transaction was active when
- ** the persistent error occured, then the rollback journal may need
- ** to be replayed.
- */
- static void pager_unlock(Pager *pPager);
- static int pager_error(Pager *pPager, int rc){
- int rc2 = rc & 0xff;
- assert(
- pPager->errCode==SQLITE_FULL ||
- pPager->errCode==SQLITE_OK ||
- (pPager->errCode & 0xff)==SQLITE_IOERR
- );
- if(
- rc2==SQLITE_FULL ||
- rc2==SQLITE_IOERR ||
- rc2==SQLITE_CORRUPT
- ){
- pPager->errCode = rc;
- if( pPager->state==PAGER_UNLOCK && pPager->nRef==0 ){
- /* If the pager is already unlocked, call pager_unlock() now to
- ** clear the error state and ensure that the pager-cache is
- ** completely empty.
- */
- pager_unlock(pPager);
- }
- }
- return rc;
- }
- /*
- ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
- ** on the cache using a hash function. This is used for testing
- ** and debugging only.
- */
- #ifdef SQLITE_CHECK_PAGES
- /*
- ** Return a 32-bit hash of the page data for pPage.
- */
- static u32 pager_datahash(int nByte, unsigned char *pData){
- u32 hash = 0;
- int i;
- for(i=0; i<nByte; i++){
- hash = (hash*1039) + pData[i];
- }
- return hash;
- }
- static u32 pager_pagehash(PgHdr *pPage){
- return pager_datahash(pPage->pPager->pageSize,
- (unsigned char *)PGHDR_TO_DATA(pPage));
- }
- /*
- ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
- ** is defined, and NDEBUG is not defined, an assert() statement checks
- ** that the page is either dirty or still matches the calculated page-hash.
- */
- #define CHECK_PAGE(x) checkPage(x)
- static void checkPage(PgHdr *pPg){
- Pager *pPager = pPg->pPager;
- assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty ||
- pPg->pageHash==pager_pagehash(pPg) );
- }
- #else
- #define pager_datahash(X,Y) 0
- #define pager_pagehash(X) 0
- #define CHECK_PAGE(x)
- #endif
- /*
- ** When this is called the journal file for pager pPager must be open.
- ** The master journal file name is read from the end of the file and
- ** written into memory supplied by the caller.
- **
- ** zMaster must point to a buffer of at least nMaster bytes allocated by
- ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
- ** enough space to write the master journal name). If the master journal
- ** name in the journal is longer than nMaster bytes (including a
- ** nul-terminator), then this is handled as if no master journal name
- ** were present in the journal.
- **
- ** If no master journal file name is present zMaster[0] is set to 0 and
- ** SQLITE_OK returned.
- */
- static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
- int rc;
- u32 len;
- i64 szJ;
- u32 cksum;
- int i;
- unsigned char aMagic[8]; /* A buffer to hold the magic header */
- zMaster[0] = ' ';
- rc = sqlite3OsFileSize(pJrnl, &szJ);
- if( rc!=SQLITE_OK || szJ<16 ) return rc;
- rc = read32bits(pJrnl, szJ-16, &len);
- if( rc!=SQLITE_OK ) return rc;
- if( len>=nMaster ){
- return SQLITE_OK;
- }
- rc = read32bits(pJrnl, szJ-12, &cksum);
- if( rc!=SQLITE_OK ) return rc;
- rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
- if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
- rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- zMaster[len] = ' ';
- /* See if the checksum matches the master journal name */
- for(i=0; i<len; i++){
- cksum -= zMaster[i];
- }
- if( cksum ){
- /* If the checksum doesn't add up, then one or more of the disk sectors
- ** containing the master journal filename is corrupted. This means
- ** definitely roll back, so just return SQLITE_OK and report a (nul)
- ** master-journal filename.
- */
- zMaster[0] = ' ';
- }
-
- return SQLITE_OK;
- }
- /*
- ** Seek the journal file descriptor to the next sector boundary where a
- ** journal header may be read or written. Pager.journalOff is updated with
- ** the new seek offset.
- **
- ** i.e for a sector size of 512:
- **
- ** Input Offset Output Offset
- ** ---------------------------------------
- ** 0 0
- ** 512 512
- ** 100 512
- ** 2000 2048
- **
- */
- static void seekJournalHdr(Pager *pPager){
- i64 offset = 0;
- i64 c = pPager->journalOff;
- if( c ){
- offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
- }
- assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
- assert( offset>=c );
- assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
- pPager->journalOff = offset;
- }
- /*
- ** The journal file must be open when this routine is called. A journal
- ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
- ** current location.
- **
- ** The format for the journal header is as follows:
- ** - 8 bytes: Magic identifying journal format.
- ** - 4 bytes: Number of records in journal, or -1 if no-sync mode is on.
- ** - 4 bytes: Random number used for page hash.
- ** - 4 bytes: Initial database page count.
- ** - 4 bytes: Sector size used by the process that wrote this journal.
- ** - 4 bytes: Database page size.
- **
- ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
- */
- static int writeJournalHdr(Pager *pPager){
- char zHeader[sizeof(aJournalMagic)+20];
- int rc;
- if( pPager->stmtHdrOff==0 ){
- pPager->stmtHdrOff = pPager->journalOff;
- }
- seekJournalHdr(pPager);
- pPager->journalHdr = pPager->journalOff;
- memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
- /*
- ** Write the nRec Field - the number of page records that follow this
- ** journal header. Normally, zero is written to this value at this time.
- ** After the records are added to the journal (and the journal synced,
- ** if in full-sync mode), the zero is overwritten with the true number
- ** of records (see syncJournal()).
- **
- ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
- ** reading the journal this value tells SQLite to assume that the
- ** rest of the journal file contains valid page records. This assumption
- ** is dangerous, as if a failure occured whilst writing to the journal
- ** file it may contain some garbage data. There are two scenarios
- ** where this risk can be ignored:
- **
- ** * When the pager is in no-sync mode. Corruption can follow a
- ** power failure in this case anyway.
- **
- ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
- ** that garbage data is never appended to the journal file.
- */
- assert(pPager->fd->pMethods||pPager->noSync);
- if( (pPager->noSync)
- || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
- ){
- put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
- }else{
- put32bits(&zHeader[sizeof(aJournalMagic)], 0);
- }
- /* The random check-hash initialiser */
- sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
- put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
- /* The initial database size */
- put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
- /* The assumed sector size for this process */
- put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
- if( pPager->journalHdr==0 ){
- /* The page size */
- put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
- }
- IOTRACE(("JHDR %p %lld %dn", pPager, pPager->journalHdr, sizeof(zHeader)))
- rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader),pPager->journalOff);
- pPager->journalOff += JOURNAL_HDR_SZ(pPager);
- /* The journal header has been written successfully. Seek the journal
- ** file descriptor to the end of the journal header sector.
- */
- if( rc==SQLITE_OK ){
- IOTRACE(("JTAIL %p %lldn", pPager, pPager->journalOff-1))
- rc = sqlite3OsWrite(pPager->jfd, "