btree.h
上传用户:tsgydb
上传日期:2007-04-14
资源大小:10674k
文件大小:11k
- /*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
- * Sleepycat Software. All rights reserved.
- */
- /*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
- /*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id: btree.h,v 11.37 2001/01/17 17:09:52 bostic Exp $
- */
- /* Forward structure declarations. */
- struct __btree; typedef struct __btree BTREE;
- struct __cursor; typedef struct __cursor BTREE_CURSOR;
- struct __epg; typedef struct __epg EPG;
- struct __recno; typedef struct __recno RECNO;
- #define DEFMINKEYPAGE (2)
- /*
- * A recno order of 0 indicates that we don't have an order, not that we've
- * an order less than 1.
- */
- #define INVALID_ORDER 0
- #define ISINTERNAL(p) (TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
- #define ISLEAF(p) (TYPE(p) == P_LBTREE ||
- TYPE(p) == P_LRECNO || TYPE(p) == P_LDUP)
- /* Flags for __bam_cadjust_log(). */
- #define CAD_UPDATEROOT 0x01 /* Root page count was updated. */
- /* Flags for __bam_split_log(). */
- #define SPL_NRECS 0x01 /* Split tree has record count. */
- /* Flags for __bam_iitem(). */
- #define BI_DELETED 0x01 /* Key/data pair only placeholder. */
- /* Flags for __bam_stkrel(). */
- #define STK_CLRDBC 0x01 /* Clear dbc->page reference. */
- #define STK_NOLOCK 0x02 /* Don't retain locks. */
- /* Flags for __ram_ca(). These get logged, so make the values explicit. */
- typedef enum {
- CA_DELETE = 0, /* Delete the current record. */
- CA_IAFTER = 1, /* Insert before the current record. */
- CA_IBEFORE = 2, /* Insert after the current record. */
- CA_ICURRENT = 3 /* Overwrite the current record. */
- } ca_recno_arg;
- /*
- * Flags for __bam_search() and __bam_rsearch().
- *
- * Note, internal page searches must find the largest record less than key in
- * the tree so that descents work. Leaf page searches must find the smallest
- * record greater than key so that the returned index is the record's correct
- * position for insertion.
- *
- * The flags parameter to the search routines describes three aspects of the
- * search: the type of locking required (including if we're locking a pair of
- * pages), the item to return in the presence of duplicates and whether or not
- * to return deleted entries. To simplify both the mnemonic representation
- * and the code that checks for various cases, we construct a set of bitmasks.
- */
- #define S_READ 0x00001 /* Read locks. */
- #define S_WRITE 0x00002 /* Write locks. */
- #define S_APPEND 0x00040 /* Append to the tree. */
- #define S_DELNO 0x00080 /* Don't return deleted items. */
- #define S_DUPFIRST 0x00100 /* Return first duplicate. */
- #define S_DUPLAST 0x00200 /* Return last duplicate. */
- #define S_EXACT 0x00400 /* Exact items only. */
- #define S_PARENT 0x00800 /* Lock page pair. */
- #define S_STACK 0x01000 /* Need a complete stack. */
- #define S_PAST_EOF 0x02000 /* If doing insert search (or keyfirst
- * or keylast operations), or a split
- * on behalf of an insert, it's okay to
- * return an entry one past end-of-page.
- */
- #define S_STK_ONLY 0x04000 /* Just return info in the stack */
- #define S_DELETE (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK)
- #define S_FIND (S_READ | S_DUPFIRST | S_DELNO)
- #define S_FIND_WR (S_WRITE | S_DUPFIRST | S_DELNO)
- #define S_INSERT (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
- #define S_KEYFIRST (S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK)
- #define S_KEYLAST (S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
- #define S_WRPAIR (S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT)
- /*
- * Various routines pass around page references. A page reference is
- * a pointer to the page, and the indx indicates an item on the page.
- * Each page reference may include a lock.
- */
- struct __epg {
- PAGE *page; /* The page. */
- db_indx_t indx; /* The index on the page. */
- db_indx_t entries; /* The number of entries on page */
- DB_LOCK lock; /* The page's lock. */
- db_lockmode_t lock_mode; /* The lock mode. */
- };
- /*
- * We maintain a stack of the pages that we're locking in the tree. Grow
- * the stack as necessary.
- *
- * XXX
- * Temporary fix for #3243 -- clear the page and lock from the stack entry.
- * The correct fix is to never release a stack that doesn't hold items.
- */
- #define BT_STK_CLR(c) do {
- (c)->csp = (c)->sp;
- (c)->csp->page = NULL;
- (c)->csp->lock.off = LOCK_INVALID;
- } while (0)
- #define BT_STK_ENTER(dbenv, c, pagep, page_indx, l, mode, ret) do {
- if ((ret =
- (c)->csp == (c)->esp ? __bam_stkgrow(dbenv, c) : 0) == 0) {
- (c)->csp->page = pagep;
- (c)->csp->indx = page_indx;
- (c)->csp->entries = NUM_ENT(pagep);
- (c)->csp->lock = l;
- (c)->csp->lock_mode = mode;
- }
- } while (0)
- #define BT_STK_PUSH(dbenv, c, pagep, page_indx, lock, mode, ret) do {
- BT_STK_ENTER(dbenv, c, pagep, page_indx, lock, mode, ret);
- ++(c)->csp;
- } while (0)
- #define BT_STK_NUM(dbenv, c, pagep, page_indx, ret) do {
- if ((ret =
- (c)->csp == (c)->esp ? __bam_stkgrow(dbenv, c) : 0) == 0) {
- (c)->csp->page = NULL;
- (c)->csp->indx = page_indx;
- (c)->csp->entries = NUM_ENT(pagep);
- (c)->csp->lock.off = LOCK_INVALID;
- (c)->csp->lock_mode = DB_LOCK_NG;
- }
- } while (0)
- #define BT_STK_NUMPUSH(dbenv, c, pagep, page_indx,ret) do {
- BT_STK_NUM(dbenv, cp, pagep, page_indx, ret);
- ++(c)->csp;
- } while (0)
- #define BT_STK_POP(c)
- ((c)->csp == (c)->sp ? NULL : --(c)->csp)
- /* Btree/Recno cursor. */
- struct __cursor {
- /* struct __dbc_internal */
- __DBC_INTERNAL
- /* btree private part */
- EPG *sp; /* Stack pointer. */
- EPG *csp; /* Current stack entry. */
- EPG *esp; /* End stack pointer. */
- EPG stack[5];
- db_indx_t ovflsize; /* Maximum key/data on-page size. */
- db_recno_t recno; /* Current record number. */
- u_int32_t order; /* Relative order among deleted curs. */
- /*
- * Btree:
- * We set a flag in the cursor structure if the underlying object has
- * been deleted. It's not strictly necessary, we could get the same
- * information by looking at the page itself, but this method doesn't
- * require us to retrieve the page on cursor delete.
- *
- * Recno:
- * When renumbering recno databases during deletes, cursors referencing
- * "deleted" records end up positioned between two records, and so must
- * be specially adjusted on the next operation.
- */
- #define C_DELETED 0x0001 /* Record was deleted. */
- /*
- * There are three tree types that require maintaining record numbers.
- * Recno AM trees, Btree AM trees for which the DB_RECNUM flag was set,
- * and Btree off-page duplicate trees.
- */
- #define C_RECNUM 0x0002 /* Tree requires record counts. */
- /*
- * Recno trees have immutable record numbers by default, but optionally
- * support mutable record numbers. Off-page duplicate Recno trees have
- * mutable record numbers. All Btrees with record numbers (including
- * off-page duplicate trees) are mutable by design, no flag is needed.
- */
- #define C_RENUMBER 0x0004 /* Tree records are mutable. */
- u_int32_t flags;
- };
- /*
- * Threshhold value, as a function of bt_minkey, of the number of
- * bytes a key/data pair can use before being placed on an overflow
- * page. Assume every item requires the maximum alignment for
- * padding, out of sheer paranoia.
- */
- #define B_MINKEY_TO_OVFLSIZE(minkey, pgsize)
- ((u_int16_t)(((pgsize) - P_OVERHEAD) / ((minkey) * P_INDX) -
- (BKEYDATA_PSIZE(0) + ALIGN(1, sizeof(int32_t)))))
- /*
- * The maximum space that a single item can ever take up on one page.
- * Used by __bam_split to determine whether a split is still necessary.
- */
- #define B_MAX(a,b) (((a) > (b)) ? (a) : (b))
- #define B_MAXSIZEONPAGE(ovflsize)
- (B_MAX(BOVERFLOW_PSIZE, BKEYDATA_PSIZE(ovflsize)))
- /*
- * The in-memory, per-tree btree/recno data structure.
- */
- struct __btree { /* Btree access method. */
- /*
- * !!!
- * These fields are write-once (when the structure is created) and
- * so are ignored as far as multi-threading is concerned.
- */
- db_pgno_t bt_meta; /* Database meta-data page. */
- db_pgno_t bt_root; /* Database root page. */
- u_int32_t bt_maxkey; /* Maximum keys per page. */
- u_int32_t bt_minkey; /* Minimum keys per page. */
- /* Btree comparison function. */
- int (*bt_compare) __P((DB *, const DBT *, const DBT *));
- /* Btree prefix function. */
- size_t (*bt_prefix) __P((DB *, const DBT *, const DBT *));
- /* Recno access method. */
- int re_pad; /* Fixed-length padding byte. */
- int re_delim; /* Variable-length delimiting byte. */
- u_int32_t re_len; /* Length for fixed-length records. */
- char *re_source; /* Source file name. */
- /*
- * !!!
- * The bt_lpgno field is NOT protected by any mutex, and for this
- * reason must be advisory only, so, while it is read/written by
- * multiple threads, DB is completely indifferent to the quality
- * of its information.
- */
- db_pgno_t bt_lpgno; /* Last insert location. */
- /*
- * !!!
- * The re_modified field is NOT protected by any mutex, and for this
- * reason cannot be anything more complicated than a zero/non-zero
- * value. The actual writing of the backing source file cannot be
- * threaded, so clearing the flag isn't a problem.
- */
- int re_modified; /* If the tree was modified. */
- /*
- * !!!
- * These fields are ignored as far as multi-threading is concerned.
- * There are no transaction semantics associated with backing files,
- * nor is there any thread protection.
- */
- FILE *re_fp; /* Source file handle. */
- int re_eof; /* Backing source file EOF reached. */
- db_recno_t re_last; /* Last record number read. */
- };
- /*
- * Modes for the __bam_curadj recovery records (btree_curadj).
- * These appear in log records, so we wire the values and
- * do not leave it up to the compiler.
- */
- typedef enum {
- DB_CA_DI = 1,
- DB_CA_DUP = 2,
- DB_CA_RSPLIT = 3,
- DB_CA_SPLIT = 4
- } db_ca_mode;
- #include "btree_auto.h"
- #include "btree_ext.h"
- #include "db_am.h"