db_am.c
上传用户:tsgydb
上传日期:2007-04-14
资源大小:10674k
文件大小:11k
- /*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1998, 1999, 2000
- * Sleepycat Software. All rights reserved.
- */
- #include "db_config.h"
- #ifndef lint
- static const char revid[] = "$Id: db_am.c,v 11.42 2001/01/11 18:19:50 bostic Exp $";
- #endif /* not lint */
- #ifndef NO_SYSTEM_INCLUDES
- #include <sys/types.h>
- #include <string.h>
- #endif
- #include "db_int.h"
- #include "db_page.h"
- #include "db_shash.h"
- #include "btree.h"
- #include "hash.h"
- #include "qam.h"
- #include "lock.h"
- #include "mp.h"
- #include "txn.h"
- #include "db_am.h"
- #include "db_ext.h"
- /*
- * __db_cursor --
- * Allocate and return a cursor.
- *
- * PUBLIC: int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
- */
- int
- __db_cursor(dbp, txn, dbcp, flags)
- DB *dbp;
- DB_TXN *txn;
- DBC **dbcp;
- u_int32_t flags;
- {
- DB_ENV *dbenv;
- DBC *dbc;
- db_lockmode_t mode;
- u_int32_t op;
- int ret;
- dbenv = dbp->dbenv;
- PANIC_CHECK(dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
- /* Check for invalid flags. */
- if ((ret = __db_cursorchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
- if ((ret =
- __db_icursor(dbp, txn, dbp->type, PGNO_INVALID, 0, dbcp)) != 0)
- return (ret);
- dbc = *dbcp;
- /*
- * If this is CDB, do all the locking in the interface, which is
- * right here.
- */
- if (CDB_LOCKING(dbenv)) {
- op = LF_ISSET(DB_OPFLAGS_MASK);
- mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
- ((op == DB_WRITECURSOR) ? DB_LOCK_IWRITE : DB_LOCK_READ);
- if ((ret = lock_get(dbenv, dbc->locker, 0,
- &dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
- (void)__db_c_close(dbc);
- return (ret);
- }
- if (op == DB_WRITECURSOR)
- F_SET(dbc, DBC_WRITECURSOR);
- if (op == DB_WRITELOCK)
- F_SET(dbc, DBC_WRITER);
- }
- return (0);
- }
- /*
- * __db_icursor --
- * Internal version of __db_cursor. If dbcp is
- * non-NULL it is assumed to point to an area to
- * initialize as a cursor.
- *
- * PUBLIC: int __db_icursor
- * PUBLIC: __P((DB *, DB_TXN *, DBTYPE, db_pgno_t, int, DBC **));
- */
- int
- __db_icursor(dbp, txn, dbtype, root, is_opd, dbcp)
- DB *dbp;
- DB_TXN *txn;
- DBTYPE dbtype;
- db_pgno_t root;
- int is_opd;
- DBC **dbcp;
- {
- DBC *dbc, *adbc;
- DBC_INTERNAL *cp;
- DB_ENV *dbenv;
- int allocated, ret;
- dbenv = dbp->dbenv;
- allocated = 0;
- /*
- * Take one from the free list if it's available. Take only the
- * right type. With off page dups we may have different kinds
- * of cursors on the queue for a single database.
- */
- MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
- for (dbc = TAILQ_FIRST(&dbp->free_queue);
- dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
- if (dbtype == dbc->dbtype) {
- TAILQ_REMOVE(&dbp->free_queue, dbc, links);
- dbc->flags = 0;
- break;
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
- if (dbc == NULL) {
- if ((ret = __os_calloc(dbp->dbenv, 1, sizeof(DBC), &dbc)) != 0)
- return (ret);
- allocated = 1;
- dbc->flags = 0;
- dbc->dbp = dbp;
- /* Set up locking information. */
- if (LOCKING_ON(dbenv)) {
- /*
- * If we are not threaded, then there is no need to
- * create new locker ids. We know that no one else
- * is running concurrently using this DB, so we can
- * take a peek at any cursors on the active queue.
- */
- if (!DB_IS_THREADED(dbp) &&
- (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
- dbc->lid = adbc->lid;
- else
- if ((ret = lock_id(dbenv, &dbc->lid)) != 0)
- goto err;
- memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
- if (CDB_LOCKING(dbenv)) {
- if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB)) {
- /*
- * If we are doing a single lock per
- * environment, set up the global
- * lock object just like we do to
- * single thread creates.
- */
- DB_ASSERT(sizeof(db_pgno_t) ==
- sizeof(u_int32_t));
- dbc->lock_dbt.size = sizeof(u_int32_t);
- dbc->lock_dbt.data = &dbc->lock.pgno;
- dbc->lock.pgno = 0;
- } else {
- dbc->lock_dbt.size = DB_FILE_ID_LEN;
- dbc->lock_dbt.data = dbc->lock.fileid;
- }
- } else {
- dbc->lock.type = DB_PAGE_LOCK;
- dbc->lock_dbt.size = sizeof(dbc->lock);
- dbc->lock_dbt.data = &dbc->lock;
- }
- }
- /* Init the DBC internal structure. */
- switch (dbtype) {
- case DB_BTREE:
- case DB_RECNO:
- if ((ret = __bam_c_init(dbc, dbtype)) != 0)
- goto err;
- break;
- case DB_HASH:
- if ((ret = __ham_c_init(dbc)) != 0)
- goto err;
- break;
- case DB_QUEUE:
- if ((ret = __qam_c_init(dbc)) != 0)
- goto err;
- break;
- default:
- ret = __db_unknown_type(dbp->dbenv,
- "__db_icursor", dbtype);
- goto err;
- }
- cp = dbc->internal;
- }
- /* Refresh the DBC structure. */
- dbc->dbtype = dbtype;
- if ((dbc->txn = txn) == NULL)
- dbc->locker = dbc->lid;
- else {
- dbc->locker = txn->txnid;
- txn->cursors++;
- }
- if (is_opd)
- F_SET(dbc, DBC_OPD);
- if (F_ISSET(dbp, DB_AM_RECOVER))
- F_SET(dbc, DBC_RECOVER);
- /* Refresh the DBC internal structure. */
- cp = dbc->internal;
- cp->opd = NULL;
- cp->indx = 0;
- cp->page = NULL;
- cp->pgno = PGNO_INVALID;
- cp->root = root;
- switch (dbtype) {
- case DB_BTREE:
- case DB_RECNO:
- if ((ret = __bam_c_refresh(dbc)) != 0)
- goto err;
- break;
- case DB_HASH:
- case DB_QUEUE:
- break;
- default:
- ret = __db_unknown_type(dbp->dbenv, "__db_icursor", dbp->type);
- goto err;
- }
- MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
- TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
- F_SET(dbc, DBC_ACTIVE);
- MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
- *dbcp = dbc;
- return (0);
- err: if (allocated)
- __os_free(dbc, sizeof(*dbc));
- return (ret);
- }
#ifdef DEBUG
/*
 * __db_cprint --
 *	Display the current cursor list.
 *
 *	Writes one multi-line record to stderr for every cursor on the
 *	database's active queue (type, addresses, transaction, locker ids,
 *	root/page/index and flags), followed by a single line listing the
 *	addresses of the cursors on the free queue.  The database mutex is
 *	held for the duration of the walk.
 *
 *	Returns 0 on success, or 1 if an active cursor has an unrecognized
 *	type; the mutex is released in both cases.
 *
 * PUBLIC: int __db_cprint __P((DB *));
 */
int
__db_cprint(dbp)
	DB *dbp;
{
	static const FN fn[] = {
		{ DBC_ACTIVE,		"active" },
		{ DBC_OPD,		"off-page-dup" },
		{ DBC_RECOVER,		"recover" },
		{ DBC_RMW,		"read-modify-write" },
		{ DBC_WRITECURSOR,	"write cursor" },
		{ DBC_WRITEDUP,		"internally dup'ed write cursor" },
		{ DBC_WRITER,		"short-term write cursor" },
		{ 0,			NULL }
	};
	DBC *dbc;
	DBC_INTERNAL *cp;
	const char *s;

	MUTEX_THREAD_LOCK(dbp->dbenv, dbp->mutexp);
	for (dbc = TAILQ_FIRST(&dbp->active_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
		switch (dbc->dbtype) {
		case DB_BTREE:
			s = "btree";
			break;
		case DB_HASH:
			s = "hash";
			break;
		case DB_RECNO:
			s = "recno";
			break;
		case DB_QUEUE:
			s = "queue";
			break;
		default:
			DB_ASSERT(0);
			/* Don't return with the database mutex still held. */
			MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);
			return (1);
		}
		cp = dbc->internal;
		fprintf(stderr, "%s/%#0lx: opd: %#0lx\n",
		    s, P_TO_ULONG(dbc), P_TO_ULONG(cp->opd));
		fprintf(stderr, "\ttxn: %#0lx lid: %lu locker: %lu\n",
		    P_TO_ULONG(dbc->txn),
		    (u_long)dbc->lid, (u_long)dbc->locker);
		fprintf(stderr, "\troot: %lu page/index: %lu/%lu",
		    (u_long)cp->root, (u_long)cp->pgno, (u_long)cp->indx);
		/* The flag names are appended to the "root:" line. */
		__db_prflags(dbc->flags, fn, stderr);
		fprintf(stderr, "\n");

		if (dbp->type == DB_BTREE)
			__bam_cprint(dbc);
	}
	for (dbc = TAILQ_FIRST(&dbp->free_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
		fprintf(stderr, "free: %#0lx ", P_TO_ULONG(dbc));
	fprintf(stderr, "\n");
	MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);

	return (0);
}
#endif /* DEBUG */
- /*
- * db_fd --
- * Return a file descriptor for flock'ing.
- *
- * PUBLIC: int __db_fd __P((DB *, int *));
- */
- int
- __db_fd(dbp, fdp)
- DB *dbp;
- int *fdp;
- {
- DB_FH *fhp;
- int ret;
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");
- /*
- * XXX
- * Truly spectacular layering violation.
- */
- if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) != 0)
- return (ret);
- if (F_ISSET(fhp, DB_FH_VALID)) {
- *fdp = fhp->fd;
- return (0);
- } else {
- *fdp = -1;
- __db_err(dbp->dbenv, "DB does not have a valid file handle.");
- return (ENOENT);
- }
- }
- /*
- * __db_get --
- * Return a key/data pair.
- *
- * PUBLIC: int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
- */
- int
- __db_get(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key, *data;
- u_int32_t flags;
- {
- DBC *dbc;
- int mode, ret, t_ret;
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");
- if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
- return (ret);
- mode = 0;
- if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT)
- mode = DB_WRITELOCK;
- if ((ret = dbp->cursor(dbp, txn, &dbc, mode)) != 0)
- return (ret);
- DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags);
- /*
- * The DBC_TRANSIENT flag indicates that we're just doing a
- * single operation with this cursor, and that in case of
- * error we don't need to restore it to its old position--we're
- * going to close it right away. Thus, we can perform the get
- * without duplicating the cursor, saving some cycles in this
- * common case.
- */
- F_SET(dbc, DBC_TRANSIENT);
- ret = dbc->c_get(dbc, key, data,
- flags == 0 || flags == DB_RMW ? flags | DB_SET : flags);
- if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
- }
- /*
- * __db_put --
- * Store a key/data pair.
- *
- * PUBLIC: int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
- */
- int
- __db_put(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key, *data;
- u_int32_t flags;
- {
- DBC *dbc;
- DBT tdata;
- int ret, t_ret;
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");
- if ((ret = __db_putchk(dbp, key, data,
- flags, F_ISSET(dbp, DB_AM_RDONLY),
- F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) != 0)
- return (ret);
- DB_CHECK_TXN(dbp, txn);
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
- /*
- * See the comment in __db_get().
- *
- * Note that the c_get in the DB_NOOVERWRITE case is safe to
- * do with this flag set; if it errors in any way other than
- * DB_NOTFOUND, we're going to close the cursor without doing
- * anything else, and if it returns DB_NOTFOUND then it's safe
- * to do a c_put(DB_KEYLAST) even if an access method moved the
- * cursor, since that's not position-dependent.
- */
- F_SET(dbc, DBC_TRANSIENT);
- DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags);
- if (flags == DB_NOOVERWRITE) {
- flags = 0;
- /*
- * Set DB_DBT_USERMEM, this might be a threaded application and
- * the flags checking will catch us. We don't want the actual
- * data, so request a partial of length 0.
- */
- memset(&tdata, 0, sizeof(tdata));
- F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
- /*
- * If we're doing page-level locking, set the read-modify-write
- * flag, we're going to overwrite immediately.
- */
- if ((ret = dbc->c_get(dbc, key, &tdata,
- DB_SET | (STD_LOCKING(dbc) ? DB_RMW : 0))) == 0)
- ret = DB_KEYEXIST;
- else if (ret == DB_NOTFOUND)
- ret = 0;
- }
- if (ret == 0)
- ret = dbc->c_put(dbc,
- key, data, flags == 0 ? DB_KEYLAST : flags);
- if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
- }
- /*
- * __db_sync --
- * Flush the database cache.
- *
- * PUBLIC: int __db_sync __P((DB *, u_int32_t));
- */
- int
- __db_sync(dbp, flags)
- DB *dbp;
- u_int32_t flags;
- {
- int ret, t_ret;
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");
- if ((ret = __db_syncchk(dbp, flags)) != 0)
- return (ret);
- /* Read-only trees never need to be sync'd. */
- if (F_ISSET(dbp, DB_AM_RDONLY))
- return (0);
- /* If it's a Recno tree, write the backing source text file. */
- if (dbp->type == DB_RECNO)
- ret = __ram_writeback(dbp);
- /* If the tree was never backed by a database file, we're done. */
- if (F_ISSET(dbp, DB_AM_INMEM))
- return (0);
- /* Flush any dirty pages from the cache to the backing file. */
- if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
- }