hash.c
Uploaded by: romrleung
Upload date: 2022-05-23
Package size: 18897k
File size: 52k
- /*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2002
- * Sleepycat Software. All rights reserved.
- */
- /*
- * Copyright (c) 1990, 1993, 1994
- * Margo Seltzer. All rights reserved.
- */
- /*
- * Copyright (c) 1990, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Margo Seltzer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #include "db_config.h"
- #ifndef lint
- static const char revid[] = "$Id: hash.c,v 11.166 2002/08/06 06:11:25 bostic Exp $";
- #endif /* not lint */
- #ifndef NO_SYSTEM_INCLUDES
- #include <sys/types.h>
- #include <stdlib.h>
- #include <string.h>
- #endif
- #include "db_int.h"
- #include "dbinc/db_page.h"
- #include "dbinc/db_shash.h"
- #include "dbinc/btree.h"
- #include "dbinc/hash.h"
- #include "dbinc/lock.h"
- static int __ham_bulk __P((DBC *, DBT *, u_int32_t));
- static int __ham_c_close __P((DBC *, db_pgno_t, int *));
- static int __ham_c_del __P((DBC *));
- static int __ham_c_destroy __P((DBC *));
- static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
- static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
- static int __ham_c_writelock __P((DBC *));
- static int __ham_dup_return __P((DBC *, DBT *, u_int32_t));
- static int __ham_expand_table __P((DBC *));
- static int __ham_lookup __P((DBC *,
- const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *));
- static int __ham_overwrite __P((DBC *, DBT *, u_int32_t));
- /*
- * __ham_quick_delete --
- * When performing a DB->del operation that does not involve secondary
- * indices and is not removing an off-page duplicate tree, we can
- * speed things up substantially by removing the entire duplicate
- * set, if any is present, in one operation, rather than by conjuring
- * up and deleting each of the items individually. (All are stored
- * in one big HKEYDATA structure.) We don't bother to distinguish
- * on-page duplicate sets from single, non-dup items; they're deleted
- * in exactly the same way.
- *
- * This function is called by __db_delete when the appropriate
- * conditions are met, and it performs the delete in the optimized way.
- *
- * The cursor should be set to the first item in the duplicate
- * set, or to the sole key/data pair when the key does not have a
- * duplicate set, before the function is called.
- *
- * PUBLIC: int __ham_quick_delete __P((DBC *));
- */
- int
- __ham_quick_delete(dbc)
- DBC *dbc;
- {
- int ret, t_ret;
- if ((ret = __ham_get_meta(dbc)) != 0)
- return (ret);
- /* Assert that we're not using secondary indices. */
- DB_ASSERT(!F_ISSET(dbc->dbp, DB_AM_SECONDARY));
- /*
- * We should assert that we're not a primary either, but that
- * would require grabbing the dbp's mutex, so we don't bother.
- */
- /* Assert that we're set, but not to an off-page duplicate. */
- DB_ASSERT(IS_INITIALIZED(dbc));
- DB_ASSERT(((HASH_CURSOR *)dbc->internal)->opd == NULL);
- ret = __ham_del_pair(dbc, 1);
- if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
- }
- /* ****************** CURSORS ********************************** */
- /*
- * __ham_c_init --
- * Initialize the hash-specific portion of a cursor.
- *
- * PUBLIC: int __ham_c_init __P((DBC *));
- */
- int
- __ham_c_init(dbc)
- DBC *dbc;
- {
- DB_ENV *dbenv;
- HASH_CURSOR *new_curs;
- int ret;
- dbenv = dbc->dbp->dbenv;
- if ((ret = __os_calloc(dbenv,
- 1, sizeof(struct cursor_t), &new_curs)) != 0)
- return (ret);
- if ((ret = __os_malloc(dbenv,
- dbc->dbp->pgsize, &new_curs->split_buf)) != 0) {
- __os_free(dbenv, new_curs);
- return (ret);
- }
- dbc->internal = (DBC_INTERNAL *) new_curs;
- dbc->c_close = __db_c_close;
- dbc->c_count = __db_c_count;
- dbc->c_del = __db_c_del;
- dbc->c_dup = __db_c_dup;
- dbc->c_get = dbc->c_real_get = __db_c_get;
- dbc->c_pget = __db_c_pget;
- dbc->c_put = __db_c_put;
- dbc->c_am_bulk = __ham_bulk;
- dbc->c_am_close = __ham_c_close;
- dbc->c_am_del = __ham_c_del;
- dbc->c_am_destroy = __ham_c_destroy;
- dbc->c_am_get = __ham_c_get;
- dbc->c_am_put = __ham_c_put;
- dbc->c_am_writelock = __ham_c_writelock;
- __ham_item_init(dbc);
- return (0);
- }
- /*
- * __ham_c_close --
- * Close down the cursor from a single use.
- */
- static int
- __ham_c_close(dbc, root_pgno, rmroot)
- DBC *dbc;
- db_pgno_t root_pgno;
- int *rmroot;
- {
- DB_MPOOLFILE *mpf;
- HASH_CURSOR *hcp;
- HKEYDATA *dp;
- int doroot, gotmeta, ret, t_ret;
- u_int32_t dirty;
- COMPQUIET(rmroot, 0);
- mpf = dbc->dbp->mpf;
- dirty = 0;
- doroot = gotmeta = ret = 0;
- hcp = (HASH_CURSOR *) dbc->internal;
- /* Check for off page dups. */
- if (dbc->internal->opd != NULL) {
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto done;
- gotmeta = 1;
- if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0)
- goto out;
- dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx);
- /* If it's not a dup, we aborted before we changed it. */
- if (HPAGE_PTYPE(dp) == H_OFFDUP)
- memcpy(&root_pgno,
- HOFFPAGE_PGNO(dp), sizeof(db_pgno_t));
- else
- root_pgno = PGNO_INVALID;
- if ((ret =
- hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0)
- goto out;
- if (doroot != 0) {
- if ((ret = __ham_del_pair(dbc, 1)) != 0)
- goto out;
- dirty = DB_MPOOL_DIRTY;
- }
- }
- out: if (hcp->page != NULL && (t_ret =
- mpf->put(mpf, hcp->page, dirty)) != 0 && ret == 0)
- ret = t_ret;
- if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
- ret = t_ret;
- done:
- __ham_item_init(dbc);
- return (ret);
- }
- /*
- * __ham_c_destroy --
- * Cleanup the access method private part of a cursor.
- */
- static int
- __ham_c_destroy(dbc)
- DBC *dbc;
- {
- HASH_CURSOR *hcp;
- hcp = (HASH_CURSOR *)dbc->internal;
- if (hcp->split_buf != NULL)
- __os_free(dbc->dbp->dbenv, hcp->split_buf);
- __os_free(dbc->dbp->dbenv, hcp);
- return (0);
- }
- /*
- * __ham_c_count --
- * Return a count of on-page duplicates.
- *
- * PUBLIC: int __ham_c_count __P((DBC *, db_recno_t *));
- */
- int
- __ham_c_count(dbc, recnop)
- DBC *dbc;
- db_recno_t *recnop;
- {
- DB *dbp;
- DB_MPOOLFILE *mpf;
- HASH_CURSOR *hcp;
- db_indx_t len;
- db_recno_t recno;
- int ret, t_ret;
- u_int8_t *p, *pend;
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- hcp = (HASH_CURSOR *)dbc->internal;
- recno = 0;
- if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0)
- return (ret);
- switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) {
- case H_KEYDATA:
- case H_OFFPAGE:
- recno = 1;
- break;
- case H_DUPLICATE:
- p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
- pend = p +
- LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx);
- for (; p < pend; recno++) {
- /* p may be odd (misaligned), so copy rather than just dereferencing. */
- memcpy(&len, p, sizeof(db_indx_t));
- p += 2 * sizeof(db_indx_t) + len;
- }
- break;
- default:
- ret = __db_pgfmt(dbp->dbenv, hcp->pgno);
- goto err;
- }
- *recnop = recno;
- err: if ((t_ret = mpf->put(mpf, hcp->page, 0)) != 0 && ret == 0)
- ret = t_ret;
- hcp->page = NULL;
- return (ret);
- }
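- /*
- * __ham_c_del --
- * Delete the item to which the cursor refers. A single on-page
- * duplicate is removed by a partial overwrite (__ham_replpair);
- * if it is the only element of its set, or the item is not a
- * duplicate at all, the whole key/data pair is removed with
- * __ham_del_pair. Off-page duplicate sets are deleted by the
- * off-page duplicate cursor, so we just release our pages here.
- */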
- static int
- __ham_c_del(dbc)
- DBC *dbc;
- {
- DB *dbp;
- DBT repldbt;
- DB_MPOOLFILE *mpf;
- HASH_CURSOR *hcp;
- int ret, t_ret;
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- hcp = (HASH_CURSOR *)dbc->internal;
- if (F_ISSET(hcp, H_DELETED))
- return (DB_NOTFOUND);
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto out;
- if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0)
- goto out;
- /* Off-page duplicates. */
- if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP)
- goto out;
- if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. */
- if (hcp->dup_off == 0 &&
- DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page,
- hcp->hdr->dbmeta.pagesize, hcp->indx))
- ret = __ham_del_pair(dbc, 1);
- else {
- repldbt.flags = 0;
- F_SET(&repldbt, DB_DBT_PARTIAL);
- repldbt.doff = hcp->dup_off;
- repldbt.dlen = DUP_SIZE(hcp->dup_len);
- repldbt.size = 0;
- repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page,
- hcp->indx));
- if ((ret = __ham_replpair(dbc, &repldbt, 0)) == 0) {
- hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
- F_SET(hcp, H_DELETED);
- ret = __ham_c_update(dbc,
- DUP_SIZE(hcp->dup_len), 0, 1);
- }
- }
- } else /* Not a duplicate */
- ret = __ham_del_pair(dbc, 1);
- out: if (hcp->page != NULL) {
- if ((t_ret = mpf->put(mpf,
- hcp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) && ret == 0)
- ret = t_ret;
- hcp->page = NULL;
- }
- if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
- }
- /*
- * __ham_c_dup --
- * Duplicate a hash cursor, such that the new one holds appropriate
- * locks for the position of the original.
- *
- * PUBLIC: int __ham_c_dup __P((DBC *, DBC *));
- */
- int
- __ham_c_dup(orig_dbc, new_dbc)
- DBC *orig_dbc, *new_dbc;
- {
- HASH_CURSOR *orig, *new;
- orig = (HASH_CURSOR *)orig_dbc->internal;
- new = (HASH_CURSOR *)new_dbc->internal;
- new->bucket = orig->bucket;
- new->lbucket = orig->lbucket;
- new->dup_off = orig->dup_off;
- new->dup_len = orig->dup_len;
- new->dup_tlen = orig->dup_tlen;
- if (F_ISSET(orig, H_DELETED))
- F_SET(new, H_DELETED);
- if (F_ISSET(orig, H_ISDUP))
- F_SET(new, H_ISDUP);
- /*
- * If the old cursor held a lock and we're not in a transaction, get one
- * for the new one. The reason that we don't need a new lock if we're
- * in a transaction is because we already hold a lock and will continue
- * to do so until commit, so there is no point in reacquiring it. We
- * don't know if the old lock was a read or write lock, but it doesn't
- * matter. We'll get a read lock. We know that this locker already
- * holds a lock of the correct type, so if we need a write lock and
- * request it, we know that we'll get it.
- */
- if (!LOCK_ISSET(orig->lock) || orig_dbc->txn != NULL)
- return (0);
- return (__ham_lock_bucket(new_dbc, DB_LOCK_READ));
- }
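- /*
- * __ham_c_get --
- * Hash cursor get. The first switch positions the cursor according
- * to the access flag (DB_FIRST/DB_LAST/DB_NEXT/DB_PREV/DB_SET/
- * DB_GET_BOTH/DB_CURRENT, etc.); the loop that follows handles
- * running off the end of a bucket by moving to the next (or
- * previous) bucket until an item is found or the table is
- * exhausted.
- */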
- static int
- __ham_c_get(dbc, key, data, flags, pgnop)
- DBC *dbc;
- DBT *key;
- DBT *data;
- u_int32_t flags;
- db_pgno_t *pgnop;
- {
- DB *dbp;
- DB_MPOOLFILE *mpf;
- HASH_CURSOR *hcp;
- db_lockmode_t lock_type;
- int get_key, ret, t_ret;
- hcp = (HASH_CURSOR *)dbc->internal;
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- /* Clear OR'd in additional bits so we can check for flag equality. */
- if (F_ISSET(dbc, DBC_RMW))
- lock_type = DB_LOCK_WRITE;
- else
- lock_type = DB_LOCK_READ;
- if ((ret = __ham_get_meta(dbc)) != 0)
- return (ret);
- hcp->seek_size = 0;
- ret = 0;
- get_key = 1;
- switch (flags) {
- case DB_PREV_NODUP:
- F_SET(hcp, H_NEXT_NODUP);
- /* FALLTHROUGH */
- case DB_PREV:
- if (IS_INITIALIZED(dbc)) {
- ret = __ham_item_prev(dbc, lock_type, pgnop);
- break;
- }
- /* FALLTHROUGH */
- case DB_LAST:
- ret = __ham_item_last(dbc, lock_type, pgnop);
- break;
- case DB_NEXT_NODUP:
- F_SET(hcp, H_NEXT_NODUP);
- /* FALLTHROUGH */
- case DB_NEXT:
- if (IS_INITIALIZED(dbc)) {
- ret = __ham_item_next(dbc, lock_type, pgnop);
- break;
- }
- /* FALLTHROUGH */
- case DB_FIRST:
- ret = __ham_item_first(dbc, lock_type, pgnop);
- break;
- case DB_NEXT_DUP:
- /* cgetchk has already determined that the cursor is set. */
- F_SET(hcp, H_DUPONLY);
- ret = __ham_item_next(dbc, lock_type, pgnop);
- break;
- case DB_SET:
- case DB_SET_RANGE:
- case DB_GET_BOTH:
- case DB_GET_BOTH_RANGE:
- ret = __ham_lookup(dbc, key, 0, lock_type, pgnop);
- get_key = 0;
- break;
- case DB_GET_BOTHC:
- F_SET(hcp, H_DUPONLY);
- ret = __ham_item_next(dbc, lock_type, pgnop);
- get_key = 0;
- break;
- case DB_CURRENT:
- /* cgetchk has already determined that the cursor is set. */
- if (F_ISSET(hcp, H_DELETED)) {
- ret = DB_KEYEMPTY;
- goto err;
- }
- ret = __ham_item(dbc, lock_type, pgnop);
- break;
- }
- /*
- * Must always enter this loop to do error handling and
- * check for big key/data pair.
- */
- for (;;) {
- if (ret != 0 && ret != DB_NOTFOUND)
- goto err;
- else if (F_ISSET(hcp, H_OK)) {
- if (*pgnop == PGNO_INVALID)
- ret = __ham_dup_return(dbc, data, flags);
- break;
- } else if (!F_ISSET(hcp, H_NOMORE)) {
- __db_err(dbp->dbenv,
- "H_NOMORE returned to __ham_c_get");
- ret = EINVAL;
- break;
- }
- /*
- * Ran out of entries in a bucket; change buckets.
- */
- switch (flags) {
- case DB_LAST:
- case DB_PREV:
- case DB_PREV_NODUP:
- ret = mpf->put(mpf, hcp->page, 0);
- hcp->page = NULL;
- if (hcp->bucket == 0) {
- ret = DB_NOTFOUND;
- hcp->pgno = PGNO_INVALID;
- goto err;
- }
- F_CLR(hcp, H_ISDUP);
- hcp->bucket--;
- hcp->indx = NDX_INVALID;
- hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
- if (ret == 0)
- ret = __ham_item_prev(dbc,
- lock_type, pgnop);
- break;
- case DB_FIRST:
- case DB_NEXT:
- case DB_NEXT_NODUP:
- ret = mpf->put(mpf, hcp->page, 0);
- hcp->page = NULL;
- hcp->indx = NDX_INVALID;
- hcp->bucket++;
- F_CLR(hcp, H_ISDUP);
- hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
- if (hcp->bucket > hcp->hdr->max_bucket) {
- ret = DB_NOTFOUND;
- hcp->pgno = PGNO_INVALID;
- goto err;
- }
- if (ret == 0)
- ret = __ham_item_next(dbc,
- lock_type, pgnop);
- break;
- case DB_GET_BOTH:
- case DB_GET_BOTHC:
- case DB_GET_BOTH_RANGE:
- case DB_NEXT_DUP:
- case DB_SET:
- case DB_SET_RANGE:
- /* Key not found. */
- ret = DB_NOTFOUND;
- goto err;
- case DB_CURRENT:
- /*
- * This should only happen if you are doing
- * deletes and reading with concurrent threads
- * and not doing proper locking. We return
- * the same error code as we would if the
- * cursor were deleted.
- */
- ret = DB_KEYEMPTY;
- goto err;
- default:
- DB_ASSERT(0);
- }
- }
- if (get_key == 0)
- F_SET(key, DB_DBT_ISSET);
- err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
- ret = t_ret;
- F_CLR(hcp, H_DUPONLY);
- F_CLR(hcp, H_NEXT_NODUP);
- return (ret);
- }
- /*
- * __ham_bulk -- Return bulk data from a hash table.
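- *
- * The caller's buffer is filled from the front with page images and
- * overflow items, while a table of 32-bit offset/length entries (four
- * per item for DB_MULTIPLE_KEY, two otherwise) is built backwards from
- * the end of the buffer and terminated with a -1 entry.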
- */
- static int
- __ham_bulk(dbc, data, flags)
- DBC *dbc;
- DBT *data;
- u_int32_t flags;
- {
- DB *dbp;
- DB_MPOOLFILE *mpf;
- HASH_CURSOR *cp;
- PAGE *pg;
- db_indx_t dup_len, dup_off, dup_tlen, indx, *inp;
- db_lockmode_t lock_mode;
- db_pgno_t pgno;
- int32_t *endp, key_off, *offp, *saveoff;
- u_int32_t key_size, size, space;
- u_int8_t *dbuf, *dp, *hk, *np, *tmp;
- int is_dup, is_key;
- int need_pg, next_key, no_dup, pagesize, ret, t_ret;
- ret = 0;
- key_off = 0;
- dup_len = dup_off = dup_tlen = 0;
- size = 0;
- dbp = dbc->dbp;
- pagesize = dbp->pgsize;
- mpf = dbp->mpf;
- cp = (HASH_CURSOR *)dbc->internal;
- is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0;
- next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
- no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP;
- dbuf = data->data;
- np = dp = dbuf;
- /* Keep track of space that is left; there is a termination entry. */
- space = data->ulen;
- space -= sizeof(*offp);
- /* Build the offset/size table from the end up. */
- endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen);
- endp--;
- offp = endp;
- key_size = 0;
- lock_mode = F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE: DB_LOCK_READ;
- next_pg:
- need_pg = 1;
- indx = cp->indx;
- pg = cp->page;
- inp = P_INP(dbp, pg);
- do {
- if (is_key) {
- hk = H_PAIRKEY(dbp, pg, indx);
- if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
- memcpy(&key_size,
- HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
- memcpy(&pgno,
- HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
- size = key_size;
- if (key_size > space)
- goto get_key_space;
- if ((ret = __bam_bulk_overflow(
- dbc, key_size, pgno, np)) != 0)
- return (ret);
- space -= key_size;
- key_off = (int32_t)(np - dbuf);
- np += key_size;
- } else {
- if (need_pg) {
- dp = np;
- size = pagesize - HOFFSET(pg);
- if (space < size) {
- get_key_space:
- if (offp == endp) {
- data->size =
- ALIGN(size +
- pagesize,
- sizeof(u_int32_t));
- return (ENOMEM);
- }
- goto back_up;
- }
- memcpy(dp,
- (u_int8_t *)pg + HOFFSET(pg), size);
- need_pg = 0;
- space -= size;
- np += size;
- }
- key_size = LEN_HKEY(dbp, pg, pagesize, indx);
- key_off = (int32_t)(inp[indx] - HOFFSET(pg)
- + dp - dbuf + SSZA(HKEYDATA, data));
- }
- }
- hk = H_PAIRDATA(dbp, pg, indx);
- switch (HPAGE_PTYPE(hk)) {
- case H_DUPLICATE:
- case H_KEYDATA:
- if (need_pg) {
- dp = np;
- size = pagesize - HOFFSET(pg);
- if (space < size) {
- back_up:
- if (indx != 0) {
- indx -= 2;
- /* XXX
- * It's not clear that this is
- * the right way to fix this,
- * but here goes.
- * If we are backing up onto a
- * duplicate, then we need to
- * position ourselves at the
- * end of the duplicate set.
- * We probably need to make
- * this work for H_OFFDUP too.
- * It might be worth making a
- * dummy cursor and calling
- * __ham_item_prev.
- */
- tmp = H_PAIRDATA(dbp, pg, indx);
- if (HPAGE_PTYPE(tmp) ==
- H_DUPLICATE) {
- dup_off = dup_tlen =
- LEN_HDATA(dbp, pg,
- pagesize, indx + 1);
- memcpy(&dup_len,
- HKEYDATA_DATA(tmp),
- sizeof(db_indx_t));
- }
- goto get_space;
- }
- /* indx == 0 */
- if ((ret = __ham_item_prev(dbc,
- lock_mode, &pgno)) != 0) {
- if (ret != DB_NOTFOUND)
- return (ret);
- if ((ret = mpf->put(mpf,
- cp->page, 0)) != 0)
- return (ret);
- cp->page = NULL;
- if (cp->bucket == 0) {
- cp->indx = indx =
- NDX_INVALID;
- goto get_space;
- }
- if ((ret =
- __ham_get_meta(dbc)) != 0)
- return (ret);
- cp->bucket--;
- cp->pgno = BUCKET_TO_PAGE(cp,
- cp->bucket);
- cp->indx = NDX_INVALID;
- if ((ret = __ham_release_meta(
- dbc)) != 0)
- return (ret);
- if ((ret = __ham_item_prev(dbc,
- lock_mode, &pgno)) != 0)
- return (ret);
- }
- indx = cp->indx;
- get_space:
- /*
- * See if we put any data in the buffer.
- */
- if (offp >= endp ||
- F_ISSET(dbc, DBC_TRANSIENT)) {
- data->size = ALIGN(size +
- data->ulen - space,
- sizeof(u_int32_t));
- return (ENOMEM);
- }
- /*
- * Don't continue; we're all out
- * of space, even though we're
- * returning success.
- */
- next_key = 0;
- break;
- }
- memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size);
- need_pg = 0;
- space -= size;
- np += size;
- }
- /*
- * We're about to crack the offset(s) and length(s)
- * out of an H_KEYDATA or H_DUPLICATE item.
- * There are three cases:
- * 1. We were moved into a duplicate set by
- * the standard hash cursor code. Respect
- * the dup_off and dup_tlen we were given.
- * 2. We stumbled upon a duplicate set while
- * walking the page on our own. We need to
- * recognize it as a dup and set dup_off and
- * dup_tlen.
- * 3. The current item is not a dup.
- */
- if (F_ISSET(cp, H_ISDUP)) {
- /* Case 1 */
- is_dup = 1;
- dup_len = cp->dup_len;
- dup_off = cp->dup_off;
- dup_tlen = cp->dup_tlen;
- } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) {
- /* Case 2 */
- is_dup = 1;
- /*
- * If we run out of memory and bail,
- * make sure the fact we're in a dup set
- * isn't ignored later.
- */
- F_SET(cp, H_ISDUP);
- dup_off = 0;
- memcpy(&dup_len,
- HKEYDATA_DATA(hk), sizeof(db_indx_t));
- dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx);
- } else
- /* Case 3 */
- is_dup = dup_len = dup_off = dup_tlen = 0;
- do {
- space -= (is_key ? 4 : 2) * sizeof(*offp);
- size += (is_key ? 4 : 2) * sizeof(*offp);
- /*
- * Since space is an unsigned, if we happen
- * to wrap, then this comparison will turn out
- * to be true. XXX Wouldn't it be better to
- * simply check above that space is greater than
- * the value we're about to subtract???
- */
- if (space > data->ulen) {
- if (!is_dup || dup_off == 0)
- goto back_up;
- dup_off -= (db_indx_t)DUP_SIZE(offp[1]);
- goto get_space;
- }
- if (is_key) {
- *offp-- = key_off;
- *offp-- = key_size;
- }
- if (is_dup) {
- *offp-- = (int32_t)(
- inp[indx + 1] - HOFFSET(pg) +
- dp - dbuf + SSZA(HKEYDATA, data) +
- dup_off + sizeof(db_indx_t));
- memcpy(&dup_len,
- HKEYDATA_DATA(hk) + dup_off,
- sizeof(db_indx_t));
- dup_off += DUP_SIZE(dup_len);
- *offp-- = dup_len;
- } else {
- *offp-- = (int32_t)(
- inp[indx + 1] - HOFFSET(pg) +
- dp - dbuf + SSZA(HKEYDATA, data));
- *offp-- = LEN_HDATA(dbp, pg,
- pagesize, indx);
- }
- } while (is_dup && dup_off < dup_tlen && no_dup == 0);
- F_CLR(cp, H_ISDUP);
- break;
- case H_OFFDUP:
- memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
- space -= 2 * sizeof(*offp);
- if (space > data->ulen)
- goto back_up;
- if (is_key) {
- space -= 2 * sizeof(*offp);
- if (space > data->ulen)
- goto back_up;
- *offp-- = key_off;
- *offp-- = key_size;
- }
- saveoff = offp;
- if ((ret = __bam_bulk_duplicates(dbc,
- pgno, dbuf, is_key ? offp + 2 : NULL,
- &offp, &np, &space, no_dup)) != 0) {
- if (ret == ENOMEM) {
- size = space;
- if (is_key && saveoff == offp) {
- offp += 2;
- goto back_up;
- }
- goto get_space;
- }
- return (ret);
- }
- break;
- case H_OFFPAGE:
- space -= (is_key ? 4 : 2) * sizeof(*offp);
- if (space > data->ulen)
- goto back_up;
- memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
- memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
- if (size > space)
- goto back_up;
- if ((ret =
- __bam_bulk_overflow(dbc, size, pgno, np)) != 0)
- return (ret);
- if (is_key) {
- *offp-- = key_off;
- *offp-- = key_size;
- }
- *offp-- = (int32_t)(np - dbuf);
- *offp-- = size;
- np += size;
- space -= size;
- break;
- }
- } while (next_key && (indx += 2) < NUM_ENT(pg));
- cp->indx = indx;
- cp->dup_len = dup_len;
- cp->dup_off = dup_off;
- cp->dup_tlen = dup_tlen;
- /* If we are off the page, then try the next page. */
- if (ret == 0 && next_key && indx >= NUM_ENT(pg)) {
- if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0)
- goto next_pg;
- if (ret != DB_NOTFOUND)
- return (ret);
- if ((ret = mpf->put(dbc->dbp->mpf, cp->page, 0)) != 0)
- return (ret);
- cp->page = NULL;
- if ((ret = __ham_get_meta(dbc)) != 0)
- return (ret);
- cp->bucket++;
- if (cp->bucket > cp->hdr->max_bucket) {
- /*
- * Restore cursor to its previous state. We're past
- * the last item in the last bucket, so the next
- * DBC->c_get(DB_NEXT) will return DB_NOTFOUND.
- */
- cp->bucket--;
- ret = DB_NOTFOUND;
- } else {
- /*
- * Start on the next bucket.
- *
- * Note that if this new bucket happens to be empty,
- * but there's another non-empty bucket after it,
- * we'll return early. This is a rare case, and we
- * don't guarantee any particular number of keys
- * returned on each call, so just let the next call
- * to bulk get move forward by yet another bucket.
- */
- cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket);
- cp->indx = NDX_INVALID;
- F_CLR(cp, H_ISDUP);
- ret = __ham_item_next(dbc, lock_mode, &pgno);
- }
- if ((t_ret = __ham_release_meta(dbc)) != 0)
- return (t_ret);
- if (ret == 0)
- goto next_pg;
- if (ret != DB_NOTFOUND)
- return (ret);
- }
- *offp = (u_int32_t) -1;
- return (0);
- }
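- /*
- * __ham_c_put --
- * Hash cursor put. For DB_KEYFIRST/DB_KEYLAST/DB_NODUPDATA we look
- * the key up and, if it is absent, add a brand new pair; otherwise
- * (and for DB_BEFORE/DB_AFTER/DB_CURRENT) we either overwrite the
- * current item or add a duplicate. If the add routines set
- * H_EXPAND, the table is expanded before returning.
- */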
- static int
- __ham_c_put(dbc, key, data, flags, pgnop)
- DBC *dbc;
- DBT *key;
- DBT *data;
- u_int32_t flags;
- db_pgno_t *pgnop;
- {
- DB *dbp;
- DB_MPOOLFILE *mpf;
- DBT tmp_val, *myval;
- HASH_CURSOR *hcp;
- u_int32_t nbytes;
- int ret, t_ret;
- /*
- * The compiler doesn't realize that we only use this when ret is
- * equal to 0 and that if ret is equal to 0, we must have set
- * myval. So, we initialize it here to shut the compiler up.
- */
- COMPQUIET(myval, NULL);
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- hcp = (HASH_CURSOR *)dbc->internal;
- if (F_ISSET(hcp, H_DELETED) &&
- flags != DB_KEYFIRST && flags != DB_KEYLAST)
- return (DB_NOTFOUND);
- if ((ret = __ham_get_meta(dbc)) != 0)
- goto err1;
- switch (flags) {
- case DB_KEYLAST:
- case DB_KEYFIRST:
- case DB_NODUPDATA:
- nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
- HKEYDATA_PSIZE(key->size)) +
- (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
- HKEYDATA_PSIZE(data->size));
- if ((ret = __ham_lookup(dbc,
- key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) {
- ret = 0;
- if (hcp->seek_found_page != PGNO_INVALID &&
- hcp->seek_found_page != hcp->pgno) {
- if ((ret = mpf->put(mpf, hcp->page, 0)) != 0)
- goto err2;
- hcp->page = NULL;
- hcp->pgno = hcp->seek_found_page;
- hcp->indx = NDX_INVALID;
- }
- if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
- /*
- * A partial put, but the key does not exist
- * and we are not beginning the write at 0.
- * We must create a data item padded up to doff
- * and then write the new bytes represented by
- * val.
- */
- if ((ret = __ham_init_dbt(dbp->dbenv, &tmp_val,
- data->size + data->doff,
- &dbc->my_rdata.data,
- &dbc->my_rdata.ulen)) == 0) {
- memset(tmp_val.data, 0, data->doff);
- memcpy((u_int8_t *)tmp_val.data +
- data->doff, data->data, data->size);
- myval = &tmp_val;
- }
- } else
- myval = (DBT *)data;
- if (ret == 0)
- ret = __ham_add_el(dbc, key, myval, H_KEYDATA);
- goto done;
- }
- break;
- case DB_BEFORE:
- case DB_AFTER:
- case DB_CURRENT:
- ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop);
- break;
- }
- if (*pgnop == PGNO_INVALID && ret == 0) {
- if (flags == DB_CURRENT ||
- ((flags == DB_KEYFIRST ||
- flags == DB_KEYLAST || flags == DB_NODUPDATA) &&
- !(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))))
- ret = __ham_overwrite(dbc, data, flags);
- else
- ret = __ham_add_dup(dbc, data, flags, pgnop);
- }
- done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
- ret = __ham_expand_table(dbc);
- F_CLR(hcp, H_EXPAND);
- }
- if (hcp->page != NULL &&
- (t_ret = mpf->set(mpf, hcp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
- ret = t_ret;
- err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0)
- ret = t_ret;
- err1: return (ret);
- }
- /********************************* UTILITIES ************************/
- /*
- * __ham_expand_table --
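- * Expand the hash table by one bucket: log the meta-data split,
- * allocate (or fetch) the page for the new bucket, update
- * max_bucket and the masks, and relocate records from the bucket
- * being split via __ham_split_page.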
- */
- static int
- __ham_expand_table(dbc)
- DBC *dbc;
- {
- DB *dbp;
- DB_LOCK metalock;
- DB_LSN lsn;
- DB_MPOOLFILE *mpf;
- DBMETA *mmeta;
- HASH_CURSOR *hcp;
- PAGE *h;
- db_pgno_t pgno, mpgno;
- u_int32_t newalloc, new_bucket, old_bucket;
- int dirty_meta, got_meta, logn, new_double, ret;
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- hcp = (HASH_CURSOR *)dbc->internal;
- if ((ret = __ham_dirty_meta(dbc)) != 0)
- return (ret);
- LOCK_INIT(metalock);
- mmeta = (DBMETA *) hcp->hdr;
- mpgno = mmeta->pgno;
- h = NULL;
- dirty_meta = 0;
- got_meta = 0;
- newalloc = 0;
- /*
- * If the split point is about to increase, make sure that we
- * have enough extra pages. The calculation here is weird.
- * We'd like to do this after we've upped max_bucket, but it's
- * too late then because we've logged the meta-data split. What
- * we'll do between then and now is increment max bucket and then
- * see what the log of one greater than that is; here we have to
- * look at the log of max + 2. VERY NASTY STUFF.
- *
- * We figure out what we need to do, then we log it, then request
- * the pages from mpool. We don't want to fail after extending
- * the file.
- *
- * If the page we are about to split into has already been allocated,
- * then we simply need to get it to get its LSN. If it hasn't yet
- * been allocated, then we know it's LSN (0,0).
- */
- new_bucket = hcp->hdr->max_bucket + 1;
- old_bucket = new_bucket & hcp->hdr->low_mask;
- new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask;
- logn = __db_log2(new_bucket);
- if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) {
- /* Page exists; get it so we can get its LSN */
- pgno = BUCKET_TO_PAGE(hcp, new_bucket);
- if ((ret =
- mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
- goto err;
- lsn = h->lsn;
- } else {
- /* Get the master meta-data page to do allocation. */
- if (F_ISSET(dbp, DB_AM_SUBDB)) {
- mpgno = PGNO_BASE_MD;
- if ((ret = __db_lget(dbc,
- 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- if ((ret =
- mpf->get(mpf, &mpgno, 0, (PAGE **)&mmeta)) != 0)
- goto err;
- got_meta = 1;
- }
- pgno = mmeta->last_pgno + 1;
- ZERO_LSN(lsn);
- newalloc = 1;
- }
- /* Log the meta-data split first. */
- if (DBC_LOGGING(dbc)) {
- /*
- * We always log the page number of the first page of
- * the allocation group. However, the LSN that we log
- * is either the LSN on the first page (if we did not
- * do the actual allocation here) or the LSN on the last
- * page of the unit (if we did do the allocation here).
- */
- if ((ret = __ham_metagroup_log(dbp, dbc->txn,
- &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn,
- hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn,
- pgno, &lsn, newalloc)) != 0)
- goto err;
- } else
- LSN_NOT_LOGGED(lsn);
- hcp->hdr->dbmeta.lsn = lsn;
- if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) {
- /*
- * We need to begin a new doubling and we have not allocated
- * any pages yet. Read the last page in and initialize it to
- * make the allocation contiguous. The pgno we calculated
- * above is the first page allocated. The entry in spares is
- * that page number minus any buckets already allocated (it
- * simplifies bucket to page transaction). After we've set
- * that, we calculate the last pgno.
- */
- hcp->hdr->spares[logn + 1] = pgno - new_bucket;
- pgno += hcp->hdr->max_bucket;
- mmeta->last_pgno = pgno;
- mmeta->lsn = lsn;
- dirty_meta = DB_MPOOL_DIRTY;
- if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0)
- goto err;
- P_INIT(h, dbp->pgsize,
- pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
- }
- /* Write out whatever page we ended up modifying. */
- h->lsn = lsn;
- if ((ret = mpf->put(mpf, h, DB_MPOOL_DIRTY)) != 0)
- goto err;
- h = NULL;
- /*
- * Update the meta-data page of this hash database.
- */
- hcp->hdr->max_bucket = new_bucket;
- if (new_double) {
- hcp->hdr->low_mask = hcp->hdr->high_mask;
- hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask;
- }
- /* Relocate records to the new bucket */
- ret = __ham_split_page(dbc, old_bucket, new_bucket);
- err: if (got_meta)
- (void)mpf->put(mpf, mmeta, dirty_meta);
- if (LOCK_ISSET(metalock))
- (void)__TLPUT(dbc, metalock);
- if (h != NULL)
- (void)mpf->put(mpf, h, 0);
- return (ret);
- }
- /*
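- * __ham_call_hash --
- * Hash a key and map the result to a bucket number: mask with
- * high_mask, and if that names a bucket beyond max_bucket (i.e.,
- * one that has not been created yet), fall back to low_mask.
- * For example, with max_bucket == 5 (high_mask 0x7, low_mask 0x3),
- * a hash value of 14 gives 14 & 0x7 = 6 > 5, so the key is placed
- * in bucket 14 & 0x3 = 2, the bucket that will later split into
- * bucket 6.
- *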
- * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, int32_t));
- */
- u_int32_t
- __ham_call_hash(dbc, k, len)
- DBC *dbc;
- u_int8_t *k;
- int32_t len;
- {
- DB *dbp;
- u_int32_t n, bucket;
- HASH_CURSOR *hcp;
- HASH *hashp;
- dbp = dbc->dbp;
- hcp = (HASH_CURSOR *)dbc->internal;
- hashp = dbp->h_internal;
- n = (u_int32_t)(hashp->h_hash(dbp, k, len));
- bucket = n & hcp->hdr->high_mask;
- if (bucket > hcp->hdr->max_bucket)
- bucket = bucket & hcp->hdr->low_mask;
- return (bucket);
- }
- /*
- * Check for duplicates, and call __db_ret appropriately. Release
- * everything held by the cursor.
- */
- static int
- __ham_dup_return(dbc, val, flags)
- DBC *dbc;
- DBT *val;
- u_int32_t flags;
- {
- DB *dbp;
- HASH_CURSOR *hcp;
- PAGE *pp;
- DBT *myval, tmp_val;
- db_indx_t ndx;
- db_pgno_t pgno;
- u_int32_t off, tlen;
- u_int8_t *hk, type;
- int cmp, ret;
- db_indx_t len;
- /* Check for duplicate and return the first one. */
- dbp = dbc->dbp;
- hcp = (HASH_CURSOR *)dbc->internal;
- ndx = H_DATAINDEX(hcp->indx);
- type = HPAGE_TYPE(dbp, hcp->page, ndx);
- pp = hcp->page;
- myval = val;
- /*
- * There are 4 cases:
- * 1. We are not in a duplicate set: simply return; the upper layer
- * will do the right thing.
- * 2. We are looking at keys and stumbled onto a duplicate.
- * 3. We are in the middle of a duplicate set. (ISDUP set)
- * 4. We need to check for particular data match.
- */
- /* We should never get here with off-page dups. */
- DB_ASSERT(type != H_OFFDUP);
- /* Case 1 */
- if (type != H_DUPLICATE && flags != DB_GET_BOTH &&
- flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE)
- return (0);
- /*
- * Here we check for the case where we just stumbled onto a
- * duplicate. In this case, we do initialization and then
- * let the normal duplicate code handle it. (Case 2)
- */
- if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) {
- F_SET(hcp, H_ISDUP);
- hcp->dup_tlen = LEN_HDATA(dbp, hcp->page,
- hcp->hdr->dbmeta.pagesize, hcp->indx);
- hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
- if (flags == DB_LAST ||
- flags == DB_PREV || flags == DB_PREV_NODUP) {
- hcp->dup_off = 0;
- do {
- memcpy(&len,
- HKEYDATA_DATA(hk) + hcp->dup_off,
- sizeof(db_indx_t));
- hcp->dup_off += DUP_SIZE(len);
- } while (hcp->dup_off < hcp->dup_tlen);
- hcp->dup_off -= DUP_SIZE(len);
- } else {
- memcpy(&len,
- HKEYDATA_DATA(hk), sizeof(db_indx_t));
- hcp->dup_off = 0;
- }
- hcp->dup_len = len;
- }
- /*
- * If we are retrieving a specific key/data pair, then we
- * may need to adjust the cursor before returning data.
- * Case 4
- */
- if (flags == DB_GET_BOTH ||
- flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
- if (F_ISSET(hcp, H_ISDUP)) {
- /*
- * If we're doing a join, search forward from the
- * current position, not the beginning of the dup set.
- */
- if (flags == DB_GET_BOTHC)
- F_SET(hcp, H_CONTINUE);
- __ham_dsearch(dbc, val, &off, &cmp, flags);
- /*
- * This flag is set nowhere else and is safe to
- * clear unconditionally.
- */
- F_CLR(hcp, H_CONTINUE);
- hcp->dup_off = off;
- } else {
- hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
- if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
- memcpy(&tlen,
- HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
- memcpy(&pgno,
- HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
- if ((ret = __db_moff(dbp, val,
- pgno, tlen, dbp->dup_compare, &cmp)) != 0)
- return (ret);
- } else {
- /*
- * We do not zero tmp_val since the comparison
- * routines may only look at data and size.
- */
- tmp_val.data = HKEYDATA_DATA(hk);
- tmp_val.size = LEN_HDATA(dbp, hcp->page,
- dbp->pgsize, hcp->indx);
- cmp = dbp->dup_compare == NULL ?
- __bam_defcmp(dbp, &tmp_val, val) :
- dbp->dup_compare(dbp, &tmp_val, val);
- }
- }
- if (cmp != 0)
- return (DB_NOTFOUND);
- }
- /*
- * If we're doing a bulk get, we don't want to actually return
- * the data: __ham_bulk will take care of cracking out the
- * duplicates appropriately.
- *
- * The rest of this function calculates partial offsets and
- * handles the actual __db_ret, so just return if
- * DB_MULTIPLE(_KEY) is set.
- */
- if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY))
- return (0);
- /*
- * Now, everything is initialized, grab a duplicate if
- * necessary.
- */
- if (F_ISSET(hcp, H_ISDUP)) { /* Case 3 */
- /*
- * Copy the DBT in case we are retrieving into user
- * memory and we need the parameters for it. If the
- * user requested a partial, then we need to adjust
- * the user's parameters to get the partial of the
- * duplicate which is itself a partial.
- */
- memcpy(&tmp_val, val, sizeof(*val));
- if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
- /*
- * Take the user's length unless it would go
- * beyond the end of the duplicate.
- */
- if (tmp_val.doff + hcp->dup_off > hcp->dup_len)
- tmp_val.dlen = 0;
- else if (tmp_val.dlen + tmp_val.doff >
- hcp->dup_len)
- tmp_val.dlen =
- hcp->dup_len - tmp_val.doff;
- /*
- * Calculate the new offset.
- */
- tmp_val.doff += hcp->dup_off;
- } else {
- F_SET(&tmp_val, DB_DBT_PARTIAL);
- tmp_val.dlen = hcp->dup_len;
- tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
- }
- myval = &tmp_val;
- }
- /*
- * Finally, if we had a duplicate, pp, ndx, and myval should be
- * set appropriately.
- */
- if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata->data,
- &dbc->rdata->ulen)) != 0)
- return (ret);
- /*
- * In case we sent a temporary off to db_ret, set the real
- * return values.
- */
- val->data = myval->data;
- val->size = myval->size;
- F_SET(val, DB_DBT_ISSET);
- return (0);
- }
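- /*
- * __ham_overwrite --
- * Replace the data item to which the cursor refers. Handles
- * partial puts and on-page duplicates (converting the set to
- * off-page duplicates if the replacement would make it too big),
- * then does the actual update through __ham_replpair.
- */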
- static int
- __ham_overwrite(dbc, nval, flags)
- DBC *dbc;
- DBT *nval;
- u_int32_t flags;
- {
- DB *dbp;
- DB_ENV *dbenv;
- HASH_CURSOR *hcp;
- DBT *myval, tmp_val, tmp_val2;
- void *newrec;
- u_int8_t *hk, *p;
- u_int32_t len, nondup_size;
- db_indx_t newsize;
- int ret;
- dbp = dbc->dbp;
- dbenv = dbp->dbenv;
- hcp = (HASH_CURSOR *)dbc->internal;
- if (F_ISSET(hcp, H_ISDUP)) {
- /*
- * This is an overwrite of a duplicate. We should never
- * be off-page at this point.
- */
- DB_ASSERT(hcp->opd == NULL);
- /* On page dups */
- if (F_ISSET(nval, DB_DBT_PARTIAL)) {
- /*
- * We're going to have to get the current item, then
- * construct the record, do any padding and do a
- * replace.
- */
- memset(&tmp_val, 0, sizeof(tmp_val));
- if ((ret =
- __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0)
- return (ret);
- /* Figure out new size. */
- nondup_size = tmp_val.size;
- newsize = nondup_size;
- /*
- * Three cases:
- * 1. strictly append (may need to allocate space
- * for pad bytes; really gross).
- * 2. overwrite some and append.
- * 3. strictly overwrite.
- */
- if (nval->doff > nondup_size)
- newsize +=
- (nval->doff - nondup_size + nval->size);
- else if (nval->doff + nval->dlen > nondup_size)
- newsize += nval->size -
- (nondup_size - nval->doff);
- else
- newsize += nval->size - nval->dlen;
- /*
- * Make sure that the new size doesn't put us over
- * the onpage duplicate size in which case we need
- * to convert to off-page duplicates.
- */
- if (ISBIG(hcp, hcp->dup_tlen - nondup_size + newsize)) {
- if ((ret = __ham_dup_convert(dbc)) != 0)
- return (ret);
- return (hcp->opd->c_am_put(hcp->opd,
- NULL, nval, flags, NULL));
- }
- if ((ret = __os_malloc(dbp->dbenv,
- DUP_SIZE(newsize), &newrec)) != 0)
- return (ret);
- memset(&tmp_val2, 0, sizeof(tmp_val2));
- F_SET(&tmp_val2, DB_DBT_PARTIAL);
- /* Construct the record. */
- p = newrec;
- /* Initial size. */
- memcpy(p, &newsize, sizeof(db_indx_t));
- p += sizeof(db_indx_t);
- /* First part of original record. */
- len = nval->doff > tmp_val.size
- ? tmp_val.size : nval->doff;
- memcpy(p, tmp_val.data, len);
- p += len;
- if (nval->doff > tmp_val.size) {
- /* Padding */
- memset(p, 0, nval->doff - tmp_val.size);
- p += nval->doff - tmp_val.size;
- }
- /* New bytes */
- memcpy(p, nval->data, nval->size);
- p += nval->size;
- /* End of original record (if there is any) */
- if (nval->doff + nval->dlen < tmp_val.size) {
- len = tmp_val.size - nval->doff - nval->dlen;
- memcpy(p, (u_int8_t *)tmp_val.data +
- nval->doff + nval->dlen, len);
- p += len;
- }
- /* Final size. */
- memcpy(p, &newsize, sizeof(db_indx_t));
- /*
- * Make sure that the caller isn't corrupting
- * the sort order.
- */
- if (dbp->dup_compare != NULL) {
- tmp_val2.data =
- (u_int8_t *)newrec + sizeof(db_indx_t);
- tmp_val2.size = newsize;
- if (dbp->dup_compare(
- dbp, &tmp_val, &tmp_val2) != 0) {
- (void)__os_free(dbenv, newrec);
- return (__db_duperr(dbp, flags));
- }
- }
- tmp_val2.data = newrec;
- tmp_val2.size = DUP_SIZE(newsize);
- tmp_val2.doff = hcp->dup_off;
- tmp_val2.dlen = DUP_SIZE(hcp->dup_len);
- ret = __ham_replpair(dbc, &tmp_val2, 0);
- (void)__os_free(dbenv, newrec);
- /* Update cursor */
- if (ret != 0)
- return (ret);
- if (newsize > nondup_size)
- hcp->dup_tlen += (newsize - nondup_size);
- else
- hcp->dup_tlen -= (nondup_size - newsize);
- hcp->dup_len = DUP_SIZE(newsize);
- return (0);
- } else {
- /* Check whether we need to convert to off page. */
- if (ISBIG(hcp,
- hcp->dup_tlen - hcp->dup_len + nval->size)) {
- if ((ret = __ham_dup_convert(dbc)) != 0)
- return (ret);
- return (hcp->opd->c_am_put(hcp->opd,
- NULL, nval, flags, NULL));
- }
- /* Make sure we maintain sort order. */
- if (dbp->dup_compare != NULL) {
- tmp_val2.data =
- HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page,
- hcp->indx)) + hcp->dup_off +
- sizeof(db_indx_t);
- tmp_val2.size = hcp->dup_len;
- if (dbp->dup_compare(dbp, nval, &tmp_val2) != 0)
- return (EINVAL);
- }
- /* Overwriting a complete duplicate. */
- if ((ret =
- __ham_make_dup(dbp->dbenv, nval, &tmp_val,
- &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0)
- return (ret);
- /* Now fix what we are replacing. */
- tmp_val.doff = hcp->dup_off;
- tmp_val.dlen = DUP_SIZE(hcp->dup_len);
- /* Update cursor */
- if (nval->size > hcp->dup_len)
- hcp->dup_tlen += (nval->size - hcp->dup_len);
- else
- hcp->dup_tlen -= (hcp->dup_len - nval->size);
- hcp->dup_len = (db_indx_t)DUP_SIZE(nval->size);
- }
- myval = &tmp_val;
- } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
- /* Put/overwrite */
- memcpy(&tmp_val, nval, sizeof(*nval));
- F_SET(&tmp_val, DB_DBT_PARTIAL);
- tmp_val.doff = 0;
- hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
- if (HPAGE_PTYPE(hk) == H_OFFPAGE)
- memcpy(&tmp_val.dlen,
- HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
- else
- tmp_val.dlen = LEN_HDATA(dbp, hcp->page,
- hcp->hdr->dbmeta.pagesize, hcp->indx);
- myval = &tmp_val;
- } else
- /* Regular partial put */
- myval = nval;
- return (__ham_replpair(dbc, myval, 0));
- }
- /*
- * Given a key and a cursor, sets the cursor to the page/ndx on which
- * the key resides. If the key is found, the cursor H_OK flag is set
- * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
- * If the key is not found, the H_OK flag is not set. If the sought
- * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
- * are set indicating where an add might take place. If it is 0,
- * none of the cursor pointer fields are valid.
- */
- static int
- __ham_lookup(dbc, key, sought, mode, pgnop)
- DBC *dbc;
- const DBT *key;
- u_int32_t sought;
- db_lockmode_t mode;
- db_pgno_t *pgnop;
- {
- DB *dbp;
- HASH_CURSOR *hcp;
- db_pgno_t pgno;
- u_int32_t tlen;
- int match, ret;
- u_int8_t *hk, *dk;
- dbp = dbc->dbp;
- hcp = (HASH_CURSOR *)dbc->internal;
- /*
- * Set up cursor so that we're looking for space to add an item
- * as we cycle through the pages looking for the key.
- */
- if ((ret = __ham_item_reset(dbc)) != 0)
- return (ret);
- hcp->seek_size = sought;
- hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size);
- hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
- for (;;) {
- *pgnop = PGNO_INVALID;
- if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0)
- return (ret);
- if (F_ISSET(hcp, H_NOMORE))
- break;
- hk = H_PAIRKEY(dbp, hcp->page, hcp->indx);
- switch (HPAGE_PTYPE(hk)) {
- case H_OFFPAGE:
- memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
- if (tlen == key->size) {
- memcpy(&pgno,
- HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
- if ((ret = __db_moff(dbp,
- key, pgno, tlen, NULL, &match)) != 0)
- return (ret);
- if (match == 0)
- goto found_key;
- }
- break;
- case H_KEYDATA:
- if (key->size ==
- LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx) &&
- memcmp(key->data,
- HKEYDATA_DATA(hk), key->size) == 0) {
- /* Found the key, check for data type. */
- found_key: F_SET(hcp, H_OK);
- dk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
- if (HPAGE_PTYPE(dk) == H_OFFDUP)
- memcpy(pgnop, HOFFDUP_PGNO(dk),
- sizeof(db_pgno_t));
- return (0);
- }
- break;
- case H_DUPLICATE:
- case H_OFFDUP:
- /*
- * These are errors because keys are never
- * duplicated, only data items are.
- */
- return (__db_pgfmt(dbp->dbenv, PGNO(hcp->page)));
- }
- }
- /*
- * Item was not found.
- */
- if (sought != 0)
- return (ret);
- return (ret);
- }
- /*
- * __ham_init_dbt --
- * Initialize a dbt using some possibly already allocated storage
- * for items.
- *
- * PUBLIC: int __ham_init_dbt __P((DB_ENV *,
- * PUBLIC: DBT *, u_int32_t, void **, u_int32_t *));
- */
- int
- __ham_init_dbt(dbenv, dbt, size, bufp, sizep)
- DB_ENV *dbenv;
- DBT *dbt;
- u_int32_t size;
- void **bufp;
- u_int32_t *sizep;
- {
- int ret;
- memset(dbt, 0, sizeof(*dbt));
- if (*sizep < size) {
- if ((ret = __os_realloc(dbenv, size, bufp)) != 0) {
- *sizep = 0;
- return (ret);
- }
- *sizep = size;
- }
- dbt->data = *bufp;
- dbt->size = size;
- return (0);
- }
- /*
- * Adjust the cursor after an insert or delete. The cursor passed is
- * the one that was operated upon; we just need to check any of the
- * others.
- *
- * len indicates the length of the item added/deleted
- * add indicates if the item indicated by the cursor has just been
- * added (add == 1) or deleted (add == 0).
- * dup indicates if the addition occurred into a duplicate set.
- *
- * PUBLIC: int __ham_c_update
- * PUBLIC: __P((DBC *, u_int32_t, int, int));
- */
- int
- __ham_c_update(dbc, len, add, is_dup)
- DBC *dbc;
- u_int32_t len;
- int add, is_dup;
- {
- DB *dbp, *ldbp;
- DBC *cp;
- DB_ENV *dbenv;
- DB_LSN lsn;
- DB_TXN *my_txn;
- HASH_CURSOR *hcp, *lcp;
- int found, ret;
- u_int32_t order;
- dbp = dbc->dbp;
- dbenv = dbp->dbenv;
- hcp = (HASH_CURSOR *)dbc->internal;
- /*
- * Adjustment will only be logged if this is a subtransaction.
- * Only subtransactions can abort and affect their parent
- * transaction's cursors.
- */
- my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
- found = 0;
- MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
- /*
- * Calculate the order of this deleted record.
- * This will be one greater than any cursor that is pointing
- * at this record and already marked as deleted.
- */
- order = 0;
- if (!add) {
- order = 1;
- for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
- ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
- ldbp = LIST_NEXT(ldbp, dblistlinks)) {
- MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
- for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
- cp = TAILQ_NEXT(cp, links)) {
- if (cp == dbc || cp->dbtype != DB_HASH)
- continue;
- lcp = (HASH_CURSOR *)cp->internal;
- if (F_ISSET(lcp, H_DELETED) &&
- hcp->pgno == lcp->pgno &&
- hcp->indx == lcp->indx &&
- order <= lcp->order &&
- (!is_dup || hcp->dup_off == lcp->dup_off))
- order = lcp->order + 1;
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
- }
- hcp->order = order;
- }
- for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
- ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
- ldbp = LIST_NEXT(ldbp, dblistlinks)) {
- MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
- for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
- cp = TAILQ_NEXT(cp, links)) {
- if (cp == dbc || cp->dbtype != DB_HASH)
- continue;
- lcp = (HASH_CURSOR *)cp->internal;
- if (lcp->pgno != hcp->pgno || lcp->indx == NDX_INVALID)
- continue;
- if (my_txn != NULL && cp->txn != my_txn)
- found = 1;
- if (!is_dup) {
- if (add) {
- /*
- * This routine is not called to add
- * non-dup records which are always put
- * at the end. It is only called from
- * recovery in this case and the
- * cursor will be marked deleted.
- * We are "undeleting" so unmark all
- * cursors with the same order.
- */
- if (lcp->indx == hcp->indx &&
- F_ISSET(lcp, H_DELETED)) {
- if (lcp->order == hcp->order)
- F_CLR(lcp, H_DELETED);
- else if (lcp->order >
- hcp->order) {
- /*
- * If we've moved this cursor's
- * index, split its order
- * number--i.e., decrement it by
- * enough so that the lowest
- * cursor moved has order 1.
- * hcp->order is the split
- * point, so decrement by one
- * less than that.
- */
- lcp->order -=
- (hcp->order - 1);
- lcp->indx += 2;
- }
- } else if (lcp->indx >= hcp->indx)
- lcp->indx += 2;
- } else {
- if (lcp->indx > hcp->indx) {
- lcp->indx -= 2;
- if (lcp->indx == hcp->indx &&
- F_ISSET(lcp, H_DELETED))
- lcp->order += order;
- } else if (lcp->indx == hcp->indx &&
- !F_ISSET(lcp, H_DELETED)) {
- F_SET(lcp, H_DELETED);
- F_CLR(lcp, H_ISDUP);
- lcp->order = order;
- }
- }
- } else if (lcp->indx == hcp->indx) {
- /*
- * Handle duplicates. This routine is
- * only called for on page dups.
- * Off page dups are handled by btree/rtree
- * code.
- */
- if (add) {
- lcp->dup_tlen += len;
- if (lcp->dup_off == hcp->dup_off &&
- F_ISSET(hcp, H_DELETED) &&
- F_ISSET(lcp, H_DELETED)) {
- /* Abort of a delete. */
- if (lcp->order == hcp->order)
- F_CLR(lcp, H_DELETED);
- else if (lcp->order >
- hcp->order) {
- lcp->order -=
- (hcp->order -1);
- lcp->dup_off += len;
- }
- } else if (lcp->dup_off >= hcp->dup_off)
- lcp->dup_off += len;
- } else {
- lcp->dup_tlen -= len;
- if (lcp->dup_off > hcp->dup_off) {
- lcp->dup_off -= len;
- if (lcp->dup_off ==
- hcp->dup_off &&
- F_ISSET(lcp, H_DELETED))
- lcp->order += order;
- } else if (lcp->dup_off ==
- hcp->dup_off &&
- !F_ISSET(lcp, H_DELETED)) {
- F_SET(lcp, H_DELETED);
- lcp->order = order;
- }
- }
- }
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DBC_LOGGING(dbc)) {
- if ((ret = __ham_curadj_log(dbp, my_txn, &lsn, 0, hcp->pgno,
- hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0)
- return (ret);
- }
- return (0);
- }
- /*
- * __ham_get_clist --
- *
- * Get a list of cursors either on a particular bucket or on a particular
- * page and index combination. The former is so that we can update
- * cursors on a split. The latter is so we can update cursors when we
- * move items off page.
- *
- * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***));
- */
- int
- __ham_get_clist(dbp, pgno, indx, listp)
- DB *dbp;
- db_pgno_t pgno;
- u_int32_t indx;
- DBC ***listp;
- {
- DB *ldbp;
- DBC *cp;
- DB_ENV *dbenv;
- int nalloc, nused, ret;
- /*
- * Assume that finding anything is the exception, so optimize for
- * the case where there aren't any.
- */
- nalloc = nused = 0;
- *listp = NULL;
- dbenv = dbp->dbenv;
- MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
- for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
- ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
- ldbp = LIST_NEXT(ldbp, dblistlinks)) {
- MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
- for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
- cp = TAILQ_NEXT(cp, links))
- /*
- * We match if cp->pgno matches the specified
- * pgno, and if either the cp->indx matches
- * or we weren't given an index.
- */
- if (cp->internal->pgno == pgno &&
- (indx == NDX_INVALID ||
- cp->internal->indx == indx)) {
- if (nused >= nalloc) {
- nalloc += 10;
- if ((ret = __os_realloc(dbp->dbenv,
- nalloc * sizeof(HASH_CURSOR *),
- listp)) != 0)
- goto err;
- }
- (*listp)[nused++] = cp;
- }
- MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);
- }
- MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (listp != NULL) {
- if (nused >= nalloc) {
- nalloc++;
- if ((ret = __os_realloc(dbp->dbenv,
- nalloc * sizeof(HASH_CURSOR *), listp)) != 0)
- return (ret);
- }
- (*listp)[nused] = NULL;
- }
- return (0);
- err:
- MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp);
- MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- return (ret);
- }
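- /*
- * __ham_c_writelock --
- * Upgrade the cursor's bucket lock to a write lock if it is
- * currently unset or only a read lock.
- */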
- static int
- __ham_c_writelock(dbc)
- DBC *dbc;
- {
- DB_ENV *dbenv;
- DB_LOCK tmp_lock;
- HASH_CURSOR *hcp;
- int ret;
- /*
- * All we need do is acquire the lock and let the off-page
- * dup tree do its thing.
- */
- if (!STD_LOCKING(dbc))
- return (0);
- hcp = (HASH_CURSOR *)dbc->internal;
- if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode == DB_LOCK_READ)) {
- tmp_lock = hcp->lock;
- if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) != 0)
- return (ret);
- dbenv = dbc->dbp->dbenv;
- if (LOCK_ISSET(tmp_lock) &&
- (ret = dbenv->lock_put(dbenv, &tmp_lock)) != 0)
- return (ret);
- }
- return (0);
- }