bt_verify.c
上传用户:romrleung
上传日期:2022-05-23
资源大小:18897k
文件大小:61k
- /*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1999-2002
- * Sleepycat Software. All rights reserved.
- *
- * $Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp $
- */
- #include "db_config.h"
- #ifndef lint
- static const char revid[] = "$Id: bt_verify.c,v 1.76 2002/07/03 19:03:51 bostic Exp $";
- #endif /* not lint */
- #ifndef NO_SYSTEM_INCLUDES
- #include <sys/types.h>
- #include <string.h>
- #endif
- #include "db_int.h"
- #include "dbinc/db_page.h"
- #include "dbinc/db_verify.h"
- #include "dbinc/btree.h"
- static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *));
- static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
- db_indx_t *, u_int32_t));
- static int __bam_vrfy_treeorder __P((DB *, db_pgno_t, PAGE *, BINTERNAL *,
- BINTERNAL *, int (*)(DB *, const DBT *, const DBT *), u_int32_t));
- static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
- db_indx_t *, u_int32_t));
- #define OKFLAGS (DB_AGGRESSIVE | DB_NOORDERCHK | DB_SALVAGE)
- /*
- * __bam_vrfy_meta --
- * Verify the btree-specific part of a metadata page.
- *
- * PUBLIC: int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *,
- * PUBLIC: db_pgno_t, u_int32_t));
- */
- int
- __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- BTMETA *meta;
- db_pgno_t pgno;
- u_int32_t flags;
- {
- VRFY_PAGEINFO *pip;
- int isbad, t_ret, ret;
- db_indx_t ovflsize;
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- isbad = 0;
- /*
- * If VRFY_INCOMPLETE is not set, then we didn't come through
- * __db_vrfy_pagezero and didn't incompletely
- * check this page--we haven't checked it at all.
- * Thus we need to call __db_vrfy_meta and check the common fields.
- *
- * If VRFY_INCOMPLETE is set, we've already done all the same work
- * in __db_vrfy_pagezero, so skip the check.
- */
- if (!F_ISSET(pip, VRFY_INCOMPLETE) &&
- (ret = __db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- /* bt_minkey: must be >= 2; must produce sensible ovflsize */
- /* avoid division by zero */
- ovflsize = meta->minkey > 0 ?
- B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0;
- if (meta->minkey < 2 ||
- ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
- pip->bt_minkey = 0;
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: nonsensical bt_minkey value %lu on metadata page",
- (u_long)pgno, (u_long)meta->minkey));
- } else
- pip->bt_minkey = meta->minkey;
- /* bt_maxkey: no constraints (XXX: right?) */
- pip->bt_maxkey = meta->maxkey;
- /* re_len: no constraints on this (may be zero or huge--we make rope) */
- pip->re_len = meta->re_len;
- /*
- * The root must not be current page or 0 and it must be within
- * database. If this metadata page is the master meta data page
- * of the file, then the root page had better be page 1.
- */
- pip->root = 0;
- if (meta->root == PGNO_INVALID ||
- meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
- (pgno == PGNO_BASE_MD && meta->root != 1)) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: nonsensical root page %lu on metadata page",
- (u_long)pgno, (u_long)meta->root));
- } else
- pip->root = meta->root;
- /* Flags. */
- if (F_ISSET(&meta->dbmeta, BTM_RENUMBER))
- F_SET(pip, VRFY_IS_RRECNO);
- if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) {
- /*
- * If this is a master db meta page, it had better not have
- * duplicates.
- */
- if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: Btree metadata page has both duplicates and multiple databases",
- (u_long)pgno));
- }
- F_SET(pip, VRFY_HAS_SUBDBS);
- }
- if (F_ISSET(&meta->dbmeta, BTM_DUP))
- F_SET(pip, VRFY_HAS_DUPS);
- if (F_ISSET(&meta->dbmeta, BTM_DUPSORT))
- F_SET(pip, VRFY_HAS_DUPSORT);
- if (F_ISSET(&meta->dbmeta, BTM_RECNUM))
- F_SET(pip, VRFY_HAS_RECNUMS);
- if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
- EPRINT((dbp->dbenv,
- "Page %lu: Btree metadata page illegally has both recnums and dups",
- (u_long)pgno));
- isbad = 1;
- }
- if (F_ISSET(&meta->dbmeta, BTM_RECNO)) {
- F_SET(pip, VRFY_IS_RECNO);
- dbp->type = DB_RECNO;
- } else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: metadata page has renumber flag set but is not recno",
- (u_long)pgno));
- }
- if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
- EPRINT((dbp->dbenv,
- "Page %lu: recno metadata page specifies duplicates",
- (u_long)pgno));
- isbad = 1;
- }
- if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN))
- F_SET(pip, VRFY_IS_FIXEDLEN);
- else if (pip->re_len > 0) {
- /*
- * It's wrong to have an re_len if it's not a fixed-length
- * database
- */
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: re_len of %lu in non-fixed-length database",
- (u_long)pgno, (u_long)pip->re_len));
- }
- /*
- * We do not check that the rest of the page is 0, because it may
- * not be and may still be correct.
- */
- err: if ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __ram_vrfy_leaf --
- * Verify a recno leaf page.
- *
- * PUBLIC: int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
- * PUBLIC: u_int32_t));
- */
- int
- __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t flags;
- {
- BKEYDATA *bk;
- VRFY_PAGEINFO *pip;
- db_indx_t i;
- int ret, t_ret, isbad;
- u_int32_t re_len_guess, len;
- isbad = 0;
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- if ((ret = __db_fchk(dbp->dbenv,
- "__ram_vrfy_leaf", flags, OKFLAGS)) != 0)
- goto err;
- if (TYPE(h) != P_LRECNO) {
- /* We should not have been called. */
- TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_leaf", pgno, TYPE(h));
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- /*
- * Verify (and, if relevant, save off) page fields common to
- * all PAGEs.
- */
- if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- /*
- * Verify inp[]. Return immediately if it returns DB_VERIFY_BAD;
- * further checks are dangerous.
- */
- if ((ret = __bam_vrfy_inp(dbp,
- vdp, h, pgno, &pip->entries, flags)) != 0)
- goto err;
- if (F_ISSET(pip, VRFY_HAS_DUPS)) {
- EPRINT((dbp->dbenv,
- "Page %lu: Recno database has dups", (u_long)pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- /*
- * Walk through inp and see if the lengths of all the records are the
- * same--if so, this may be a fixed-length database, and we want to
- * save off this value. We know inp to be safe if we've gotten this
- * far.
- */
- re_len_guess = 0;
- for (i = 0; i < NUM_ENT(h); i++) {
- bk = GET_BKEYDATA(dbp, h, i);
- /* KEYEMPTY. Go on. */
- if (B_DISSET(bk->type))
- continue;
- if (bk->type == B_OVERFLOW)
- len = ((BOVERFLOW *)bk)->tlen;
- else if (bk->type == B_KEYDATA)
- len = bk->len;
- else {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: nonsensical type for item %lu",
- (u_long)pgno, (u_long)i));
- continue;
- }
- if (re_len_guess == 0)
- re_len_guess = len;
- /*
- * Is this item's len the same as the last one's? If not,
- * reset to 0 and break--we don't have a single re_len.
- * Otherwise, go on to the next item.
- */
- if (re_len_guess != len) {
- re_len_guess = 0;
- break;
- }
- }
- pip->re_len = re_len_guess;
- /* Save off record count. */
- pip->rec_cnt = NUM_ENT(h);
- err: if ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __bam_vrfy --
- * Verify a btree leaf or internal page.
- *
- * PUBLIC: int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
- * PUBLIC: u_int32_t));
- */
- int
- __bam_vrfy(dbp, vdp, h, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t flags;
- {
- VRFY_PAGEINFO *pip;
- int ret, t_ret, isbad;
- isbad = 0;
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- switch (TYPE(h)) {
- case P_IBTREE:
- case P_IRECNO:
- case P_LBTREE:
- case P_LDUP:
- break;
- default:
- TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy", pgno, TYPE(h));
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- /*
- * Verify (and, if relevant, save off) page fields common to
- * all PAGEs.
- */
- if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- /*
- * The record count is, on internal pages, stored in an overloaded
- * next_pgno field. Save it off; we'll verify it when we check
- * overall database structure. We could overload the field
- * in VRFY_PAGEINFO, too, but this seems gross, and space
- * is not at such a premium.
- */
- pip->rec_cnt = RE_NREC(h);
- /*
- * Verify inp[].
- */
- if (TYPE(h) == P_IRECNO) {
- if ((ret = __ram_vrfy_inp(dbp,
- vdp, h, pgno, &pip->entries, flags)) != 0)
- goto err;
- } else if ((ret = __bam_vrfy_inp(dbp,
- vdp, h, pgno, &pip->entries, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- EPRINT((dbp->dbenv,
- "Page %lu: item order check unsafe: skipping",
- (u_long)pgno));
- } else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
- __bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) {
- /*
- * We know that the elements of inp are reasonable.
- *
- * Check that elements fall in the proper order.
- */
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- err: if ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __ram_vrfy_inp --
- * Verify that all entries in a P_IRECNO inp[] array are reasonable,
- * and count them. Note that P_LRECNO uses __bam_vrfy_inp;
- * P_IRECNOs are a special, and simpler, case, since they have
- * RINTERNALs rather than BKEYDATA/BINTERNALs.
- */
- static int
- __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- db_indx_t *nentriesp;
- u_int32_t flags;
- {
- RINTERNAL *ri;
- VRFY_CHILDINFO child;
- VRFY_PAGEINFO *pip;
- int ret, t_ret, isbad;
- u_int32_t himark, i, offset, nentries;
- db_indx_t *inp;
- u_int8_t *pagelayout, *p;
- isbad = 0;
- memset(&child, 0, sizeof(VRFY_CHILDINFO));
- nentries = 0;
- pagelayout = NULL;
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- if (TYPE(h) != P_IRECNO) {
- TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_inp", pgno, TYPE(h));
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- himark = dbp->pgsize;
- if ((ret =
- __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0)
- goto err;
- memset(pagelayout, 0, dbp->pgsize);
- inp = P_INP(dbp, h);
- for (i = 0; i < NUM_ENT(h); i++) {
- if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) {
- EPRINT((dbp->dbenv,
- "Page %lu: entries listing %lu overlaps data",
- (u_long)pgno, (u_long)i));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- offset = inp[i];
- /*
- * Check that the item offset is reasonable: it points
- * somewhere after the inp array and before the end of the
- * page.
- */
- if (offset <= (u_int32_t)((u_int8_t *)inp + i -
- (u_int8_t *)h) ||
- offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: bad offset %lu at index %lu",
- (u_long)pgno, (u_long)offset, (u_long)i));
- continue;
- }
- /* Update the high-water mark (what HOFFSET should be) */
- if (offset < himark)
- himark = offset;
- nentries++;
- /* Make sure this RINTERNAL is not multiply referenced. */
- ri = GET_RINTERNAL(dbp, h, i);
- if (pagelayout[offset] == 0) {
- pagelayout[offset] = 1;
- child.pgno = ri->pgno;
- child.type = V_RECNO;
- child.nrecs = ri->nrecs;
- if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
- goto err;
- } else {
- EPRINT((dbp->dbenv,
- "Page %lu: RINTERNAL structure at offset %lu referenced twice",
- (u_long)pgno, (u_long)offset));
- isbad = 1;
- }
- }
- for (p = pagelayout + himark;
- p < pagelayout + dbp->pgsize;
- p += RINTERNAL_SIZE)
- if (*p != 1) {
- EPRINT((dbp->dbenv,
- "Page %lu: gap between items at offset %lu",
- (u_long)pgno, (u_long)(p - pagelayout)));
- isbad = 1;
- }
- if ((db_indx_t)himark != HOFFSET(h)) {
- EPRINT((dbp->dbenv,
- "Page %lu: bad HOFFSET %lu, appears to be %lu",
- (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark));
- isbad = 1;
- }
- *nentriesp = nentries;
- err: if ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- if (pagelayout != NULL)
- __os_free(dbp->dbenv, pagelayout);
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __bam_vrfy_inp --
- * Verify that all entries in inp[] array are reasonable;
- * count them.
- */
- static int
- __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- db_indx_t *nentriesp;
- u_int32_t flags;
- {
- BKEYDATA *bk;
- BOVERFLOW *bo;
- VRFY_CHILDINFO child;
- VRFY_PAGEINFO *pip;
- int isbad, initem, isdupitem, ret, t_ret;
- u_int32_t himark, offset; /* These would be db_indx_ts but for algnmt.*/
- u_int32_t i, endoff, nentries;
- u_int8_t *pagelayout;
- isbad = isdupitem = 0;
- nentries = 0;
- memset(&child, 0, sizeof(VRFY_CHILDINFO));
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- switch (TYPE(h)) {
- case P_IBTREE:
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- break;
- default:
- /*
- * In the salvager, we might call this from a page which
- * we merely suspect is a btree page. Otherwise, it
- * shouldn't get called--if it is, that's a verifier bug.
- */
- if (LF_ISSET(DB_SALVAGE))
- break;
- TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_inp", pgno, TYPE(h));
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- /*
- * Loop through inp[], the array of items, until we either
- * run out of entries or collide with the data. Keep track
- * of h_offset in himark.
- *
- * For each element in inp[i], make sure it references a region
- * that starts after the end of the inp array (as defined by
- * NUM_ENT(h)), ends before the beginning of the page, doesn't
- * overlap any other regions, and doesn't have a gap between
- * it and the region immediately after it.
- */
- himark = dbp->pgsize;
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &pagelayout)) != 0)
- goto err;
- memset(pagelayout, 0, dbp->pgsize);
- for (i = 0; i < NUM_ENT(h); i++) {
- switch (ret = __db_vrfy_inpitem(dbp,
- h, pgno, i, 1, flags, &himark, &offset)) {
- case 0:
- break;
- case DB_VERIFY_BAD:
- isbad = 1;
- continue;
- case DB_VERIFY_FATAL:
- isbad = 1;
- goto err;
- default:
- DB_ASSERT(ret != 0);
- break;
- }
- /*
- * We now have a plausible beginning for the item, and we know
- * its length is safe.
- *
- * Mark the beginning and end in pagelayout so we can make sure
- * items have no overlaps or gaps.
- */
- bk = GET_BKEYDATA(dbp, h, i);
- #define ITEM_BEGIN 1
- #define ITEM_END 2
- if (pagelayout[offset] == 0)
- pagelayout[offset] = ITEM_BEGIN;
- else if (pagelayout[offset] == ITEM_BEGIN) {
- /*
- * Having two inp entries that point at the same patch
- * of page is legal if and only if the page is
- * a btree leaf and they're onpage duplicate keys--
- * that is, if (i % P_INDX) == 0.
- */
- if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) {
- /* Flag for later. */
- F_SET(pip, VRFY_HAS_DUPS);
- /* Bump up nentries so we don't undercount. */
- nentries++;
- /*
- * We'll check to make sure the end is
- * equal, too.
- */
- isdupitem = 1;
- } else {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: duplicated item %lu",
- (u_long)pgno, (u_long)i));
- }
- }
- /*
- * Mark the end. Its location varies with the page type
- * and the item type.
- *
- * If the end already has a sign other than 0, do nothing--
- * it's an overlap that we'll catch later.
- */
- switch(B_TYPE(bk->type)) {
- case B_KEYDATA:
- if (TYPE(h) == P_IBTREE)
- /* It's a BINTERNAL. */
- endoff = offset + BINTERNAL_SIZE(bk->len) - 1;
- else
- endoff = offset + BKEYDATA_SIZE(bk->len) - 1;
- break;
- case B_DUPLICATE:
- /*
- * Flag that we have dups; we'll check whether
- * that's okay during the structure check.
- */
- F_SET(pip, VRFY_HAS_DUPS);
- /* FALLTHROUGH */
- case B_OVERFLOW:
- /*
- * Overflow entries on internal pages are stored
- * as the _data_ of a BINTERNAL; overflow entries
- * on leaf pages are stored as the entire entry.
- */
- endoff = offset +
- ((TYPE(h) == P_IBTREE) ?
- BINTERNAL_SIZE(BOVERFLOW_SIZE) :
- BOVERFLOW_SIZE) - 1;
- break;
- default:
- /*
- * We'll complain later; for now, just mark
- * a minimum.
- */
- endoff = offset + BKEYDATA_SIZE(0) - 1;
- break;
- }
- /*
- * If this is an onpage duplicate key we've seen before,
- * the end had better coincide too.
- */
- if (isdupitem && pagelayout[endoff] != ITEM_END) {
- EPRINT((dbp->dbenv,
- "Page %lu: duplicated item %lu",
- (u_long)pgno, (u_long)i));
- isbad = 1;
- } else if (pagelayout[endoff] == 0)
- pagelayout[endoff] = ITEM_END;
- isdupitem = 0;
- /*
- * There should be no deleted items in a quiescent tree,
- * except in recno.
- */
- if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: item %lu marked deleted",
- (u_long)pgno, (u_long)i));
- }
- /*
- * Check the type and such of bk--make sure it's reasonable
- * for the pagetype.
- */
- switch (B_TYPE(bk->type)) {
- case B_KEYDATA:
- /*
- * This is a normal, non-overflow BKEYDATA or BINTERNAL.
- * The only thing to check is the len, and that's
- * already been done.
- */
- break;
- case B_DUPLICATE:
- if (TYPE(h) == P_IBTREE) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: duplicate page referenced by internal btree page at item %lu",
- (u_long)pgno, (u_long)i));
- break;
- } else if (TYPE(h) == P_LRECNO) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: duplicate page referenced by recno page at item %lu",
- (u_long)pgno, (u_long)i));
- break;
- }
- /* FALLTHROUGH */
- case B_OVERFLOW:
- bo = (TYPE(h) == P_IBTREE) ?
- (BOVERFLOW *)(((BINTERNAL *)bk)->data) :
- (BOVERFLOW *)bk;
- if (B_TYPE(bk->type) == B_OVERFLOW)
- /* Make sure tlen is reasonable. */
- if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: impossible tlen %lu, item %lu",
- (u_long)pgno,
- (u_long)bo->tlen, (u_long)i));
- /* Don't save as a child. */
- break;
- }
- if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
- bo->pgno == PGNO_INVALID) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: offpage item %lu has bad pgno %lu",
- (u_long)pgno, (u_long)i, (u_long)bo->pgno));
- /* Don't save as a child. */
- break;
- }
- child.pgno = bo->pgno;
- child.type = (B_TYPE(bk->type) == B_OVERFLOW ?
- V_OVERFLOW : V_DUPLICATE);
- child.tlen = bo->tlen;
- if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
- goto err;
- break;
- default:
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: item %lu of invalid type %lu",
- (u_long)pgno, (u_long)i));
- break;
- }
- }
- /*
- * Now, loop through and make sure the items are contiguous and
- * non-overlapping.
- */
- initem = 0;
- for (i = himark; i < dbp->pgsize; i++)
- if (initem == 0)
- switch (pagelayout[i]) {
- case 0:
- /* May be just for alignment. */
- if (i != ALIGN(i, sizeof(u_int32_t)))
- continue;
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: gap between items at offset %lu",
- (u_long)pgno, (u_long)i));
- /* Find the end of the gap */
- for ( ; pagelayout[i + 1] == 0 &&
- (size_t)(i + 1) < dbp->pgsize; i++)
- ;
- break;
- case ITEM_BEGIN:
- /* We've found an item. Check its alignment. */
- if (i != ALIGN(i, sizeof(u_int32_t))) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: offset %lu unaligned",
- (u_long)pgno, (u_long)i));
- }
- initem = 1;
- nentries++;
- break;
- case ITEM_END:
- /*
- * We've hit the end of an item even though
- * we don't think we're in one; must
- * be an overlap.
- */
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: overlapping items at offset %lu",
- (u_long)pgno, (u_long)i));
- break;
- default:
- /* Should be impossible. */
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- else
- switch (pagelayout[i]) {
- case 0:
- /* In the middle of an item somewhere. Okay. */
- break;
- case ITEM_END:
- /* End of an item; switch to out-of-item mode.*/
- initem = 0;
- break;
- case ITEM_BEGIN:
- /*
- * Hit a second item beginning without an
- * end. Overlap.
- */
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: overlapping items at offset %lu",
- (u_long)pgno, (u_long)i));
- break;
- }
- (void)__os_free(dbp->dbenv, pagelayout);
- /* Verify HOFFSET. */
- if ((db_indx_t)himark != HOFFSET(h)) {
- EPRINT((dbp->dbenv,
- "Page %lu: bad HOFFSET %lu, appears to be %lu",
- (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark));
- isbad = 1;
- }
- err: if (nentriesp != NULL)
- *nentriesp = nentries;
- if ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __bam_vrfy_itemorder --
- * Make sure the items on a page sort correctly.
- *
- * Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are
- * reasonable; be sure that __bam_vrfy_inp has been called first.
- *
- * If ovflok is set, it also assumes that overflow page chains
- * hanging off the current page have been sanity-checked, and so we
- * can use __bam_cmp to verify their ordering. If it is not set,
- * and we run into an overflow page, carp and return DB_VERIFY_BAD;
- * we shouldn't be called if any exist.
- *
- * PUBLIC: int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *,
- * PUBLIC: db_pgno_t, u_int32_t, int, int, u_int32_t));
- */
- int
- __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t nentries;
- int ovflok, hasdups;
- u_int32_t flags;
- {
- DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
- BTREE *bt;
- BINTERNAL *bi;
- BKEYDATA *bk;
- BOVERFLOW *bo;
- VRFY_PAGEINFO *pip;
- db_indx_t i;
- int cmp, freedup_1, freedup_2, isbad, ret, t_ret;
- int (*dupfunc) __P((DB *, const DBT *, const DBT *));
- int (*func) __P((DB *, const DBT *, const DBT *));
- void *buf1, *buf2, *tmpbuf;
- /*
- * We need to work in the ORDERCHKONLY environment where we might
- * not have a pip, but we also may need to work in contexts where
- * NUM_ENT isn't safe.
- */
- if (vdp != NULL) {
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- nentries = pip->entries;
- } else
- pip = NULL;
- ret = isbad = 0;
- bo = NULL; /* Shut up compiler. */
- memset(&dbta, 0, sizeof(DBT));
- F_SET(&dbta, DB_DBT_REALLOC);
- memset(&dbtb, 0, sizeof(DBT));
- F_SET(&dbtb, DB_DBT_REALLOC);
- buf1 = buf2 = NULL;
- DB_ASSERT(!LF_ISSET(DB_NOORDERCHK));
- dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
- if (TYPE(h) == P_LDUP)
- func = dupfunc;
- else {
- func = __bam_defcmp;
- if (dbp->bt_internal != NULL) {
- bt = (BTREE *)dbp->bt_internal;
- if (bt->bt_compare != NULL)
- func = bt->bt_compare;
- }
- }
- /*
- * We alternate our use of dbta and dbtb so that we can walk
- * through the page key-by-key without copying a dbt twice.
- * p1 is always the dbt for index i - 1, and p2 for index i.
- */
- p1 = &dbta;
- p2 = &dbtb;
- /*
- * Loop through the entries. nentries ought to contain the
- * actual count, and so is a safe way to terminate the loop; whether
- * we inc. by one or two depends on whether we're a leaf page--
- * on a leaf page, we care only about keys. On internal pages
- * and LDUP pages, we want to check the order of all entries.
- *
- * Note that on IBTREE pages, we start with item 1, since item
- * 0 doesn't get looked at by __bam_cmp.
- */
- for (i = (TYPE(h) == P_IBTREE) ? 1 : 0; i < nentries;
- i += (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX) {
- /*
- * Put key i-1, now in p2, into p1, by swapping DBTs and bufs.
- */
- tmp = p1;
- p1 = p2;
- p2 = tmp;
- tmpbuf = buf1;
- buf1 = buf2;
- buf2 = tmpbuf;
- /*
- * Get key i into p2.
- */
- switch (TYPE(h)) {
- case P_IBTREE:
- bi = GET_BINTERNAL(dbp, h, i);
- if (B_TYPE(bi->type) == B_OVERFLOW) {
- bo = (BOVERFLOW *)(bi->data);
- goto overflow;
- } else {
- p2->data = bi->data;
- p2->size = bi->len;
- }
- /*
- * The leftmost key on an internal page must be
- * len 0, since it's just a placeholder and
- * automatically sorts less than all keys.
- *
- * XXX
- * This criterion does not currently hold!
- * See todo list item #1686. Meanwhile, it's harmless
- * to just not check for it.
- */
- #if 0
- if (i == 0 && bi->len != 0) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: lowest key on internal page of nonzero length",
- (u_long)pgno));
- }
- #endif
- break;
- case P_LBTREE:
- case P_LDUP:
- bk = GET_BKEYDATA(dbp, h, i);
- if (B_TYPE(bk->type) == B_OVERFLOW) {
- bo = (BOVERFLOW *)bk;
- goto overflow;
- } else {
- p2->data = bk->data;
- p2->size = bk->len;
- }
- break;
- default:
- /*
- * This means our caller screwed up and sent us
- * an inappropriate page.
- */
- TYPE_ERR_PRINT(dbp->dbenv,
- "__bam_vrfy_itemorder", pgno, TYPE(h))
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- if (0) {
- /*
- * If ovflok != 1, we can't safely go chasing
- * overflow pages with the normal routines now;
- * they might be unsafe or nonexistent. Mark this
- * page as incomplete and return.
- *
- * Note that we don't need to worry about freeing
- * buffers, since they can't have been allocated
- * if overflow items are unsafe.
- */
- overflow: if (!ovflok) {
- F_SET(pip, VRFY_INCOMPLETE);
- goto err;
- }
- /*
- * Overflow items are safe to chase. Do so.
- * Fetch the overflow item into p2->data,
- * NULLing it or reallocing it as appropriate.
- *
- * (We set p2->data to buf2 before the call
- * so we're sure to realloc if we can and if p2
- * was just pointing at a non-overflow item.)
- */
- p2->data = buf2;
- if ((ret = __db_goff(dbp,
- p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: error %lu in fetching overflow item %lu",
- (u_long)pgno, (u_long)ret, (u_long)i));
- }
- /* In case it got realloc'ed and thus changed. */
- buf2 = p2->data;
- }
- /* Compare with the last key. */
- if (p1->data != NULL && p2->data != NULL) {
- cmp = func(dbp, p1, p2);
- /* comparison succeeded */
- if (cmp > 0) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: out-of-order key at entry %lu",
- (u_long)pgno, (u_long)i));
- /* proceed */
- } else if (cmp == 0) {
- /*
- * If they compared equally, this
- * had better be a (sub)database with dups.
- * Mark it so we can check during the
- * structure check.
- */
- if (pip != NULL)
- F_SET(pip, VRFY_HAS_DUPS);
- else if (hasdups == 0) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: database with no duplicates has duplicated keys",
- (u_long)pgno));
- }
- /*
- * If we're a btree leaf, check to see
- * if the data items of these on-page dups are
- * in sorted order. If not, flag this, so
- * that we can make sure during the
- * structure checks that the DUPSORT flag
- * is unset.
- *
- * At this point i points to a duplicate key.
- * Compare the datum before it (same key)
- * to the datum after it, i.e. i-1 to i+1.
- */
- if (TYPE(h) == P_LBTREE) {
- /*
- * Unsafe; continue and we'll pick
- * up the bogus nentries later.
- */
- if (i + 1 >= (db_indx_t)nentries)
- continue;
- /*
- * We don't bother with clever memory
- * management with on-page dups,
- * as it's only really a big win
- * in the overflow case, and overflow
- * dups are probably (?) rare.
- */
- if (((ret = __bam_safe_getdata(dbp,
- h, i - 1, ovflok, &dup_1,
- &freedup_1)) != 0) ||
- ((ret = __bam_safe_getdata(dbp,
- h, i + 1, ovflok, &dup_2,
- &freedup_2)) != 0))
- goto err;
- /*
- * If either of the data are NULL,
- * it's because they're overflows and
- * it's not safe to chase them now.
- * Mark an incomplete and return.
- */
- if (dup_1.data == NULL ||
- dup_2.data == NULL) {
- DB_ASSERT(!ovflok);
- F_SET(pip, VRFY_INCOMPLETE);
- goto err;
- }
- /*
- * If the dups are out of order,
- * flag this. It's not an error
- * until we do the structure check
- * and see whether DUPSORT is set.
- */
- if (dupfunc(dbp, &dup_1, &dup_2) > 0)
- F_SET(pip, VRFY_DUPS_UNSORTED);
- if (freedup_1)
- __os_ufree(dbp->dbenv,
- dup_1.data);
- if (freedup_2)
- __os_ufree(dbp->dbenv,
- dup_2.data);
- }
- }
- }
- }
- err: if (pip != NULL && ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) && ret == 0)
- ret = t_ret;
- if (buf1 != NULL)
- __os_ufree(dbp->dbenv, buf1);
- if (buf2 != NULL)
- __os_ufree(dbp->dbenv, buf2);
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __bam_vrfy_structure --
- * Verify the tree structure of a btree database (including the master
- * database containing subdbs).
- *
- * PUBLIC: int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
- * PUBLIC: u_int32_t));
- */
- int
- __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t meta_pgno;
- u_int32_t flags;
- {
- DB *pgset;
- VRFY_PAGEINFO *mip, *rip;
- db_pgno_t root, p;
- int t_ret, ret;
- u_int32_t nrecs, level, relen, stflags;
- mip = rip = 0;
- pgset = vdp->pgset;
- if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0)
- return (ret);
- if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, (int *)&p)) != 0)
- goto err;
- if (p != 0) {
- EPRINT((dbp->dbenv,
- "Page %lu: btree metadata page observed twice",
- (u_long)meta_pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0)
- goto err;
- root = mip->root;
- if (root == 0) {
- EPRINT((dbp->dbenv,
- "Page %lu: btree metadata page has no root",
- (u_long)meta_pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- if ((ret = __db_vrfy_getpageinfo(vdp, root, &rip)) != 0)
- goto err;
- switch (rip->type) {
- case P_IBTREE:
- case P_LBTREE:
- stflags = flags | ST_TOPLEVEL;
- if (F_ISSET(mip, VRFY_HAS_DUPS))
- stflags |= ST_DUPOK;
- if (F_ISSET(mip, VRFY_HAS_DUPSORT))
- stflags |= ST_DUPSORT;
- if (F_ISSET(mip, VRFY_HAS_RECNUMS))
- stflags |= ST_RECNUM;
- ret = __bam_vrfy_subtree(dbp,
- vdp, root, NULL, NULL, stflags, NULL, NULL, NULL);
- break;
- case P_IRECNO:
- case P_LRECNO:
- stflags = flags | ST_RECNUM | ST_IS_RECNO | ST_TOPLEVEL;
- if (mip->re_len > 0)
- stflags |= ST_RELEN;
- if ((ret = __bam_vrfy_subtree(dbp, vdp,
- root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0)
- goto err;
- /*
- * Even if mip->re_len > 0, re_len may come back zero if the
- * tree is empty. It should be okay to just skip the check in
- * this case, as if there are any non-deleted keys at all,
- * that should never happen.
- */
- if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
- EPRINT((dbp->dbenv,
- "Page %lu: recno database has bad re_len %lu",
- (u_long)meta_pgno, (u_long)relen));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- ret = 0;
- break;
- case P_LDUP:
- EPRINT((dbp->dbenv,
- "Page %lu: duplicate tree referenced from metadata page",
- (u_long)meta_pgno));
- ret = DB_VERIFY_BAD;
- break;
- default:
- EPRINT((dbp->dbenv,
- "Page %lu: btree root of incorrect type %lu on metadata page",
- (u_long)meta_pgno, (u_long)rip->type));
- ret = DB_VERIFY_BAD;
- break;
- }
- err: if (mip != NULL && ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, mip)) != 0) && ret == 0)
- ret = t_ret;
- if (rip != NULL && ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, rip)) != 0) && ret == 0)
- ret = t_ret;
- return (ret);
- }
- /*
- * __bam_vrfy_subtree--
- * Verify a subtree (or entire) btree with specified root.
- *
- * Note that this is public because it must be called to verify
- * offpage dup trees, including from hash.
- *
- * PUBLIC: int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *,
- * PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *));
- */
- int
- __bam_vrfy_subtree(dbp,
- vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- void *l, *r;
- u_int32_t flags, *levelp, *nrecsp, *relenp;
- {
- BINTERNAL *li, *ri, *lp, *rp;
- DB *pgset;
- DB_MPOOLFILE *mpf;
- DBC *cc;
- PAGE *h;
- VRFY_CHILDINFO *child;
- VRFY_PAGEINFO *pip;
- db_indx_t i;
- db_pgno_t next_pgno, prev_pgno;
- db_recno_t child_nrecs, nrecs;
- u_int32_t child_level, child_relen, level, relen, stflags;
- u_int8_t leaf_type;
- int (*func) __P((DB *, const DBT *, const DBT *));
- int isbad, p, ret, t_ret, toplevel;
- mpf = dbp->mpf;
- ret = isbad = 0;
- nrecs = 0;
- h = NULL;
- relen = 0;
- leaf_type = P_INVALID;
- next_pgno = prev_pgno = PGNO_INVALID;
- rp = (BINTERNAL *)r;
- lp = (BINTERNAL *)l;
- /* Provide feedback on our progress to the application. */
- if (!LF_ISSET(DB_SALVAGE))
- __db_vrfy_struct_feedback(dbp, vdp);
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- cc = NULL;
- level = pip->bt_level;
- toplevel = LF_ISSET(ST_TOPLEVEL) ? 1 : 0;
- LF_CLR(ST_TOPLEVEL);
- /*
- * If this is the root, initialize the vdp's prev- and next-pgno
- * accounting.
- *
- * For each leaf page we hit, we'll want to make sure that
- * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is
- * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno
- * and vdp->prev_pgno to our page number, and the next leaf page in
- * line should be able to do the same verification.
- */
- if (toplevel) {
- /*
- * Cache the values stored in the vdp so that if we're an
- * auxiliary tree such as an off-page duplicate set, our
- * caller's leaf page chain doesn't get lost.
- */
- prev_pgno = vdp->prev_pgno;
- next_pgno = vdp->next_pgno;
- leaf_type = vdp->leaf_type;
- vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID;
- vdp->leaf_type = P_INVALID;
- }
- /*
- * We are recursively descending a btree, starting from the root
- * and working our way out to the leaves.
- *
- * There are four cases we need to deal with:
- * 1. pgno is a recno leaf page. Any children are overflows.
- * 2. pgno is a duplicate leaf page. Any children
- * are overflow pages; traverse them, and then return
- * level and nrecs.
- * 3. pgno is an ordinary leaf page. Check whether dups are
- * allowed, and if so, traverse any off-page dups or
- * overflows. Then return nrecs and level.
- * 4. pgno is a recno internal page. Recursively check any
- * child pages, making sure their levels are one lower
- * and their nrecs sum to ours.
- * 5. pgno is a btree internal page. Same as #4, plus we
- * must verify that for each pair of BINTERNAL entries
- * N and N+1, the leftmost item on N's child sorts
- * greater than N, and the rightmost item on N's child
- * sorts less than N+1.
- *
- * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE),
- * we need to verify the internal sort order is correct if,
- * due to overflow items, we were not able to do so earlier.
- */
- switch (pip->type) {
- case P_LRECNO:
- case P_LDUP:
- case P_LBTREE:
- /*
- * Cases 1, 2 and 3.
- *
- * We're some sort of leaf page; verify
- * that our linked list of leaves is consistent.
- */
- if (vdp->leaf_type == P_INVALID) {
- /*
- * First leaf page. Set the type that all its
- * successors should be, and verify that our prev_pgno
- * is PGNO_INVALID.
- */
- vdp->leaf_type = pip->type;
- if (pip->prev_pgno != PGNO_INVALID)
- goto bad_prev;
- } else {
- /*
- * Successor leaf page. Check our type, the previous
- * page's next_pgno, and our prev_pgno.
- */
- if (pip->type != vdp->leaf_type) {
- EPRINT((dbp->dbenv,
- "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)",
- (u_long)pip->pgno, (u_long)pip->type,
- (u_long)vdp->leaf_type));
- isbad = 1;
- }
- if (pip->pgno != vdp->next_pgno) {
- EPRINT((dbp->dbenv,
- "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)",
- (u_long)vdp->prev_pgno,
- (u_long)vdp->next_pgno, (u_long)pip->pgno));
- isbad = 1;
- }
- if (pip->prev_pgno != vdp->prev_pgno) {
- bad_prev: EPRINT((dbp->dbenv,
- "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)",
- (u_long)pip->pgno, (u_long)pip->prev_pgno,
- (u_long)vdp->prev_pgno));
- isbad = 1;
- }
- }
- vdp->prev_pgno = pip->pgno;
- vdp->next_pgno = pip->next_pgno;
- /*
- * Overflow pages are common to all three leaf types;
- * traverse the child list, looking for overflows.
- */
- if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
- goto err;
- for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
- ret = __db_vrfy_ccnext(cc, &child))
- if (child->type == V_OVERFLOW &&
- (ret = __db_vrfy_ovfl_structure(dbp, vdp,
- child->pgno, child->tlen,
- flags | ST_OVFL_LEAF)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto done;
- }
- if ((ret = __db_vrfy_ccclose(cc)) != 0)
- goto err;
- cc = NULL;
- /* Case 1 */
- if (pip->type == P_LRECNO) {
- if (!LF_ISSET(ST_IS_RECNO) &&
- !(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: recno leaf page non-recno tree",
- (u_long)pgno));
- goto done;
- }
- goto leaf;
- } else if (LF_ISSET(ST_IS_RECNO)) {
- /*
- * It's a non-recno leaf. Had better not be a recno
- * subtree.
- */
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: non-recno leaf page in recno tree",
- (u_long)pgno));
- goto done;
- }
- /* Case 2--no more work. */
- if (pip->type == P_LDUP)
- goto leaf;
- /* Case 3 */
- /* Check if we have any dups. */
- if (F_ISSET(pip, VRFY_HAS_DUPS)) {
- /* If dups aren't allowed in this btree, trouble. */
- if (!LF_ISSET(ST_DUPOK)) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: duplicates in non-dup btree",
- (u_long)pgno));
- } else {
- /*
- * We correctly have dups. If any are off-page,
- * traverse those btrees recursively.
- */
- if ((ret =
- __db_vrfy_childcursor(vdp, &cc)) != 0)
- goto err;
- for (ret = __db_vrfy_ccset(cc, pgno, &child);
- ret == 0;
- ret = __db_vrfy_ccnext(cc, &child)) {
- stflags = flags | ST_RECNUM | ST_DUPSET;
- /* Skip any overflow entries. */
- if (child->type == V_DUPLICATE) {
- if ((ret = __db_vrfy_duptype(
- dbp, vdp, child->pgno,
- stflags)) != 0) {
- isbad = 1;
- /* Next child. */
- continue;
- }
- if ((ret = __bam_vrfy_subtree(
- dbp, vdp, child->pgno, NULL,
- NULL, stflags | ST_TOPLEVEL,
- NULL, NULL, NULL)) != 0) {
- if (ret !=
- DB_VERIFY_BAD)
- goto err;
- else
- isbad = 1;
- }
- }
- }
- if ((ret = __db_vrfy_ccclose(cc)) != 0)
- goto err;
- cc = NULL;
- /*
- * If VRFY_DUPS_UNSORTED is set,
- * ST_DUPSORT had better not be.
- */
- if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
- LF_ISSET(ST_DUPSORT)) {
- EPRINT((dbp->dbenv,
- "Page %lu: unsorted duplicate set in sorted-dup database",
- (u_long)pgno));
- isbad = 1;
- }
- }
- }
- goto leaf;
- case P_IBTREE:
- case P_IRECNO:
- /* We handle these below. */
- break;
- default:
- /*
- * If a P_IBTREE or P_IRECNO contains a reference to an
- * invalid page, we'll wind up here; handle it gracefully.
- * Note that the code at the "done" label assumes that the
- * current page is a btree/recno one of some sort; this
- * is not the case here, so we goto err.
- *
- * If the page is entirely zeroed, its pip->type will be a lie
- * (we assumed it was a hash page, as they're allowed to be
- * zeroed); handle this case specially.
- */
- if (F_ISSET(pip, VRFY_IS_ALLZEROES))
- ZEROPG_ERR_PRINT(dbp->dbenv,
- pgno, "btree or recno page");
- else
- EPRINT((dbp->dbenv,
- "Page %lu: btree or recno page is of inappropriate type %lu",
- (u_long)pgno, (u_long)pip->type));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- /*
- * Cases 4 & 5: This is a btree or recno internal page. For each child,
- * recurse, keeping a running count of nrecs and making sure the level
- * is always reasonable.
- */
- if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
- goto err;
- for (ret = __db_vrfy_ccset(cc, pgno, &child); ret == 0;
- ret = __db_vrfy_ccnext(cc, &child))
- if (child->type == V_RECNO) {
- if (pip->type != P_IRECNO) {
- TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_subtree",
- pgno, pip->type);
- DB_ASSERT(0);
- ret = EINVAL;
- goto err;
- }
- if ((ret = __bam_vrfy_subtree(dbp, vdp, child->pgno,
- NULL, NULL, flags, &child_level, &child_nrecs,
- &child_relen)) != 0) {
- if (ret != DB_VERIFY_BAD)
- goto done;
- else
- isbad = 1;
- }
- if (LF_ISSET(ST_RELEN)) {
- if (relen == 0)
- relen = child_relen;
- /*
- * child_relen may be zero if the child subtree
- * is empty.
- */
- else if (child_relen > 0 &&
- relen != child_relen) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: recno page returned bad re_len %lu",
- (u_long)child->pgno,
- (u_long)child_relen));
- }
- if (relenp)
- *relenp = relen;
- }
- if (LF_ISSET(ST_RECNUM))
- nrecs += child_nrecs;
- if (level != child_level + 1) {
- isbad = 1;
- EPRINT((dbp->dbenv, "Page %lu: recno level incorrect: got %lu, expected %lu",
- (u_long)child->pgno, (u_long)child_level,
- (u_long)(level - 1)));
- }
- } else if (child->type == V_OVERFLOW &&
- (ret = __db_vrfy_ovfl_structure(dbp, vdp,
- child->pgno, child->tlen, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto done;
- }
- if ((ret = __db_vrfy_ccclose(cc)) != 0)
- goto err;
- cc = NULL;
- /* We're done with case 4. */
- if (pip->type == P_IRECNO)
- goto done;
- /*
- * Case 5. Btree internal pages.
- * As described above, we need to iterate through all the
- * items on the page and make sure that our children sort appropriately
- * with respect to them.
- *
- * For each entry, li will be the "left-hand" key for the entry
- * itself, which must sort lower than all entries on its child;
- * ri will be the key to its right, which must sort greater.
- */
- if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
- goto err;
- for (i = 0; i < pip->entries; i += O_INDX) {
- li = GET_BINTERNAL(dbp, h, i);
- ri = (i + O_INDX < pip->entries) ?
- GET_BINTERNAL(dbp, h, i + O_INDX) : NULL;
- /*
- * The leftmost key is forcibly sorted less than all entries,
- * so don't bother passing it.
- */
- if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno,
- i == 0 ? NULL : li, ri, flags, &child_level,
- &child_nrecs, NULL)) != 0) {
- if (ret != DB_VERIFY_BAD)
- goto done;
- else
- isbad = 1;
- }
- if (LF_ISSET(ST_RECNUM)) {
- /*
- * Keep a running tally on the actual record count so
- * we can return it to our parent (if we have one) or
- * compare it to the NRECS field if we're a root page.
- */
- nrecs += child_nrecs;
- /*
- * Make sure the actual record count of the child
- * is equal to the value in the BINTERNAL structure.
- */
- if (li->nrecs != child_nrecs) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: item %lu has incorrect record count of %lu, should be %lu",
- (u_long)pgno, (u_long)i, (u_long)li->nrecs,
- (u_long)child_nrecs));
- }
- }
- if (level != child_level + 1) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: Btree level incorrect: got %lu, expected %lu",
- (u_long)li->pgno,
- (u_long)child_level, (u_long)(level - 1)));
- }
- }
- if (0) {
- leaf: level = LEAFLEVEL;
- if (LF_ISSET(ST_RECNUM))
- nrecs = pip->rec_cnt;
- /* XXX
- * We should verify that the record count on a leaf page
- * is the sum of the number of keys and the number of
- * records in its off-page dups. This requires looking
- * at the page again, however, and it may all be changing
- * soon, so for now we don't bother.
- */
- if (LF_ISSET(ST_RELEN) && relenp)
- *relenp = pip->re_len;
- }
- done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
- /*
- * During the page-by-page pass, item order verification was
- * not finished due to the presence of overflow items. If
- * isbad == 0, though, it's now safe to do so, as we've
- * traversed any child overflow pages. Do it.
- */
- if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
- goto err;
- if ((ret = __bam_vrfy_itemorder(dbp,
- vdp, h, pgno, 0, 1, 0, flags)) != 0)
- goto err;
- F_CLR(pip, VRFY_INCOMPLETE);
- }
- /*
- * It's possible to get to this point with a page that has no
- * items, but without having detected any sort of failure yet.
- * Having zero items is legal if it's a leaf--it may be the
- * root page in an empty tree, or the tree may have been
- * modified with the DB_REVSPLITOFF flag set (there's no way
- * to tell from what's on disk). For an internal page,
- * though, having no items is a problem (all internal pages
- * must have children).
- */
- if (isbad == 0 && ret == 0) {
- if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
- goto err;
- if (NUM_ENT(h) == 0 && ISINTERNAL(h)) {
- EPRINT((dbp->dbenv,
- "Page %lu: internal page is empty and should not be",
- (u_long)pgno));
- isbad = 1;
- goto err;
- }
- }
- /*
- * Our parent has sent us BINTERNAL pointers to parent records
- * so that we can verify our place with respect to them. If it's
- * appropriate--we have a default sort function--verify this.
- */
- if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) {
- if (h == NULL && (ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
- goto err;
- /*
- * __bam_vrfy_treeorder needs to know what comparison function
- * to use. If ST_DUPSET is set, we're in a duplicate tree
- * and we use the duplicate comparison function; otherwise,
- * use the btree one. If unset, use the default, of course.
- */
- func = LF_ISSET(ST_DUPSET) ? dbp->dup_compare :
- ((BTREE *)dbp->bt_internal)->bt_compare;
- if (func == NULL)
- func = __bam_defcmp;
- if ((ret = __bam_vrfy_treeorder(
- dbp, pgno, h, lp, rp, func, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- }
- /*
- * This is guaranteed to succeed for leaf pages, but no harm done.
- *
- * Internal pages below the top level do not store their own
- * record numbers, so we skip them.
- */
- if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
- isbad = 1;
- EPRINT((dbp->dbenv,
- "Page %lu: bad record count: has %lu records, claims %lu",
- (u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt));
- }
- if (levelp)
- *levelp = level;
- if (nrecsp)
- *nrecsp = nrecs;
- pgset = vdp->pgset;
- if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
- goto err;
- if (p != 0) {
- isbad = 1;
- EPRINT((dbp->dbenv, "Page %lu: linked twice", (u_long)pgno));
- } else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0)
- goto err;
- if (toplevel)
- /*
- * The last page's next_pgno in the leaf chain should have been
- * PGNO_INVALID.
- */
- if (vdp->next_pgno != PGNO_INVALID) {
- EPRINT((dbp->dbenv, "Page %lu: unterminated leaf chain",
- (u_long)vdp->prev_pgno));
- isbad = 1;
- }
- err: if (toplevel) {
- /* Restore our caller's settings. */
- vdp->next_pgno = next_pgno;
- vdp->prev_pgno = prev_pgno;
- vdp->leaf_type = leaf_type;
- }
- if (h != NULL && (t_ret = mpf->put(mpf, h, 0)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret =
- __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
- ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
- }
- /*
- * __bam_vrfy_treeorder --
- * Verify that the lowest key on a page sorts greater than the
- * BINTERNAL which points to it (lp), and the highest key
- * sorts less than the BINTERNAL above that (rp).
- *
- * If lp is NULL, this means that it was the leftmost key on the
- * parent, which (regardless of sort function) sorts less than
- * all keys. No need to check it.
- *
- * If rp is NULL, lp was the highest key on the parent, so there's
- * no higher key we must sort less than.
- */
- static int
- __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
- DB *dbp;
- db_pgno_t pgno;
- PAGE *h;
- BINTERNAL *lp, *rp;
- int (*func) __P((DB *, const DBT *, const DBT *));
- u_int32_t flags;
- {
- BOVERFLOW *bo;
- DBT dbt;
- db_indx_t last;
- int ret, cmp;
- memset(&dbt, 0, sizeof(DBT));
- F_SET(&dbt, DB_DBT_MALLOC);
- ret = 0;
- /*
- * Empty pages are sorted correctly by definition. We check
- * to see whether they ought to be empty elsewhere; leaf
- * pages legally may be.
- */
- if (NUM_ENT(h) == 0)
- return (0);
- switch (TYPE(h)) {
- case P_IBTREE:
- case P_LDUP:
- last = NUM_ENT(h) - O_INDX;
- break;
- case P_LBTREE:
- last = NUM_ENT(h) - P_INDX;
- break;
- default:
- TYPE_ERR_PRINT(dbp->dbenv,
- "__bam_vrfy_treeorder", pgno, TYPE(h));
- DB_ASSERT(0);
- return (EINVAL);
- }
- /*
- * The key on page h, the child page, is more likely to be
- * an overflow page, so we pass its offset, rather than lp/rp's,
- * into __bam_cmp. This will take advantage of __db_moff.
- */
- /*
- * Skip first-item check if we're an internal page--the first
- * entry on an internal page is treated specially by __bam_cmp,
- * so what's on the page shouldn't matter. (Plus, since we're passing
- * our page and item 0 as to __bam_cmp, we'll sort before our
- * parent and falsely report a failure.)
- */
- if (lp != NULL && TYPE(h) != P_IBTREE) {
- if (lp->type == B_KEYDATA) {
- dbt.data = lp->data;
- dbt.size = lp->len;
- } else if (lp->type == B_OVERFLOW) {
- bo = (BOVERFLOW *)lp->data;
- if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno,
- NULL, NULL)) != 0)
- return (ret);
- } else {
- DB_ASSERT(0);
- EPRINT((dbp->dbenv,
- "Page %lu: unknown type for internal record",
- (u_long)PGNO(h)));
- return (EINVAL);
- }
- /* On error, fall through, free if neeeded, and return. */
- if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) {
- if (cmp > 0) {
- EPRINT((dbp->dbenv,
- "Page %lu: first item on page sorted greater than parent entry",
- (u_long)PGNO(h)));
- ret = DB_VERIFY_BAD;
- }
- } else
- EPRINT((dbp->dbenv,
- "Page %lu: first item on page had comparison error",
- (u_long)PGNO(h)));
- if (dbt.data != lp->data)
- __os_ufree(dbp->dbenv, dbt.data);
- if (ret != 0)
- return (ret);
- }
- if (rp != NULL) {
- if (rp->type == B_KEYDATA) {
- dbt.data = rp->data;
- dbt.size = rp->len;
- } else if (rp->type == B_OVERFLOW) {
- bo = (BOVERFLOW *)rp->data;
- if ((ret = __db_goff(dbp, &dbt, bo->tlen, bo->pgno,
- NULL, NULL)) != 0)
- return (ret);
- } else {
- DB_ASSERT(0);
- EPRINT((dbp->dbenv,
- "Page %lu: unknown type for internal record",
- (u_long)PGNO(h)));
- return (EINVAL);
- }
- /* On error, fall through, free if neeeded, and return. */
- if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) {
- if (cmp < 0) {
- EPRINT((dbp->dbenv,
- "Page %lu: last item on page sorted greater than parent entry",
- (u_long)PGNO(h)));
- ret = DB_VERIFY_BAD;
- }
- } else
- EPRINT((dbp->dbenv,
- "Page %lu: last item on page had comparison error",
- (u_long)PGNO(h)));
- if (dbt.data != rp->data)
- __os_ufree(dbp->dbenv, dbt.data);
- }
- return (ret);
- }
- /*
- * __bam_salvage --
- * Safely dump out anything that looks like a key on an alleged
- * btree leaf page.
- *
- * PUBLIC: int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t,
- * PUBLIC: PAGE *, void *, int (*)(void *, const void *), DBT *,
- * PUBLIC: u_int32_t));
- */
- int
- __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- u_int32_t pgtype;
- PAGE *h;
- void *handle;
- int (*callback) __P((void *, const void *));
- DBT *key;
- u_int32_t flags;
- {
- DBT dbt, unkdbt;
- BKEYDATA *bk;
- BOVERFLOW *bo;
- db_indx_t i, beg, end, *inp;
- u_int32_t himark;
- u_int8_t *pgmap;
- void *ovflbuf;
- int t_ret, ret, err_ret;
- /* Shut up lint. */
- COMPQUIET(end, 0);
- ovflbuf = pgmap = NULL;
- err_ret = ret = 0;
- inp = P_INP(dbp, h);
- memset(&dbt, 0, sizeof(DBT));
- dbt.flags = DB_DBT_REALLOC;
- memset(&unkdbt, 0, sizeof(DBT));
- unkdbt.size = (u_int32_t)(strlen("UNKNOWN") + 1);
- unkdbt.data = "UNKNOWN";
- /*
- * Allocate a buffer for overflow items. Start at one page;
- * __db_safe_goff will realloc as needed.
- */
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &ovflbuf)) != 0)
- return (ret);
- if (LF_ISSET(DB_AGGRESSIVE)) {
- if ((ret =
- __os_malloc(dbp->dbenv, dbp->pgsize, &pgmap)) != 0)
- goto err;
- memset(pgmap, 0, dbp->pgsize);
- }
- /*
- * Loop through the inp array, spitting out key/data pairs.
- *
- * If we're salvaging normally, loop from 0 through NUM_ENT(h).
- * If we're being aggressive, loop until we hit the end of the page--
- * NUM_ENT() may be bogus.
- */
- himark = dbp->pgsize;
- for (i = 0;; i += O_INDX) {
- /* If we're not aggressive, break when we hit NUM_ENT(h). */
- if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h))
- break;
- /* Verify the current item. */
- ret = __db_vrfy_inpitem(dbp,
- h, pgno, i, 1, flags, &himark, NULL);
- /* If this returned a fatality, it's time to break. */
- if (ret == DB_VERIFY_FATAL) {
- /*
- * Don't return DB_VERIFY_FATAL; it's private
- * and means only that we can't go on with this
- * page, not with the whole database. It's
- * not even an error if we've run into it
- * after NUM_ENT(h).
- */
- ret = (i < NUM_ENT(h)) ? DB_VERIFY_BAD : 0;
- break;
- }
- /*
- * If this returned 0, it's safe to print or (carefully)
- * try to fetch.
- */
- if (ret == 0) {
- /*
- * We only want to print deleted items if
- * DB_AGGRESSIVE is set.
- */
- bk = GET_BKEYDATA(dbp, h, i);
- if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
- continue;
- /*
- * We're going to go try to print the next item. If
- * key is non-NULL, we're a dup page, so we've got to
- * print the key first, unless SA_SKIPFIRSTKEY is set
- * and we're on the first entry.
- */
- if (key != NULL &&
- (i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY)))
- if ((ret = __db_prdbt(key,
- 0, " ", handle, callback, 0, vdp)) != 0)
- err_ret = ret;
- beg = inp[i];
- switch (B_TYPE(bk->type)) {
- case B_DUPLICATE:
- end = beg + BOVERFLOW_SIZE - 1;
- /*
- * If we're not on a normal btree leaf page,
- * there shouldn't be off-page
- * dup sets. Something's confused; just
- * drop it, and the code to pick up unlinked
- * offpage dup sets will print it out
- * with key "UNKNOWN" later.
- */
- if (pgtype != P_LBTREE)
- break;
- bo = (BOVERFLOW *)bk;
- /*
- * If the page number is unreasonable, or
- * if this is supposed to be a key item,
- * just spit out "UNKNOWN"--the best we
- * can do is run into the data items in the
- * unlinked offpage dup pass.
- */
- if (!IS_VALID_PGNO(bo->pgno) ||
- (i % P_INDX == 0)) {
- /* Not much to do on failure. */
- if ((ret = __db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, vdp)) != 0)
- err_ret = ret;
- break;
- }
- if ((ret = __db_salvage_duptree(dbp,
- vdp, bo->pgno, &dbt, handle, callback,
- flags | SA_SKIPFIRSTKEY)) != 0)
- err_ret = ret;
- break;
- case B_KEYDATA:
- end =
- ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1;
- dbt.data = bk->data;
- dbt.size = bk->len;
- if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, vdp)) != 0)
- err_ret = ret;
- break;
- case B_OVERFLOW:
- end = beg + BOVERFLOW_SIZE - 1;
- bo = (BOVERFLOW *)bk;
- if ((ret = __db_safe_goff(dbp, vdp,
- bo->pgno, &dbt, &ovflbuf, flags)) != 0) {
- err_ret = ret;
- /* We care about err_ret more. */
- (void)__db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, vdp);
- break;
- }
- if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, vdp)) != 0)
- err_ret = ret;
- break;
- default:
- /*
- * We should never get here; __db_vrfy_inpitem
- * should not be returning 0 if bk->type
- * is unrecognizable.
- */
- DB_ASSERT(0);
- return (EINVAL);
- }
- /*
- * If we're being aggressive, mark the beginning
- * and end of the item; we'll come back and print
- * whatever "junk" is in the gaps in case we had
- * any bogus inp elements and thereby missed stuff.
- */
- if (LF_ISSET(DB_AGGRESSIVE)) {
- pgmap[beg] = ITEM_BEGIN;
- pgmap[end] = ITEM_END;
- }
- }
- }
- /*
- * If i is odd and this is a btree leaf, we've printed out a key but not
- * a datum; fix this imbalance by printing an "UNKNOWN".
- */
- if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret =
- __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp)) != 0))
- err_ret = ret;
- err: if (pgmap != NULL)
- __os_free(dbp->dbenv, pgmap);
- __os_free(dbp->dbenv, ovflbuf);
- /* Mark this page as done. */
- if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
- return (t_ret);
- return ((err_ret != 0) ? err_ret : ret);
- }
- /*
- * __bam_salvage_walkdupint --
- * Walk a known-good btree or recno internal page which is part of
- * a dup tree, calling __db_salvage_duptree on each child page.
- *
- * PUBLIC: int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *,
- * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t));
- */
- int
- __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- DBT *key;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
- {
- RINTERNAL *ri;
- BINTERNAL *bi;
- int ret, t_ret;
- db_indx_t i;
- ret = 0;
- for (i = 0; i < NUM_ENT(h); i++) {
- switch (TYPE(h)) {
- case P_IBTREE:
- bi = GET_BINTERNAL(dbp, h, i);
- if ((t_ret = __db_salvage_duptree(dbp,
- vdp, bi->pgno, key, handle, callback, flags)) != 0)
- ret = t_ret;
- break;
- case P_IRECNO:
- ri = GET_RINTERNAL(dbp, h, i);
- if ((t_ret = __db_salvage_duptree(dbp,
- vdp, ri->pgno, key, handle, callback, flags)) != 0)
- ret = t_ret;
- break;
- default:
- __db_err(dbp->dbenv,
- "__bam_salvage_walkdupint called on non-int. page");
- DB_ASSERT(0);
- return (EINVAL);
- }
- /* Pass SA_SKIPFIRSTKEY, if set, on to the 0th child only. */
- flags &= ~LF_ISSET(SA_SKIPFIRSTKEY);
- }
- return (ret);
- }
- /*
- * __bam_meta2pgset --
- * Given a known-good meta page, return in pgsetp a 0-terminated list of
- * db_pgno_t's corresponding to the pages in the btree.
- *
- * We do this by a somewhat sleazy method, to avoid having to traverse the
- * btree structure neatly: we walk down the left side to the very
- * first leaf page, then we mark all the pages in the chain of
- * NEXT_PGNOs (being wary of cycles and invalid ones), then we
- * consolidate our scratch array into a nice list, and return. This
- * avoids the memory management hassles of recursion and the
- * trouble of walking internal pages--they just don't matter, except
- * for the left branch.
- *
- * PUBLIC: int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *,
- * PUBLIC: u_int32_t, DB *));
- */
- int
- __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
- DB *dbp;
- VRFY_DBINFO *vdp;
- BTMETA *btmeta;
- u_int32_t flags;
- DB *pgset;
- {
- BINTERNAL *bi;
- DB_MPOOLFILE *mpf;
- PAGE *h;
- RINTERNAL *ri;
- db_pgno_t current, p;
- int err_ret, ret;
- mpf = dbp->mpf;
- h = NULL;
- ret = err_ret = 0;
- DB_ASSERT(pgset != NULL);
- for (current = btmeta->root;;) {
- if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) {
- err_ret = DB_VERIFY_BAD;
- goto err;
- }
- if ((ret = mpf->get(mpf, ¤t, 0, &h)) != 0) {
- err_ret = ret;
- goto err;
- }
- switch (TYPE(h)) {
- case P_IBTREE:
- case P_IRECNO:
- if ((ret = __bam_vrfy(dbp,
- vdp, h, current, flags | DB_NOORDERCHK)) != 0) {
- err_ret = ret;
- goto err;
- }
- if (TYPE(h) == P_IBTREE) {
- bi = GET_BINTERNAL(dbp, h, 0);
- current = bi->pgno;
- } else { /* P_IRECNO */
- ri = GET_RINTERNAL(dbp, h, 0);
- current = ri->pgno;
- }
- break;
- case P_LBTREE:
- case P_LRECNO:
- goto traverse;
- default:
- err_ret = DB_VERIFY_BAD;
- goto err;
- }
- if ((ret = mpf->put(mpf, h, 0)) != 0)
- err_ret = ret;
- h = NULL;
- }
- /*
- * At this point, current is the pgno of leaf page h, the 0th in the
- * tree we're concerned with.
- */
- traverse:
- while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
- if (h == NULL && (ret = mpf->get(mpf, ¤t, 0, &h)) != 0) {
- err_ret = ret;
- break;
- }
- if ((ret = __db_vrfy_pgset_get(pgset, current, (int *)&p)) != 0)
- goto err;
- if (p != 0) {
- /*
- * We've found a cycle. Return success anyway--
- * our caller may as well use however much of
- * the pgset we've come up with.
- */
- break;
- }
- if ((ret = __db_vrfy_pgset_inc(pgset, current)) != 0)
- goto err;
- current = NEXT_PGNO(h);
- if ((ret = mpf->put(mpf, h, 0)) != 0)
- err_ret = ret;
- h = NULL;
- }
- err: if (h != NULL)
- (void)mpf->put(mpf, h, 0);
- return (ret == 0 ? err_ret : ret);
- }
- /*
- * __bam_safe_getdata --
- *
- * Utility function for __bam_vrfy_itemorder. Safely gets the datum at
- * index i, page h, and sticks it in DBT dbt. If ovflok is 1 and i's an
- * overflow item, we do a safe_goff to get the item and signal that we need
- * to free dbt->data; if ovflok is 0, we leaves the DBT zeroed.
- */
- static int
- __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp)
- DB *dbp;
- PAGE *h;
- u_int32_t i;
- int ovflok;
- DBT *dbt;
- int *freedbtp;
- {
- BKEYDATA *bk;
- BOVERFLOW *bo;
- memset(dbt, 0, sizeof(DBT));
- *freedbtp = 0;
- bk = GET_BKEYDATA(dbp, h, i);
- if (B_TYPE(bk->type) == B_OVERFLOW) {
- if (!ovflok)
- return (0);
- bo = (BOVERFLOW *)bk;
- F_SET(dbt, DB_DBT_MALLOC);
- *freedbtp = 1;
- return (__db_goff(dbp, dbt, bo->tlen, bo->pgno, NULL, NULL));
- } else {
- dbt->data = bk->data;
- dbt->size = bk->len;
- }
- return (0);
- }