bd_tree.cpp
上传用户:chinafayin
上传日期:2022-04-05
资源大小:153k
文件大小:16k
- //----------------------------------------------------------------------
- // File: bd_tree.cpp
- // Programmer: David Mount
- // Description: Basic methods for bd-trees.
- // Last modified: 01/04/05 (Version 1.0)
- //----------------------------------------------------------------------
- // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
- // David Mount. All Rights Reserved.
- //
- // This software and related documentation is part of the Approximate
- // Nearest Neighbor Library (ANN). This software is provided under
- // the provisions of the Lesser GNU Public License (LGPL). See the
- // file ../ReadMe.txt for further information.
- //
- // The University of Maryland (U.M.) and the authors make no
- // representations about the suitability or fitness of this software for
- // any purpose. It is provided "as is" without express or implied
- // warranty.
- //----------------------------------------------------------------------
- // History:
- // Revision 0.1 03/04/98
- // Initial release
- // Revision l.0 04/01/05
- // Fixed centroid shrink threshold condition to depend on the
- // dimension.
- // Moved dump routine to kd_dump.cpp.
- //----------------------------------------------------------------------
- #include "bd_tree.h" // bd-tree declarations
- #include "kd_util.h" // kd-tree utilities
- #include "kd_split.h" // kd-tree splitting rules
- #include "../ANNperf.h" // performance evaluation
- //----------------------------------------------------------------------
- // Printing a bd-tree
- // These routines print a bd-tree. See the analogous procedure
- // in kd_tree.cpp for more information.
- //----------------------------------------------------------------------
- void ANNbd_shrink::print( // print shrinking node
- int level, // depth of node in tree
- ostream &out) // output stream
- {
- child[ANN_OUT]->print(level+1, out); // print out-child
- out << " ";
- for (int i = 0; i < level; i++) // print indentation
- out << "..";
- out << "Shrink";
- for (int j = 0; j < n_bnds; j++) { // print sides, 2 per line
- if (j % 2 == 0) {
- out << "n"; // newline and indentation
- for (int i = 0; i < level+2; i++) out << " ";
- }
- out << " ([" << bnds[j].cd << "]"
- << (bnds[j].sd > 0 ? ">=" : "< ")
- << bnds[j].cv << ")";
- }
- out << "n";
- child[ANN_IN]->print(level+1, out); // print in-child
- }
- //----------------------------------------------------------------------
- // kd_tree statistics utility (for performance evaluation)
- // This routine computes various statistics information for
- // shrinking nodes. See file kd_tree.cpp for more information.
- //----------------------------------------------------------------------
- void ANNbd_shrink::getStats( // get subtree statistics
- int dim, // dimension of space
- ANNkdStats &st, // stats (modified)
- ANNorthRect &bnd_box) // bounding box
- {
- ANNkdStats ch_stats; // stats for children
- ANNorthRect inner_box(dim); // inner box of shrink
- annBnds2Box(bnd_box, // enclosing box
- dim, // dimension
- n_bnds, // number of bounds
- bnds, // bounds array
- inner_box); // inner box (modified)
- // get stats for inner child
- ch_stats.reset(); // reset
- child[ANN_IN]->getStats(dim, ch_stats, inner_box);
- st.merge(ch_stats); // merge them
- // get stats for outer child
- ch_stats.reset(); // reset
- child[ANN_OUT]->getStats(dim, ch_stats, bnd_box);
- st.merge(ch_stats); // merge them
- st.depth++; // increment depth
- st.n_shr++; // increment number of shrinks
- }
- //----------------------------------------------------------------------
- // bd-tree constructor
- // This is the main constructor for bd-trees given a set of points.
- // It first builds a skeleton kd-tree as a basis, then computes the
- // bounding box of the data points, and then invokes rbd_tree() to
- // actually build the tree, passing it the appropriate splitting
- // and shrinking information.
- //----------------------------------------------------------------------
- ANNkd_ptr rbd_tree( // recursive construction of bd-tree
- ANNpointArray pa, // point array
- ANNidxArray pidx, // point indices to store in subtree
- int n, // number of points
- int dim, // dimension of space
- int bsp, // bucket space
- ANNorthRect &bnd_box, // bounding box for current node
- ANNkd_splitter splitter, // splitting routine
- ANNshrinkRule shrink); // shrinking rule
- ANNbd_tree::ANNbd_tree( // construct from point array
- ANNpointArray pa, // point array (with at least n pts)
- int n, // number of points
- int dd, // dimension
- int bs, // bucket size
- ANNsplitRule split, // splitting rule
- ANNshrinkRule shrink) // shrinking rule
- : ANNkd_tree(n, dd, bs) // build skeleton base tree
- {
- pts = pa; // where the points are
- if (n == 0) return; // no points--no sweat
- ANNorthRect bnd_box(dd); // bounding box for points
- // construct bounding rectangle
- annEnclRect(pa, pidx, n, dd, bnd_box);
- // copy to tree structure
- bnd_box_lo = annCopyPt(dd, bnd_box.lo);
- bnd_box_hi = annCopyPt(dd, bnd_box.hi);
- switch (split) { // build by rule
- case ANN_KD_STD: // standard kd-splitting rule
- root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink);
- break;
- case ANN_KD_MIDPT: // midpoint split
- root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink);
- break;
- case ANN_KD_SUGGEST: // best (in our opinion)
- case ANN_KD_SL_MIDPT: // sliding midpoint split
- root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink);
- break;
- case ANN_KD_FAIR: // fair split
- root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink);
- break;
- case ANN_KD_SL_FAIR: // sliding fair split
- root = rbd_tree(pa, pidx, n, dd, bs,
- bnd_box, sl_fair_split, shrink);
- break;
- default:
- annError("Illegal splitting method", ANNabort);
- }
- }
- //----------------------------------------------------------------------
- // Shrinking rules
- //----------------------------------------------------------------------
- enum ANNdecomp {SPLIT, SHRINK}; // decomposition methods
- //----------------------------------------------------------------------
- // trySimpleShrink - Attempt a simple shrink
- //
- // We compute the tight bounding box of the points, and compute
- // the 2*dim ``gaps'' between the sides of the tight box and the
- // bounding box. If any of the gaps is large enough relative to
- // the longest side of the tight bounding box, then we shrink
- // all sides whose gaps are large enough. (The reason for
- // comparing against the tight bounding box, is that after
- // shrinking the longest box size will decrease, and if we use
- // the standard bounding box, we may decide to shrink twice in
- // a row. Since the tight box is fixed, we cannot shrink twice
- // consecutively.)
- //----------------------------------------------------------------------
- const float BD_GAP_THRESH = 0.5; // gap threshold (must be < 1)
- const int BD_CT_THRESH = 2; // min number of shrink sides
- ANNdecomp trySimpleShrink( // try a simple shrink
- ANNpointArray pa, // point array
- ANNidxArray pidx, // point indices to store in subtree
- int n, // number of points
- int dim, // dimension of space
- const ANNorthRect &bnd_box, // current bounding box
- ANNorthRect &inner_box) // inner box if shrinking (returned)
- {
- int i;
- // compute tight bounding box
- annEnclRect(pa, pidx, n, dim, inner_box);
- ANNcoord max_length = 0; // find longest box side
- for (i = 0; i < dim; i++) {
- ANNcoord length = inner_box.hi[i] - inner_box.lo[i];
- if (length > max_length) {
- max_length = length;
- }
- }
- int shrink_ct = 0; // number of sides we shrunk
- for (i = 0; i < dim; i++) { // select which sides to shrink
- // gap between boxes
- ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i];
- // big enough gap to shrink?
- if (gap_hi < max_length*BD_GAP_THRESH)
- inner_box.hi[i] = bnd_box.hi[i]; // no - expand
- else shrink_ct++; // yes - shrink this side
- // repeat for high side
- ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i];
- if (gap_lo < max_length*BD_GAP_THRESH)
- inner_box.lo[i] = bnd_box.lo[i]; // no - expand
- else shrink_ct++; // yes - shrink this side
- }
- if (shrink_ct >= BD_CT_THRESH) // did we shrink enough sides?
- return SHRINK;
- else return SPLIT;
- }
- //----------------------------------------------------------------------
- // tryCentroidShrink - Attempt a centroid shrink
- //
- // We repeatedly apply the splitting rule, always to the larger subset
- // of points, until the number of points decreases by the constant
- // fraction BD_FRACTION. If this takes more than dim*BD_MAX_SPLIT_FAC
- // splits for this to happen, then we shrink to the final inner box
- // Otherwise we split.
- //----------------------------------------------------------------------
- const float BD_MAX_SPLIT_FAC = 0.5; // maximum number of splits allowed
- const float BD_FRACTION = 0.5; // ...to reduce points by this fraction
- // ...This must be < 1.
- ANNdecomp tryCentroidShrink( // try a centroid shrink
- ANNpointArray pa, // point array
- ANNidxArray pidx, // point indices to store in subtree
- int n, // number of points
- int dim, // dimension of space
- const ANNorthRect &bnd_box, // current bounding box
- ANNkd_splitter splitter, // splitting procedure
- ANNorthRect &inner_box) // inner box if shrinking (returned)
- {
- int n_sub = n; // number of points in subset
- int n_goal = (int) (n*BD_FRACTION); // number of point in goal
- int n_splits = 0; // number of splits needed
- // initialize inner box to bounding box
- annAssignRect(dim, inner_box, bnd_box);
- while (n_sub > n_goal) { // keep splitting until goal reached
- int cd; // cut dim from splitter (ignored)
- ANNcoord cv; // cut value from splitter (ignored)
- int n_lo; // number of points on low side
- // invoke splitting procedure
- (*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo);
- n_splits++; // increment split count
- if (n_lo >= n_sub/2) { // most points on low side
- inner_box.hi[cd] = cv; // collapse high side
- n_sub = n_lo; // recurse on lower points
- }
- else { // most points on high side
- inner_box.lo[cd] = cv; // collapse low side
- pidx += n_lo; // recurse on higher points
- n_sub -= n_lo;
- }
- }
- if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits
- return SHRINK; // shrink to final subset
- else
- return SPLIT;
- }
- //----------------------------------------------------------------------
- // selectDecomp - select which decomposition to use
- //----------------------------------------------------------------------
- ANNdecomp selectDecomp( // select decomposition method
- ANNpointArray pa, // point array
- ANNidxArray pidx, // point indices to store in subtree
- int n, // number of points
- int dim, // dimension of space
- const ANNorthRect &bnd_box, // current bounding box
- ANNkd_splitter splitter, // splitting procedure
- ANNshrinkRule shrink, // shrinking rule
- ANNorthRect &inner_box) // inner box if shrinking (returned)
- {
- ANNdecomp decomp = SPLIT; // decomposition
- switch (shrink) { // check shrinking rule
- case ANN_BD_NONE: // no shrinking allowed
- decomp = SPLIT;
- break;
- case ANN_BD_SUGGEST: // author's suggestion
- case ANN_BD_SIMPLE: // simple shrink
- decomp = trySimpleShrink(
- pa, pidx, // points and indices
- n, dim, // number of points and dimension
- bnd_box, // current bounding box
- inner_box); // inner box if shrinking (returned)
- break;
- case ANN_BD_CENTROID: // centroid shrink
- decomp = tryCentroidShrink(
- pa, pidx, // points and indices
- n, dim, // number of points and dimension
- bnd_box, // current bounding box
- splitter, // splitting procedure
- inner_box); // inner box if shrinking (returned)
- break;
- default:
- annError("Illegal shrinking rule", ANNabort);
- }
- return decomp;
- }
- //----------------------------------------------------------------------
- // rbd_tree - recursive procedure to build a bd-tree
- //
- // This is analogous to rkd_tree, but for bd-trees. See the
- // procedure rkd_tree() in kd_split.cpp for more information.
- //
- // If the number of points falls below the bucket size, then a
- // leaf node is created for the points. Otherwise we invoke the
- // procedure selectDecomp() which determines whether we are to
- // split or shrink. If splitting is chosen, then we essentially
- // do exactly as rkd_tree() would, and invoke the specified
- // splitting procedure to the points. Otherwise, the selection
- // procedure returns a bounding box, from which we extract the
- // appropriate shrinking bounds, and create a shrinking node.
- // Finally the points are subdivided, and the procedure is
- // invoked recursively on the two subsets to form the children.
- //----------------------------------------------------------------------
- ANNkd_ptr rbd_tree( // recursive construction of bd-tree
- ANNpointArray pa, // point array
- ANNidxArray pidx, // point indices to store in subtree
- int n, // number of points
- int dim, // dimension of space
- int bsp, // bucket space
- ANNorthRect &bnd_box, // bounding box for current node
- ANNkd_splitter splitter, // splitting routine
- ANNshrinkRule shrink) // shrinking rule
- {
- ANNdecomp decomp; // decomposition method
- ANNorthRect inner_box(dim); // inner box (if shrinking)
- if (n <= bsp) { // n small, make a leaf node
- if (n == 0) // empty leaf node
- return KD_TRIVIAL; // return (canonical) empty leaf
- else // construct the node and return
- return new ANNkd_leaf(n, pidx);
- }
-
- decomp = selectDecomp( // select decomposition method
- pa, pidx, // points and indices
- n, dim, // number of points and dimension
- bnd_box, // current bounding box
- splitter, shrink, // splitting/shrinking methods
- inner_box); // inner box if shrinking (returned)
-
- if (decomp == SPLIT) { // split selected
- int cd; // cutting dimension
- ANNcoord cv; // cutting value
- int n_lo; // number on low side of cut
- // invoke splitting procedure
- (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);
- ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension
- ANNcoord hv = bnd_box.hi[cd];
- bnd_box.hi[cd] = cv; // modify bounds for left subtree
- ANNkd_ptr lo = rbd_tree( // build left subtree
- pa, pidx, n_lo, // ...from pidx[0..n_lo-1]
- dim, bsp, bnd_box, splitter, shrink);
- bnd_box.hi[cd] = hv; // restore bounds
- bnd_box.lo[cd] = cv; // modify bounds for right subtree
- ANNkd_ptr hi = rbd_tree( // build right subtree
- pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
- dim, bsp, bnd_box, splitter, shrink);
- bnd_box.lo[cd] = lv; // restore bounds
- // create the splitting node
- return new ANNkd_split(cd, cv, lv, hv, lo, hi);
- }
- else { // shrink selected
- int n_in; // number of points in box
- int n_bnds; // number of bounding sides
- annBoxSplit( // split points around inner box
- pa, // points to split
- pidx, // point indices
- n, // number of points
- dim, // dimension
- inner_box, // inner box
- n_in); // number of points inside (returned)
- ANNkd_ptr in = rbd_tree( // build inner subtree pidx[0..n_in-1]
- pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink);
- ANNkd_ptr out = rbd_tree( // build outer subtree pidx[n_in..n]
- pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink);
- ANNorthHSArray bnds = NULL; // bounds (alloc in Box2Bnds and
- // ...freed in bd_shrink destroyer)
- annBox2Bnds( // convert inner box to bounds
- inner_box, // inner box
- bnd_box, // enclosing box
- dim, // dimension
- n_bnds, // number of bounds (returned)
- bnds); // bounds array (modified)
- // return shrinking node
- return new ANNbd_shrink(n_bnds, bnds, in, out);
- }
- }