btr0btr.c
上传用户:tsgydb
上传日期:2007-04-14
资源大小:10674k
文件大小:63k
- /******************************************************
- The B-tree
- (c) 1994-1996 Innobase Oy
- Created 6/2/1994 Heikki Tuuri
- *******************************************************/
- #include "btr0btr.h"
- #ifdef UNIV_NONINL
- #include "btr0btr.ic"
- #endif
- #include "fsp0fsp.h"
- #include "page0page.h"
- #include "btr0cur.h"
- #include "btr0sea.h"
- #include "btr0pcur.h"
- #include "rem0cmp.h"
- #include "lock0lock.h"
- #include "ibuf0ibuf.h"
- /*
- Node pointers
- -------------
- Leaf pages of a B-tree contain the index records stored in the
- tree. On levels n > 0 we store 'node pointers' to pages on level
- n - 1. For each page there is exactly one node pointer stored:
- thus the our tree is an ordinary B-tree, not a B-link tree.
- A node pointer contains a prefix P of an index record. The prefix
- is long enough so that it determines an index record uniquely.
- The file page number of the child page is added as the last
- field. To the child page we can store node pointers or index records
- which are >= P in the alphabetical order, but < P1 if there is
- a next node pointer on the level, and P1 is its prefix.
- If a node pointer with a prefix P points to a non-leaf child,
- then the leftmost record in the child must have the same
- prefix P. If it points to a leaf node, the child is not required
- to contain any record with a prefix equal to P. The leaf case
- is decided this way to allow arbitrary deletions in a leaf node
- without touching upper levels of the tree.
- We have predefined a special minimum record which we
- define as the smallest record in any alphabetical order.
- A minimum record is denoted by setting a bit in the record
- header. A minimum record acts as the prefix of a node pointer
- which points to a leftmost node on any level of the tree.
- File page allocation
- --------------------
- In the root node of a B-tree there are two file segment headers.
- The leaf pages of a tree are allocated from one file segment, to
- make them consecutive on disk if possible. From the other file segment
- we allocate pages for the non-leaf levels of the tree.
- */
- /* If this many inserts occur sequentially, it affects page split */
- #define BTR_PAGE_SEQ_INSERT_LIMIT 5
- /******************************************************************
- Creates a new index page to the tree (not the root, and also not
- used in page reorganization). */
- static
- void
- btr_page_create(
- /*============*/
- page_t* page, /* in: page to be created */
- dict_tree_t* tree, /* in: index tree */
- mtr_t* mtr); /* in: mtr */
- /******************************************************************
- Allocates a new file page to be used in an index tree. */
- static
- page_t*
- btr_page_alloc(
- /*===========*/
- /* out: new allocated page,
- x-latched */
- dict_tree_t* tree, /* in: index tree */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
- page split is made */
- ulint level, /* in: level where the page is placed
- in the tree */
- mtr_t* mtr); /* in: mtr */
- /******************************************************************
- Frees a file page used in an index tree. */
- static
- void
- btr_page_free(
- /*==========*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in, own: page to be freed */
- mtr_t* mtr); /* in: mtr */
- /******************************************************************
- Sets the child node file address in a node pointer. */
- UNIV_INLINE
- void
- btr_node_ptr_set_child_page_no(
- /*===========================*/
- rec_t* rec, /* in: node pointer record */
- ulint page_no, /* in: child node address */
- mtr_t* mtr); /* in: mtr */
- /****************************************************************
- Returns the upper level node pointer to a page. It is assumed that
- mtr holds an x-latch on the tree. */
- static
- rec_t*
- btr_page_get_father_node_ptr(
- /*=========================*/
- /* out: pointer to node pointer record */
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- mtr_t* mtr); /* in: mtr */
- /*****************************************************************
- Empties an index page. */
- static
- void
- btr_page_empty(
- /*===========*/
- page_t* page, /* in: page to be emptied */
- mtr_t* mtr); /* in: mtr */
- /*****************************************************************
- Returns TRUE if the insert fits on the appropriate half-page
- with the chosen split_rec. */
- static
- ibool
- btr_page_insert_fits(
- /*=================*/
- /* out: TRUE if fits */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- rec_t* split_rec, /* in: suggestion for first record
- on upper half-page, or NULL if
- tuple should be first */
- dtuple_t* tuple); /* in: tuple to insert */
- /******************************************************************
- Gets the root node of a tree and x-latches it. */
- static
- page_t*
- btr_root_get(
- /*=========*/
- /* out: root page, x-latched */
- dict_tree_t* tree, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
- {
- ulint space;
- ulint root_page_no;
- page_t* root;
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)
- || mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_S_LOCK));
-
- space = dict_tree_get_space(tree);
- root_page_no = dict_tree_get_page(tree);
- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
-
- return(root);
- }
- /*****************************************************************
- Gets pointer to the previous user record in the tree. It is assumed that
- the caller has appropriate latches on the page and its neighbor. */
- rec_t*
- btr_get_prev_user_rec(
- /*==================*/
- /* out: previous user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr) /* in: mtr holding a latch on the page, and if
- needed, also to the previous page */
- {
- page_t* page;
- page_t* prev_page;
- ulint prev_page_no;
- rec_t* prev_rec;
- ulint space;
- page = buf_frame_align(rec);
-
- if (page_get_infimum_rec(page) != rec) {
- prev_rec = page_rec_get_prev(rec);
- if (page_get_infimum_rec(page) != prev_rec) {
- return(prev_rec);
- }
- }
-
- prev_page_no = btr_page_get_prev(page, mtr);
- space = buf_frame_get_space_id(page);
-
- if (prev_page_no != FIL_NULL) {
- prev_page = buf_page_get_with_no_latch(space, prev_page_no,
- mtr);
- /* The caller must already have a latch to the brother */
- ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_S_FIX))
- || (mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_X_FIX)));
- prev_rec = page_rec_get_prev(page_get_supremum_rec(prev_page));
- return(prev_rec);
- }
- return(NULL);
- }
- /*****************************************************************
- Gets pointer to the next user record in the tree. It is assumed that the
- caller has appropriate latches on the page and its neighbor. */
- rec_t*
- btr_get_next_user_rec(
- /*==================*/
- /* out: next user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr) /* in: mtr holding a latch on the page, and if
- needed, also to the next page */
- {
- page_t* page;
- page_t* next_page;
- ulint next_page_no;
- rec_t* next_rec;
- ulint space;
- page = buf_frame_align(rec);
-
- if (page_get_supremum_rec(page) != rec) {
- next_rec = page_rec_get_next(rec);
- if (page_get_supremum_rec(page) != next_rec) {
- return(next_rec);
- }
- }
-
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
-
- if (next_page_no != FIL_NULL) {
- next_page = buf_page_get_with_no_latch(space, next_page_no,
- mtr);
- /* The caller must already have a latch to the brother */
- ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
- MTR_MEMO_PAGE_S_FIX))
- || (mtr_memo_contains(mtr, buf_block_align(next_page),
- MTR_MEMO_PAGE_X_FIX)));
- next_rec = page_rec_get_next(page_get_infimum_rec(next_page));
- return(next_rec);
- }
- return(NULL);
- }
- /******************************************************************
- Creates a new index page to the tree (not the root, and also not used in
- page reorganization). */
- static
- void
- btr_page_create(
- /*============*/
- page_t* page, /* in: page to be created */
- dict_tree_t* tree, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
- {
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- page_create(page, mtr);
-
- btr_page_set_index_id(page, tree->id, mtr);
- }
- /******************************************************************
- Allocates a new file page to be used in an ibuf tree. Takes the page from
- the free list of the tree, which must contain pages! */
- static
- page_t*
- btr_page_alloc_for_ibuf(
- /*====================*/
- /* out: new allocated page, x-latched */
- dict_tree_t* tree, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
- {
- fil_addr_t node_addr;
- page_t* root;
- page_t* new_page;
- root = btr_root_get(tree, mtr);
-
- node_addr = flst_get_first(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
- ut_a(node_addr.page != FIL_NULL);
- new_page = buf_page_get(dict_tree_get_space(tree), node_addr.page,
- RW_X_LATCH, mtr);
- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
- mtr);
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
- return(new_page);
- }
- /******************************************************************
- Allocates a new file page to be used in an index tree. NOTE: we assume
- that the caller has made the reservation for free extents! */
- static
- page_t*
- btr_page_alloc(
- /*===========*/
- /* out: new allocated page, x-latched */
- dict_tree_t* tree, /* in: index tree */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
- page split is made */
- ulint level, /* in: level where the page is placed
- in the tree */
- mtr_t* mtr) /* in: mtr */
- {
- fseg_header_t* seg_header;
- page_t* root;
- page_t* new_page;
- ulint new_page_no;
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK));
- if (tree->type & DICT_IBUF) {
- return(btr_page_alloc_for_ibuf(tree, mtr));
- }
- root = btr_root_get(tree, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
- /* Parameter TRUE below states that the caller has made the
- reservation for free extents, and thus we know that a page can
- be allocated: */
-
- new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
- file_direction, TRUE, mtr);
- ut_a(new_page_no != FIL_NULL);
- new_page = buf_page_get(dict_tree_get_space(tree), new_page_no,
- RW_X_LATCH, mtr);
- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-
- return(new_page);
- }
- /******************************************************************
- Gets the number of pages in a B-tree. */
- ulint
- btr_get_size(
- /*=========*/
- /* out: number of pages */
- dict_index_t* index, /* in: index */
- ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
- {
- fseg_header_t* seg_header;
- page_t* root;
- ulint n;
- ulint dummy;
- mtr_t mtr;
- mtr_start(&mtr);
- mtr_s_lock(dict_tree_get_lock(index->tree), &mtr);
- root = btr_root_get(index->tree, &mtr);
-
- if (flag == BTR_N_LEAF_PAGES) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- fseg_n_reserved_pages(seg_header, &n, &mtr);
-
- } else if (flag == BTR_TOTAL_SIZE) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
-
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
- } else {
- ut_a(0);
- }
- mtr_commit(&mtr);
- return(n);
- }
- /******************************************************************
- Frees a page used in an ibuf tree. Puts the page to the free list of the
- ibuf tree. */
- static
- void
- btr_page_free_for_ibuf(
- /*===================*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr) /* in: mtr */
- {
- page_t* root;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- root = btr_root_get(tree, mtr);
-
- flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
- }
- /******************************************************************
- Frees a file page used in an index tree. */
- static
- void
- btr_page_free(
- /*==========*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr) /* in: mtr */
- {
- fseg_header_t* seg_header;
- page_t* root;
- ulint space;
- ulint page_no;
- ulint level;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* The page gets invalid for optimistic searches: increment the frame
- modify clock */
- buf_frame_modify_clock_inc(page);
-
- if (tree->type & DICT_IBUF) {
- btr_page_free_for_ibuf(tree, page, mtr);
- return;
- }
- root = btr_root_get(tree, mtr);
- level = btr_page_get_level(page, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
- space = buf_frame_get_space_id(page);
- page_no = buf_frame_get_page_no(page);
-
- fseg_free_page(seg_header, space, page_no, mtr);
- }
- /******************************************************************
- Sets the child node file address in a node pointer. */
- UNIV_INLINE
- void
- btr_node_ptr_set_child_page_no(
- /*===========================*/
- rec_t* rec, /* in: node pointer record */
- ulint page_no, /* in: child node address */
- mtr_t* mtr) /* in: mtr */
- {
- ulint n_fields;
- byte* field;
- ulint len;
- ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
-
- n_fields = rec_get_n_fields(rec);
- /* The child address is in the last field */
- field = rec_get_nth_field(rec, n_fields - 1, &len);
- ut_ad(len == 4);
-
- mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
- }
- /****************************************************************
- Returns the child page of a node pointer and x-latches it. */
- static
- page_t*
- btr_node_ptr_get_child(
- /*===================*/
- /* out: child page, x-latched */
- rec_t* node_ptr, /* in: node pointer */
- mtr_t* mtr) /* in: mtr */
- {
- ulint page_no;
- ulint space;
- page_t* page;
-
- space = buf_frame_get_space_id(node_ptr);
- page_no = btr_node_ptr_get_child_page_no(node_ptr);
- page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-
- return(page);
- }
- /****************************************************************
- Returns the upper level node pointer to a page. It is assumed that mtr holds
- an x-latch on the tree. */
- static
- rec_t*
- btr_page_get_father_for_rec(
- /*========================*/
- /* out: pointer to node pointer record,
- its page x-latched */
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- rec_t* user_rec,/* in: user_record on page */
- mtr_t* mtr) /* in: mtr */
- {
- mem_heap_t* heap;
- dtuple_t* tuple;
- btr_cur_t cursor;
- rec_t* node_ptr;
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK));
- ut_ad(user_rec != page_get_supremum_rec(page));
- ut_ad(user_rec != page_get_infimum_rec(page));
-
- ut_ad(dict_tree_get_page(tree) != buf_frame_get_page_no(page));
- heap = mem_heap_create(100);
- tuple = dict_tree_build_node_ptr(tree, user_rec, 0, heap);
- /* In the following, we choose just any index from the tree as the
- first parameter for btr_cur_search_to_nth_level. */
-
- btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes),
- btr_page_get_level(page, mtr) + 1,
- tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
- node_ptr = btr_cur_get_rec(&cursor);
- ut_ad(btr_node_ptr_get_child_page_no(node_ptr) ==
- buf_frame_get_page_no(page));
- mem_heap_free(heap);
- return(node_ptr);
- }
- /****************************************************************
- Returns the upper level node pointer to a page. It is assumed that
- mtr holds an x-latch on the tree. */
- static
- rec_t*
- btr_page_get_father_node_ptr(
- /*=========================*/
- /* out: pointer to node pointer record */
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- mtr_t* mtr) /* in: mtr */
- {
- return(btr_page_get_father_for_rec(tree, page,
- page_rec_get_next(page_get_infimum_rec(page)), mtr));
- }
- /****************************************************************
- Creates the root node for a new index tree. */
- ulint
- btr_create(
- /*=======*/
- /* out: page number of the created root, FIL_NULL if
- did not succeed */
- ulint type, /* in: type of the index */
- ulint space, /* in: space where created */
- dulint index_id,/* in: index id */
- mtr_t* mtr) /* in: mini-transaction handle */
- {
- ulint page_no;
- buf_frame_t* ibuf_hdr_frame;
- buf_frame_t* frame;
- page_t* page;
- /* Create the two new segments (one, in the case of an ibuf tree) for
- the index tree; the segment headers are put on the allocated root page
- (for an ibuf tree, not in the root, but on a separate ibuf header
- page) */
- if (type & DICT_IBUF) {
- /* Allocate first the ibuf header page */
- ibuf_hdr_frame = fseg_create(space, 0,
- IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
- buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
- ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
- == IBUF_HEADER_PAGE_NO);
- /* Allocate then the next page to the segment: it will be the
- tree root page */
- page_no = fseg_alloc_free_page(
- ibuf_hdr_frame + IBUF_HEADER
- + IBUF_TREE_SEG_HEADER, IBUF_TREE_ROOT_PAGE_NO,
- FSP_UP, mtr);
- ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
- frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
- } else {
- frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
- mtr);
- }
-
- if (frame == NULL) {
- return(FIL_NULL);
- }
- page_no = buf_frame_get_page_no(frame);
-
- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
- if (type & DICT_IBUF) {
- /* It is an insert buffer tree: initialize the free list */
- ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
- flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
- } else {
- /* It is a non-ibuf tree: create a file segment for leaf
- pages */
- fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF,
- mtr);
- /* The fseg create acquires a second latch on the page,
- therefore we must declare it: */
- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
- }
-
- /* Create a new index page on the the allocated segment page */
- page = page_create(frame, mtr);
- /* Set the index id of the page */
- btr_page_set_index_id(page, index_id, mtr);
- /* Set the level of the new index page */
- btr_page_set_level(page, 0, mtr);
-
- /* Set the next node and previous node fields */
- btr_page_set_next(page, FIL_NULL, mtr);
- btr_page_set_prev(page, FIL_NULL, mtr);
- /* We reset the free bits for the page to allow creation of several
- trees in the same mtr, otherwise the latch on a bitmap page would
- prevent it because of the latching order */
-
- ibuf_reset_free_bits_with_type(type, page);
- /* In the following assertion we test that two records of maximum
- allowed size fit on the root page: this fact is needed to ensure
- correctness of split algorithms */
- ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
- return(page_no);
- }
- /****************************************************************
- Frees a B-tree except the root page, which MUST be freed after this
- by calling btr_free_root. */
- void
- btr_free_but_not_root(
- /*==================*/
- ulint space, /* in: space where created */
- ulint root_page_no) /* in: root page number */
- {
- ibool finished;
- page_t* root;
- mtr_t mtr;
- leaf_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
- /* NOTE: page hash indexes are dropped when a page is freed inside
- fsp0fsp. */
- finished = fseg_free_step(
- root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr);
- mtr_commit(&mtr);
- if (!finished) {
- goto leaf_loop;
- }
- top_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
- finished = fseg_free_step_not_header(
- root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
- mtr_commit(&mtr);
- if (!finished) {
- goto top_loop;
- }
- }
- /****************************************************************
- Frees the B-tree root page. Other tree MUST already have been freed. */
- void
- btr_free_root(
- /*==========*/
- ulint space, /* in: space where created */
- ulint root_page_no, /* in: root page number */
- mtr_t* mtr) /* in: a mini-transaction which has already
- been started */
- {
- ibool finished;
- page_t* root;
- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
- btr_search_drop_page_hash_index(root);
- top_loop:
- finished = fseg_free_step(
- root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
- if (!finished) {
- goto top_loop;
- }
- }
- /*****************************************************************
- Reorganizes an index page. */
- void
- btr_page_reorganize_low(
- /*====================*/
- ibool low, /* in: TRUE if locks should not be updated, i.e.,
- there cannot exist locks on the page */
- page_t* page, /* in: page to be reorganized */
- mtr_t* mtr) /* in: mtr */
- {
- page_t* new_page;
- ulint log_mode;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* Write the log record */
- mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr);
- /* Turn logging off */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
- new_page = buf_frame_alloc();
- /* Copy the old page to temporary space */
- buf_frame_copy(new_page, page);
- btr_search_drop_page_hash_index(page);
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
- page_create(page, mtr);
-
- /* Copy the records from the temporary space to the recreated page;
- do not copy the lock bits yet */
- page_copy_rec_list_end_no_locks(page, new_page,
- page_get_infimum_rec(new_page), mtr);
- /* Copy max trx id to recreated page */
- page_set_max_trx_id(page, page_get_max_trx_id(new_page));
-
- if (!low) {
- /* Update the record lock bitmaps */
- lock_move_reorganize_page(page, new_page);
- }
- buf_frame_free(new_page);
- /* Restore logging mode */
- mtr_set_log_mode(mtr, log_mode);
- }
- /*****************************************************************
- Reorganizes an index page. */
- void
- btr_page_reorganize(
- /*================*/
- page_t* page, /* in: page to be reorganized */
- mtr_t* mtr) /* in: mtr */
- {
- btr_page_reorganize_low(FALSE, page, mtr);
- }
- /***************************************************************
- Parses a redo log record of reorganizing a page. */
- byte*
- btr_parse_page_reorganize(
- /*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
- {
- ut_ad(ptr && end_ptr);
- /* The record is empty, except for the record initial part */
- if (page) {
- btr_page_reorganize_low(TRUE, page, mtr);
- }
- return(ptr);
- }
- /*****************************************************************
- Empties an index page. */
- static
- void
- btr_page_empty(
- /*===========*/
- page_t* page, /* in: page to be emptied */
- mtr_t* mtr) /* in: mtr */
- {
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(page);
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
- page_create(page, mtr);
- }
- /*****************************************************************
- Makes tree one level higher by splitting the root, and inserts
- the tuple. It is assumed that mtr contains an x-latch on the tree.
- NOTE that the operation of this function must always succeed,
- we cannot reverse it: therefore enough free disk space must be
- guaranteed to be available before this function is called. */
- rec_t*
- btr_root_raise_and_insert(
- /*======================*/
- /* out: inserted record */
- btr_cur_t* cursor, /* in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr) /* in: mtr */
- {
- dict_tree_t* tree;
- page_t* root;
- page_t* new_page;
- ulint new_page_no;
- rec_t* rec;
- mem_heap_t* heap;
- dtuple_t* node_ptr;
- ulint level;
- rec_t* node_ptr_rec;
- page_cur_t* page_cursor;
-
- root = btr_cur_get_page(cursor);
- tree = btr_cur_get_tree(cursor);
- ut_ad(dict_tree_get_page(tree) == buf_frame_get_page_no(root));
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(root);
- /* Allocate a new page to the tree. Root splitting is done by first
- moving the root records to the new page, emptying the root, putting
- a node pointer to the new page, and then splitting the new page. */
-
- new_page = btr_page_alloc(tree, 0, FSP_NO_DIR,
- btr_page_get_level(root, mtr), mtr);
- btr_page_create(new_page, tree, mtr);
- level = btr_page_get_level(root, mtr);
-
- /* Set the levels of the new index page and root page */
- btr_page_set_level(new_page, level, mtr);
- btr_page_set_level(root, level + 1, mtr);
-
- /* Set the next node and previous node fields of new page */
- btr_page_set_next(new_page, FIL_NULL, mtr);
- btr_page_set_prev(new_page, FIL_NULL, mtr);
- /* Move the records from root to the new page */
- page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
- mtr);
- /* If this is a pessimistic insert which is actually done to
- perform a pessimistic update then we have stored the lock
- information of the record to be inserted on the infimum of the
- root page: we cannot discard the lock structs on the root page */
-
- lock_update_root_raise(new_page, root);
- /* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
- rec = page_rec_get_next(page_get_infimum_rec(new_page));
- new_page_no = buf_frame_get_page_no(new_page);
-
- /* Build the node pointer (= node key and page address) for the
- child */
- node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap);
-
- /* Reorganize the root to get free space */
- btr_page_reorganize(root, mtr);
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Insert node pointer to the root */
- page_cur_set_before_first(root, page_cursor);
- node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, mtr);
- ut_ad(node_ptr_rec);
- /* The node pointer must be marked as the predefined minimum record,
- as there is no lower alphabetical limit to records in the leftmost
- node of a level: */
- btr_set_min_rec_mark(node_ptr_rec, mtr);
-
- /* Free the memory heap */
- mem_heap_free(heap);
- /* We play safe and reset the free bits for the new page */
- /* printf("Root raise new page no %lun",
- buf_frame_get_page_no(new_page)); */
- ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
- new_page);
- /* Reposition the cursor to the child node */
- page_cur_search(new_page, tuple, PAGE_CUR_LE, page_cursor);
-
- /* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, mtr));
- }
- /*****************************************************************
- Decides if the page should be split at the convergence point of inserts
- converging to the left. */
- ibool
- btr_page_get_split_rec_to_left(
- /*===========================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec) /* out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
- {
- page_t* page;
- rec_t* insert_point;
- rec_t* infimum;
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
- if ((page_header_get_ptr(page, PAGE_LAST_INSERT)
- == page_rec_get_next(insert_point))
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_LEFT)
- && ((page_header_get_field(page, PAGE_N_DIRECTION)
- >= BTR_PAGE_SEQ_INSERT_LIMIT)
- || (page_header_get_field(page, PAGE_N_DIRECTION) + 1
- >= page_get_n_recs(page)))) {
- infimum = page_get_infimum_rec(page);
-
- if ((infimum != insert_point)
- && (page_rec_get_next(infimum) != insert_point)) {
- *split_rec = insert_point;
- } else {
- *split_rec = page_rec_get_next(insert_point);
- }
- return(TRUE);
- }
- return(FALSE);
- }
- /*****************************************************************
- Decides if the page should be split at the convergence point of inserts
- converging to the right. */
- ibool
- btr_page_get_split_rec_to_right(
- /*============================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec) /* out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first */
- {
- page_t* page;
- rec_t* insert_point;
- rec_t* supremum;
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
- if ((page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point)
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)
- && ((page_header_get_field(page, PAGE_N_DIRECTION)
- >= BTR_PAGE_SEQ_INSERT_LIMIT)
- || (page_header_get_field(page, PAGE_N_DIRECTION) + 1
- >= page_get_n_recs(page)))) {
- supremum = page_get_supremum_rec(page);
-
- if ((page_rec_get_next(insert_point) != supremum)
- && (page_rec_get_next(page_rec_get_next(insert_point))
- != supremum)
- && (page_rec_get_next(page_rec_get_next(
- page_rec_get_next(insert_point)))
- != supremum)) {
- /* If there are >= 3 user records up from the insert
- point, split all but 2 off */
- *split_rec = page_rec_get_next(page_rec_get_next(
- page_rec_get_next(insert_point)));
- } else {
- /* Else split at inserted record */
- *split_rec = NULL;
- }
- return(TRUE);
- }
- return(FALSE);
- }
- /*****************************************************************
- Calculates a split record such that the tuple will certainly fit on
- its half-page when the split is performed. We assume in this function
- only that the cursor page has at least one user record. */
- static
- rec_t*
- btr_page_get_sure_split_rec(
- /*========================*/
- /* out: split record, or NULL if
- tuple will be the first record on
- upper half-page */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- dtuple_t* tuple) /* in: tuple to insert */
- {
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- ulint total_space;
- ulint incl_data;
- rec_t* ins_rec;
- rec_t* rec;
- rec_t* next_rec;
- ulint n;
-
- page = btr_cur_get_page(cursor);
-
- insert_size = rec_get_converted_size(tuple);
- free_space = page_get_free_space_of_empty();
- /* free_space is now the free space of a created new page */
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
- ut_ad(total_n_recs >= 2);
- total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
- n = 0;
- incl_data = 0;
- ins_rec = btr_cur_get_rec(cursor);
- rec = page_get_infimum_rec(page);
- /* We start to include records to the left half, and when the
- space reserved by them exceeds half of total_space, then if
- the included records fit on the left page, they will be put there
- if something was left over also for the right page,
- otherwise the last included record will be the first on the right
- half page */
- for (;;) {
- /* Decide the next record to include */
- if (rec == ins_rec) {
- rec = NULL; /* NULL denotes that tuple is
- now included */
- } else if (rec == NULL) {
- rec = page_rec_get_next(ins_rec);
- } else {
- rec = page_rec_get_next(rec);
- }
- if (rec == NULL) {
- /* Include tuple */
- incl_data += insert_size;
- } else {
- incl_data += rec_get_size(rec);
- }
- n++;
-
- if (incl_data + page_dir_calc_reserved_space(n)
- >= total_space / 2) {
- if (incl_data + page_dir_calc_reserved_space(n)
- <= free_space) {
- /* The next record will be the first on
- the right half page if it is not the
- supremum record of page */
- if (rec == ins_rec) {
- next_rec = NULL;
- } else if (rec == NULL) {
- next_rec = page_rec_get_next(ins_rec);
- } else {
- next_rec = page_rec_get_next(rec);
- }
- if (next_rec != page_get_supremum_rec(page)) {
- return(next_rec);
- }
- }
- return(rec);
- }
- }
- }
- /*****************************************************************
- Returns TRUE if the insert fits on the appropriate half-page with the
- chosen split_rec. */
- static
- ibool
- btr_page_insert_fits(
- /*=================*/
- /* out: TRUE if fits */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- rec_t* split_rec, /* in: suggestion for first record
- on upper half-page, or NULL if
- tuple to be inserted should be first */
- dtuple_t* tuple) /* in: tuple to insert */
- {
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- rec_t* rec;
- rec_t* end_rec;
-
- page = btr_cur_get_page(cursor);
-
- insert_size = rec_get_converted_size(tuple);
- free_space = page_get_free_space_of_empty();
- /* free_space is now the free space of a created new page */
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
-
- /* We determine which records (from rec to end_rec, not including
- end_rec) will end up on the other half page from tuple when it is
- inserted. */
-
- if (split_rec == NULL) {
- rec = page_rec_get_next(page_get_infimum_rec(page));
- end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
- } else if (cmp_dtuple_rec(tuple, split_rec) >= 0) {
- rec = page_rec_get_next(page_get_infimum_rec(page));
- end_rec = split_rec;
- } else {
- rec = split_rec;
- end_rec = page_get_supremum_rec(page);
- }
- if (total_data + page_dir_calc_reserved_space(total_n_recs)
- <= free_space) {
- /* Ok, there will be enough available space on the
- half page where the tuple is inserted */
- return(TRUE);
- }
- while (rec != end_rec) {
- /* In this loop we calculate the amount of reserved
- space after rec is removed from page. */
- total_data -= rec_get_size(rec);
- total_n_recs--;
- if (total_data + page_dir_calc_reserved_space(total_n_recs)
- <= free_space) {
- /* Ok, there will be enough available space on the
- half page where the tuple is inserted */
- return(TRUE);
- }
- rec = page_rec_get_next(rec);
- }
- return(FALSE);
- }
- /***********************************************************
- Inserts a data tuple to a tree on a non-leaf level. It is assumed
- that mtr holds an x-latch on the tree. */
- void
- btr_insert_on_non_leaf_level(
- /*=========================*/
- dict_tree_t* tree, /* in: tree */
- ulint level, /* in: level, must be > 0 */
- dtuple_t* tuple, /* in: the record to be inserted */
- mtr_t* mtr) /* in: mtr */
- {
- btr_cur_t cursor;
- ulint err;
- rec_t* rec;
- ut_ad(level > 0);
-
- /* In the following, choose just any index from the tree as the
- first parameter for btr_cur_search_to_nth_level. */
- btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes),
- level, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE,
- &cursor, 0, mtr);
- err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple,
- &rec, NULL, mtr);
- ut_a(err == DB_SUCCESS);
- }
- /******************************************************************
- Attaches the halves of an index page on the appropriate level in an
- index tree. */
- static
- void
- btr_attach_half_pages(
- /*==================*/
- dict_tree_t* tree, /* in: the index tree */
- page_t* page, /* in: page to be split */
- rec_t* split_rec, /* in: first record on upper
- half page */
- page_t* new_page, /* in: the new half page */
- ulint direction, /* in: FSP_UP or FSP_DOWN */
- mtr_t* mtr) /* in: mtr */
- {
- ulint space;
- rec_t* node_ptr;
- page_t* prev_page;
- page_t* next_page;
- ulint prev_page_no;
- ulint next_page_no;
- ulint level;
- page_t* lower_page;
- page_t* upper_page;
- ulint lower_page_no;
- ulint upper_page_no;
- dtuple_t* node_ptr_upper;
- mem_heap_t* heap;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
- MTR_MEMO_PAGE_X_FIX));
- /* Based on split direction, decide upper and lower pages */
- if (direction == FSP_DOWN) {
- lower_page_no = buf_frame_get_page_no(new_page);
- upper_page_no = buf_frame_get_page_no(page);
- lower_page = new_page;
- upper_page = page;
- /* Look from the tree for the node pointer to page */
- node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
- /* Replace the address of the old child node (= page) with the
- address of the new lower half */
- btr_node_ptr_set_child_page_no(node_ptr, lower_page_no, mtr);
- } else {
- lower_page_no = buf_frame_get_page_no(page);
- upper_page_no = buf_frame_get_page_no(new_page);
- lower_page = page;
- upper_page = new_page;
- }
-
- /* Create a memory heap where the data tuple is stored */
- heap = mem_heap_create(100);
- /* Get the level of the split pages */
- level = btr_page_get_level(page, mtr);
- /* Build the node pointer (= node key and page address) for the upper
- half */
- node_ptr_upper = dict_tree_build_node_ptr(tree, split_rec,
- upper_page_no, heap);
- /* Insert it next to the pointer to the lower half. Note that this
- may generate recursion leading to a split on the higher level. */
- btr_insert_on_non_leaf_level(tree, level + 1, node_ptr_upper, mtr);
-
- /* Free the memory heap */
- mem_heap_free(heap);
- /* Get the previous and next pages of page */
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
-
- /* Update page links of the level */
-
- if (prev_page_no != FIL_NULL) {
- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
- btr_page_set_next(prev_page, lower_page_no, mtr);
- }
- if (next_page_no != FIL_NULL) {
- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
- btr_page_set_prev(next_page, upper_page_no, mtr);
- }
-
- btr_page_set_prev(lower_page, prev_page_no, mtr);
- btr_page_set_next(lower_page, upper_page_no, mtr);
- btr_page_set_level(lower_page, level, mtr);
- btr_page_set_prev(upper_page, lower_page_no, mtr);
- btr_page_set_next(upper_page, next_page_no, mtr);
- btr_page_set_level(upper_page, level, mtr);
- }
- /*****************************************************************
- Splits an index page to halves and inserts the tuple. It is assumed
- that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
- is released within this function! NOTE that the operation of this
- function must always succeed, we cannot reverse it: therefore
- enough free disk space must be guaranteed to be available before
- this function is called. */
- rec_t*
- btr_page_split_and_insert(
- /*======================*/
- /* out: inserted record; NOTE: the tree
- x-latch is released! NOTE: 2 free disk
- pages must be available! */
- btr_cur_t* cursor, /* in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr) /* in: mtr */
- {
- dict_tree_t* tree;
- page_t* page;
- ulint page_no;
- byte direction;
- ulint hint_page_no;
- page_t* new_page;
- rec_t* split_rec;
- page_t* left_page;
- page_t* right_page;
- page_t* insert_page;
- page_cur_t* page_cursor;
- rec_t* first_rec;
- byte* buf;
- rec_t* move_limit;
- ibool insert_will_fit;
- ulint n_iterations = 0;
- rec_t* rec;
- func_start:
- tree = btr_cur_get_tree(cursor);
-
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK));
- ut_ad(rw_lock_own(dict_tree_get_lock(tree), RW_LOCK_EX));
- page = btr_cur_get_page(cursor);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_get_n_recs(page) >= 2);
-
- page_no = buf_frame_get_page_no(page);
- /* 1. Decide the split record; split_rec == NULL means that the
- tuple to be inserted should be the first record on the upper
- half-page */
- if (n_iterations > 0) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- split_rec = btr_page_get_sure_split_rec(cursor, tuple);
-
- } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
- direction = FSP_DOWN;
- hint_page_no = page_no - 1;
- } else {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- split_rec = page_get_middle_rec(page);
- }
- /* 2. Allocate a new page to the tree */
- new_page = btr_page_alloc(tree, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr);
- btr_page_create(new_page, tree, mtr);
-
- /* 3. Calculate the first record on the upper half-page, and the
- first record (move_limit) on original page which ends up on the
- upper half */
- if (split_rec != NULL) {
- first_rec = split_rec;
- move_limit = split_rec;
- } else {
- buf = mem_alloc(rec_get_converted_size(tuple));
- first_rec = rec_convert_dtuple_to_rec(buf, tuple);
- move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
- }
-
- /* 4. Do first the modifications in the tree structure */
- btr_attach_half_pages(tree, page, first_rec, new_page, direction, mtr);
- if (split_rec == NULL) {
- mem_free(buf);
- }
- /* If the split is made on the leaf level and the insert will fit
- on the appropriate half-page, we may release the tree x-latch.
- We can then move the records after releasing the tree latch,
- thus reducing the tree latch contention. */
- insert_will_fit = btr_page_insert_fits(cursor, split_rec, tuple);
-
- if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
- mtr_memo_release(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK);
- }
- /* 5. Move then the records to the new page */
- if (direction == FSP_DOWN) {
- /* printf("Split leftn"); */
- page_move_rec_list_start(new_page, page, move_limit, mtr);
- left_page = new_page;
- right_page = page;
- lock_update_split_left(right_page, left_page);
- } else {
- /* printf("Split rightn"); */
- page_move_rec_list_end(new_page, page, move_limit, mtr);
- left_page = page;
- right_page = new_page;
- lock_update_split_right(right_page, left_page);
- }
- /* 6. The split and the tree modification is now completed. Decide the
- page where the tuple should be inserted */
- if (split_rec == NULL) {
- insert_page = right_page;
- } else if (cmp_dtuple_rec(tuple, first_rec) >= 0) {
- insert_page = right_page;
- } else {
- insert_page = left_page;
- }
- /* 7. Reposition the cursor for insert and try insertion */
- page_cursor = btr_cur_get_page_cur(cursor);
- page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple, mtr);
- if (rec != NULL) {
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
- ibuf_update_free_bits_for_two_pages_low(cursor->index,
- left_page,
- right_page, mtr);
- /* printf("Split and insert done %lu %lun",
- buf_frame_get_page_no(left_page),
- buf_frame_get_page_no(right_page)); */
- return(rec);
- }
-
- /* 8. If insert did not fit, try page reorganization */
- btr_page_reorganize(insert_page, mtr);
- page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple, mtr);
- if (rec == NULL) {
- /* The insert did not fit on the page: loop back to the
- start of the function for a new split */
-
- /* We play safe and reset the free bits for new_page */
- ibuf_reset_free_bits(cursor->index, new_page);
- /* printf("Split second round %lun",
- buf_frame_get_page_no(page)); */
- n_iterations++;
- ut_ad(n_iterations < 2);
- ut_ad(!insert_will_fit);
- goto func_start;
- }
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
- ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
- right_page, mtr);
- /* printf("Split and insert done %lu %lun",
- buf_frame_get_page_no(left_page),
- buf_frame_get_page_no(right_page)); */
- ut_ad(page_validate(left_page, UT_LIST_GET_FIRST(tree->tree_indexes)));
- ut_ad(page_validate(right_page, UT_LIST_GET_FIRST(tree->tree_indexes)));
- return(rec);
- }
- /*****************************************************************
- Removes a page from the level list of pages. */
- static
- void
- btr_level_list_remove(
- /*==================*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page to remove */
- mtr_t* mtr) /* in: mtr */
- {
- ulint space;
- ulint prev_page_no;
- page_t* prev_page;
- ulint next_page_no;
- page_t* next_page;
-
- ut_ad(tree && page && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* Get the previous and next page numbers of page */
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
-
- /* Update page links of the level */
-
- if (prev_page_no != FIL_NULL) {
- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
- btr_page_set_next(prev_page, next_page_no, mtr);
- }
- if (next_page_no != FIL_NULL) {
- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
- btr_page_set_prev(next_page, prev_page_no, mtr);
- }
- }
-
- /********************************************************************
- Writes the redo log record for setting an index record as the predefined
- minimum record. */
- UNIV_INLINE
- void
- btr_set_min_rec_mark_log(
- /*=====================*/
- rec_t* rec, /* in: record */
- mtr_t* mtr) /* in: mtr */
- {
- mlog_write_initial_log_record(rec, MLOG_REC_MIN_MARK, mtr);
- /* Write rec offset as a 2-byte ulint */
- mlog_catenate_ulint(mtr, rec - buf_frame_align(rec), MLOG_2BYTES);
- }
- /********************************************************************
- Parses the redo log record for setting an index record as the predefined
- minimum record. */
- byte*
- btr_parse_set_min_rec_mark(
- /*=======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
- {
- rec_t* rec;
- if (end_ptr < ptr + 2) {
- return(NULL);
- }
- if (page) {
- rec = page + mach_read_from_2(ptr);
- btr_set_min_rec_mark(rec, mtr);
- }
- return(ptr + 2);
- }
- /********************************************************************
- Sets a record as the predefined minimum record. */
- void
- btr_set_min_rec_mark(
- /*=================*/
- rec_t* rec, /* in: record */
- mtr_t* mtr) /* in: mtr */
- {
- ulint info_bits;
- info_bits = rec_get_info_bits(rec);
- rec_set_info_bits(rec, info_bits | REC_INFO_MIN_REC_FLAG);
- btr_set_min_rec_mark_log(rec, mtr);
- }
- /*****************************************************************
- Deletes on the upper level the node pointer to a page. */
- void
- btr_node_ptr_delete(
- /*================*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page whose node pointer is deleted */
- mtr_t* mtr) /* in: mtr */
- {
- rec_t* node_ptr;
- btr_cur_t cursor;
- ibool compressed;
- ulint err;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* Delete node pointer on father page */
- node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
- btr_cur_position(UT_LIST_GET_FIRST(tree->tree_indexes), node_ptr,
- &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, mtr);
- ut_a(err == DB_SUCCESS);
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor, mtr);
- }
- }
- /*****************************************************************
- If page is the only on its level, this function moves its records to the
- father page, thus reducing the tree height. */
- static
- void
- btr_lift_page_up(
- /*=============*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page which is the only on its level;
- must not be empty: use
- btr_discard_only_page_on_level if the last
- record from the page should be removed */
- mtr_t* mtr) /* in: mtr */
- {
- rec_t* node_ptr;
- page_t* father_page;
- ulint page_level;
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
- father_page = buf_frame_align(node_ptr);
-
- page_level = btr_page_get_level(page, mtr);
- btr_search_drop_page_hash_index(page);
-
- /* Make the father empty */
- btr_page_empty(father_page, mtr);
- /* Move records to the father */
- page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
- mtr);
- lock_update_copy_and_discard(father_page, page);
- btr_page_set_level(father_page, page_level, mtr);
- /* Free the file page */
- btr_page_free(tree, page, mtr);
- /* We play safe and reset the free bits for the father */
- ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
- father_page);
- ut_ad(page_validate(father_page,
- UT_LIST_GET_FIRST(tree->tree_indexes)));
- ut_ad(btr_check_node_ptr(tree, father_page, mtr));
- }
- /*****************************************************************
- Tries to merge the page first to the left immediate brother if such a
- brother exists, and the node pointers to the current page and to the brother
- reside on the same page. If the left brother does not satisfy these
- conditions, looks at the right brother. If the page is the only one on that
- level lifts the records of the page to the father page, thus reducing the
- tree height. It is assumed that mtr holds an x-latch on the tree and on the
- page. If cursor is on the leaf level, mtr must also hold x-latches to the
- brothers, if they exist. NOTE: it is assumed that the caller has reserved
- enough free extents so that the compression will always succeed if done! */
- void
- btr_compress(
- /*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr) /* in: mtr */
- {
- dict_tree_t* tree;
- ulint space;
- ulint left_page_no;
- ulint right_page_no;
- page_t* merge_page;
- page_t* father_page;
- ibool is_left;
- page_t* page;
- rec_t* orig_pred;
- rec_t* orig_succ;
- rec_t* node_ptr;
- ulint data_size;
- ulint n_recs;
- ulint max_ins_size;
- ulint max_ins_size_reorg;
- ulint level;
-
- page = btr_cur_get_page(cursor);
- tree = btr_cur_get_tree(cursor);
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- level = btr_page_get_level(page, mtr);
- space = dict_tree_get_space(tree);
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
- /* printf("Merge left page %lu right %lu n", left_page_no,
- right_page_no); */
- node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
- father_page = buf_frame_align(node_ptr);
- /* Decide the page to which we try to merge and which will inherit
- the locks */
- if (left_page_no != FIL_NULL) {
- is_left = TRUE;
- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
- mtr);
- } else if (right_page_no != FIL_NULL) {
- is_left = FALSE;
- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- mtr);
- } else {
- /* The page is the only one on the level, lift the records
- to the father */
- btr_lift_page_up(tree, page, mtr);
- return;
- }
-
- n_recs = page_get_n_recs(page);
- data_size = page_get_data_size(page);
- max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
- merge_page, n_recs);
- if (data_size > max_ins_size_reorg) {
- /* No space for merge */
- return;
- }
- ut_ad(page_validate(merge_page, cursor->index));
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- if (data_size > max_ins_size) {
- /* We have to reorganize merge_page */
- btr_page_reorganize(merge_page, mtr);
- ut_ad(page_validate(merge_page, cursor->index));
- ut_ad(page_get_max_insert_size(merge_page, n_recs)
- == max_ins_size_reorg);
- }
- btr_search_drop_page_hash_index(page);
- /* Remove the page from the level list */
- btr_level_list_remove(tree, page, mtr);
- if (is_left) {
- btr_node_ptr_delete(tree, page, mtr);
- } else {
- /* Replace the address of the old child node (= page) with the
- address of the merge page to the right */
- btr_node_ptr_set_child_page_no(node_ptr, right_page_no, mtr);
- btr_node_ptr_delete(tree, merge_page, mtr);
- }
-
- /* Move records to the merge page */
- if (is_left) {
- orig_pred = page_rec_get_prev(
- page_get_supremum_rec(merge_page));
- page_copy_rec_list_start(merge_page, page,
- page_get_supremum_rec(page), mtr);
- lock_update_merge_left(merge_page, orig_pred, page);
- } else {
- orig_succ = page_rec_get_next(
- page_get_infimum_rec(merge_page));
- page_copy_rec_list_end(merge_page, page,
- page_get_infimum_rec(page), mtr);
- lock_update_merge_right(orig_succ, page);
- }
- /* We have added new records to merge_page: update its free bits */
- ibuf_update_free_bits_if_full(cursor->index, merge_page,
- UNIV_PAGE_SIZE, ULINT_UNDEFINED);
-
- ut_ad(page_validate(merge_page, cursor->index));
- /* Free the file page */
- btr_page_free(tree, page, mtr);
- ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
- }
- /*****************************************************************
- Discards a page that is the only page on its level. */
- static
- void
- btr_discard_only_page_on_level(
- /*===========================*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: page which is the only on its level */
- mtr_t* mtr) /* in: mtr */
- {
- rec_t* node_ptr;
- page_t* father_page;
- ulint page_level;
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(page);
- node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
- father_page = buf_frame_align(node_ptr);
- page_level = btr_page_get_level(page, mtr);
- lock_update_discard(page_get_supremum_rec(father_page), page);
- btr_page_set_level(father_page, page_level, mtr);
- /* Free the file page */
- btr_page_free(tree, page, mtr);
- if (buf_frame_get_page_no(father_page) == dict_tree_get_page(tree)) {
- /* The father is the root page */
- btr_page_empty(father_page, mtr);
- /* We play safe and reset the free bits for the father */
- ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
- father_page);
- } else {
- ut_ad(page_get_n_recs(father_page) == 1);
- btr_discard_only_page_on_level(tree, father_page, mtr);
- }
- }
- /*****************************************************************
- Discards a page from a B-tree. This is used to remove the last record from
- a B-tree page: the whole page must be removed at the same time. This cannot
- be used for the root page, which is allowed to be empty. */
- void
- btr_discard_page(
- /*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr) /* in: mtr */
- {
- dict_tree_t* tree;
- ulint space;
- ulint left_page_no;
- ulint right_page_no;
- page_t* merge_page;
- ibool is_left;
- page_t* page;
- rec_t* node_ptr;
-
- page = btr_cur_get_page(cursor);
- tree = btr_cur_get_tree(cursor);
- ut_ad(dict_tree_get_page(tree) != buf_frame_get_page_no(page));
- ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- space = dict_tree_get_space(tree);
-
- /* Decide the page which will inherit the locks */
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
- if (left_page_no != FIL_NULL) {
- is_left = TRUE;
- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
- mtr);
- } else if (right_page_no != FIL_NULL) {
- is_left = FALSE;
- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- mtr);
- } else {
- btr_discard_only_page_on_level(tree, page, mtr);
- return;
- }
- btr_search_drop_page_hash_index(page);
-
- if ((left_page_no == FIL_NULL)
- && (btr_page_get_level(page, mtr) > 0)) {
- /* We have to mark the leftmost node pointer on the right
- side page as the predefined minimum record */
- node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
- ut_ad(node_ptr != page_get_supremum_rec(merge_page));
- btr_set_min_rec_mark(node_ptr, mtr);
- }
-
- btr_node_ptr_delete(tree, page, mtr);
- /* Remove the page from the level list */
- btr_level_list_remove(tree, page, mtr);
- if (is_left) {
- lock_update_discard(page_get_supremum_rec(merge_page), page);
- } else {
- lock_update_discard(page_rec_get_next(
- page_get_infimum_rec(merge_page)), page);
- }
- /* Free the file page */
- btr_page_free(tree, page, mtr);
- ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
- }
- /*****************************************************************
- Prints size info of a B-tree. */
- void
- btr_print_size(
- /*===========*/
- dict_tree_t* tree) /* in: index tree */
- {
- page_t* root;
- fseg_header_t* seg;
- mtr_t mtr;
- if (tree->type & DICT_IBUF) {
- printf(
- "Sorry, cannot print info of an ibuf tree: use ibuf functionsn");
- return;
- }
- mtr_start(&mtr);
-
- root = btr_root_get(tree, &mtr);
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- printf("INFO OF THE NON-LEAF PAGE SEGMENTn");
- fseg_print(seg, &mtr);
- if (!(tree->type & DICT_UNIVERSAL)) {
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- printf("INFO OF THE LEAF PAGE SEGMENTn");
- fseg_print(seg, &mtr);
- }
- mtr_commit(&mtr);
- }
- /****************************************************************
- Prints recursively index tree pages. */
- static
- void
- btr_print_recursive(
- /*================*/
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: index page */
- ulint width, /* in: print this many entries from start
- and end */
- mtr_t* mtr) /* in: mtr */
- {
- page_cur_t cursor;
- ulint n_recs;
- ulint i = 0;
- mtr_t mtr2;
- rec_t* node_ptr;
- page_t* child;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- printf("NODE ON LEVEL %lu page number %lun",
- btr_page_get_level(page, mtr), buf_frame_get_page_no(page));
-
- page_print(page, width, width);
-
- n_recs = page_get_n_recs(page);
-
- page_cur_set_before_first(page, &cursor);
- page_cur_move_to_next(&cursor);
- while (!page_cur_is_after_last(&cursor)) {
- if (0 == btr_page_get_level(page, mtr)) {
- /* If this is the leaf level, do nothing */
- } else if ((i <= width) || (i >= n_recs - width)) {
- mtr_start(&mtr2);
- node_ptr = page_cur_get_rec(&cursor);
- child = btr_node_ptr_get_child(node_ptr, &mtr2);
- btr_print_recursive(tree, child, width, &mtr2);
- mtr_commit(&mtr2);
- }
- page_cur_move_to_next(&cursor);
- i++;
- }
- }
- /******************************************************************
- Prints directories and other info of all nodes in the tree. */
- void
- btr_print_tree(
- /*===========*/
- dict_tree_t* tree, /* in: tree */
- ulint width) /* in: print this many entries from start
- and end */
- {
- mtr_t mtr;
- page_t* root;
- printf("--------------------------n");
- printf("INDEX TREE PRINTn");
- mtr_start(&mtr);
- root = btr_root_get(tree, &mtr);
- btr_print_recursive(tree, root, width, &mtr);
- mtr_commit(&mtr);
- btr_validate_tree(tree);
- }
- /****************************************************************
- Checks that the node pointer to a page is appropriate. */
- ibool
- btr_check_node_ptr(
- /*===============*/
- /* out: TRUE */
- dict_tree_t* tree, /* in: index tree */
- page_t* page, /* in: index page */
- mtr_t* mtr) /* in: mtr */
- {
- mem_heap_t* heap;
- rec_t* node_ptr;
- dtuple_t* node_ptr_tuple;
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- if (dict_tree_get_page(tree) == buf_frame_get_page_no(page)) {
- return(TRUE);
- }
- node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
-
- if (btr_page_get_level(page, mtr) == 0) {
- return(TRUE);
- }
-
- heap = mem_heap_create(256);
-
- node_ptr_tuple = dict_tree_build_node_ptr(
- tree,
- page_rec_get_next(page_get_infimum_rec(page)),
- 0, heap);
-
- ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0);
- mem_heap_free(heap);
- return(TRUE);
- }
- /****************************************************************
- Validates index tree level. */
- static
- void
- btr_validate_level(
- /*===============*/
- dict_tree_t* tree, /* in: index tree */
- ulint level) /* in: level number */
- {
- ulint space;
- mtr_t mtr;
- page_t* page;
- page_t* right_page;
- page_t* father_page;
- page_t* right_father_page;
- rec_t* node_ptr;
- rec_t* right_node_ptr;
- ulint right_page_no;
- ulint left_page_no;
- page_cur_t cursor;
- mem_heap_t* heap;
- dtuple_t* node_ptr_tuple;
- mtr_start(&mtr);
- page = btr_root_get(tree, &mtr);
- space = buf_frame_get_space_id(page);
- while (level != btr_page_get_level(page, &mtr)) {
- ut_a(btr_page_get_level(page, &mtr) > 0);
- page_cur_set_before_first(page, &cursor);
- page_cur_move_to_next(&cursor);
- node_ptr = page_cur_get_rec(&cursor);
- page = btr_node_ptr_get_child(node_ptr, &mtr);
- }
- /* Now we are on the desired level */
- loop:
- mtr_x_lock(dict_tree_get_lock(tree), &mtr);
- /* Check ordering of records */
- page_validate(page, UT_LIST_GET_FIRST(tree->tree_indexes));
- ut_a(btr_page_get_level(page, &mtr) == level);
- right_page_no = btr_page_get_next(page, &mtr);
- left_page_no = btr_page_get_prev(page, &mtr);
- ut_a((page_get_n_recs(page) > 0)
- || ((level == 0) &&
- (buf_frame_get_page_no(page) == dict_tree_get_page(tree))));
- if (right_page_no != FIL_NULL) {
- right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- &mtr);
- ut_a(cmp_rec_rec(page_rec_get_prev(page_get_supremum_rec(page)),
- page_rec_get_next(page_get_infimum_rec(right_page)),
- UT_LIST_GET_FIRST(tree->tree_indexes)) < 0);
- }
-
- if ((level > 0) && (left_page_no == FIL_NULL)) {
- ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next(page_get_infimum_rec(page))));
- }
- if (buf_frame_get_page_no(page) != dict_tree_get_page(tree)) {
- /* Check father node pointers */
-
- node_ptr = btr_page_get_father_node_ptr(tree, page, &mtr);
- ut_a(node_ptr == btr_page_get_father_for_rec(tree, page,
- page_rec_get_prev(page_get_supremum_rec(page)), &mtr));
- father_page = buf_frame_align(node_ptr);
- if (btr_page_get_level(page, &mtr) > 0) {
- heap = mem_heap_create(256);
-
- node_ptr_tuple = dict_tree_build_node_ptr(
- tree,
- page_rec_get_next(
- page_get_infimum_rec(page)),
- 0, heap);
- ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0);
- mem_heap_free(heap);
- }
- if (left_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_next(
- page_get_infimum_rec(father_page)));
- ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
- }
- if (right_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_prev(
- page_get_supremum_rec(father_page)));
- ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
- }
- if (right_page_no != FIL_NULL) {
- right_node_ptr = btr_page_get_father_node_ptr(tree,
- right_page, &mtr);
- if (page_rec_get_next(node_ptr) !=
- page_get_supremum_rec(father_page)) {
- ut_a(right_node_ptr ==
- page_rec_get_next(node_ptr));
- } else {
- right_father_page = buf_frame_align(
- right_node_ptr);
-
- ut_a(right_node_ptr == page_rec_get_next(
- page_get_infimum_rec(
- right_father_page)));
- ut_a(buf_frame_get_page_no(right_father_page)
- == btr_page_get_next(father_page, &mtr));
- }
- }
- }
- mtr_commit(&mtr);
- if (right_page_no != FIL_NULL) {
- mtr_start(&mtr);
-
- page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
- goto loop;
- }
- }
- /******************************************************************
- Checks the consistency of an index tree. */
- void
- btr_validate_tree(
- /*==============*/
- dict_tree_t* tree) /* in: tree */
- {
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
- mtr_start(&mtr);
- mtr_x_lock(dict_tree_get_lock(tree), &mtr);
- root = btr_root_get(tree, &mtr);
- n = btr_page_get_level(root, &mtr);
- for (i = 0; i <= n; i++) {
-
- btr_validate_level(tree, n - i);
- }
- mtr_commit(&mtr);
- }