fil0fil.c
上传用户:tsgydb
上传日期:2007-04-14
资源大小:10674k
文件大小:33k
- /******************************************************
- The low-level file system
- (c) 1995 Innobase Oy
- Created 10/25/1995 Heikki Tuuri
- *******************************************************/
- #include "fil0fil.h"
- #include "mem0mem.h"
- #include "sync0sync.h"
- #include "hash0hash.h"
- #include "os0file.h"
- #include "os0sync.h"
- #include "mach0data.h"
- #include "ibuf0ibuf.h"
- #include "buf0buf.h"
- #include "log0log.h"
- #include "log0recv.h"
- #include "fsp0fsp.h"
- /*
- IMPLEMENTATION OF THE LOW-LEVEL FILE SYSTEM
- ===========================================
- The file system is responsible for providing fast read/write access to
- tablespaces and logs of the database. File creation and deletion is done
- in other modules which know more of the logic of the operation, however.
- A tablespace consists of a chain of files. The size of the files does not
- have to be divisible by the database block size, because we may just leave
- the last incomplete block unused. When a new file is appended to the
- tablespace, the maximum size of the file is also specified. At the moment,
- we think that it is best to extend the file to its maximum size already at
- the creation of the file, because then we can avoid dynamically extending
- the file when more space is needed for the tablespace.
- A block's position in the tablespace is specified with a 32-bit unsigned
- integer. The files in the chain are thought to be catenated, and the block
- corresponding to an address n is the nth block in the catenated file (where
- the first block is named the 0th block, and the incomplete block fragments
- at the end of files are not taken into account). A tablespace can be extended
- by appending a new file at the end of the chain.
- Our tablespace concept is similar to the one of Oracle.
- To acquire more speed in disk transfers, a technique called disk striping is
- sometimes used. This means that logical block addresses are divided in a
- round-robin fashion across several disks. Windows NT supports disk striping,
- so there we do not need to support it in the database. Disk striping is
- implemented in hardware in RAID disks. We conclude that it is not necessary
- to implement it in the database. Oracle 7 does not support disk striping,
- either.
- Another trick used at some database sites is replacing tablespace files by
- raw disks, that is, the whole physical disk drive, or a partition of it, is
- opened as a single file, and it is accessed through byte offsets calculated
- from the start of the disk or the partition. This is recommended in some
- books on database tuning to achieve more speed in i/o. Using raw disk
- certainly prevents the OS from fragmenting disk space, but it is not clear
- if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
- system + EIDE Conner disk only a negligible difference in speed when reading
- from a file, versus reading from a raw disk.
- To have fast access to a tablespace or a log file, we put the data structures
- in a hash table. Each tablespace and log file is given a unique 32-bit
- identifier.
- Some operating systems do not support many open files at the same time,
- though NT seems to tolerate at least 900 open files. Therefore, we put the
- open files in an LRU-list. If we need to open another file, we may close the
- file at the end of the LRU-list. When an i/o-operation is pending on a file,
- the file cannot be closed. We take the file nodes with pending i/o-operations
- out of the LRU-list and keep a count of pending operations. When an operation
- completes, we decrement the count and return the file node to the LRU-list if
- the count drops to zero. */
- /* Null file address */
- fil_addr_t fil_addr_null = {FIL_NULL, 0};
- /* File system file node data structure */
- typedef struct fil_node_struct fil_node_t;
- struct fil_node_struct {
- char* name; /* the file name or path */
- ibool open; /* TRUE if file open */
- os_file_t handle; /* OS handle to the file, if file open */
- ulint size; /* size of the file in database blocks
- (where the possible last incomplete block
- is ignored) */
- ulint n_pending;
- /* count of pending i/o-ops on this file */
- UT_LIST_NODE_T(fil_node_t) chain;
- /* link field for the file chain */
- UT_LIST_NODE_T(fil_node_t) LRU;
- /* link field for the LRU list */
- ulint magic_n;
- };
- #define FIL_NODE_MAGIC_N 89389
- /* File system tablespace or log data structure: let us call them by a common
- name space */
- struct fil_space_struct {
- char* name; /* space name */
- ulint id; /* space id */
- ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
- UT_LIST_BASE_NODE_T(fil_node_t) chain;
- /* base node for the file chain */
- ulint size; /* space size in pages */
- ulint n_reserved_extents;
- /* number of reserved free extents for
- ongoing operations like B-tree page split */
- hash_node_t hash; /* hash chain node */
- rw_lock_t latch; /* latch protecting the file space storage
- allocation */
- UT_LIST_NODE_T(fil_space_t) space_list;
- /* list of all spaces */
- ibuf_data_t* ibuf_data;
- /* insert buffer data */
- ulint magic_n;
- };
- #define FIL_SPACE_MAGIC_N 89472
- /* The file system data structure */
- typedef struct fil_system_struct fil_system_t;
- struct fil_system_struct {
- mutex_t mutex; /* The mutex protecting the system */
- hash_table_t* spaces; /* The hash table of spaces in the
- system */
- UT_LIST_BASE_NODE_T(fil_node_t) LRU;
- /* base node for the LRU list of the
- most recently used open files */
- ulint n_open_pending; /* current number of open files with
- pending i/o-ops on them */
- ulint max_n_open; /* maximum allowed open files */
- os_event_t can_open; /* this event is set to the signaled
- state when the system is capable of
- opening a new file, i.e.,
- n_open_pending < max_n_open */
- UT_LIST_BASE_NODE_T(fil_space_t) space_list;
- /* list of all file spaces */
- };
- /* The file system. This variable is NULL before the module is initialized. */
- fil_system_t* fil_system = NULL;
- /* The file system hash table size */
- #define FIL_SYSTEM_HASH_SIZE 500
- /***********************************************************************
- Reserves a right to open a single file. The right must be released with
- fil_release_right_to_open. */
- void
- fil_reserve_right_to_open(void)
- /*===========================*/
- {
- loop:
- mutex_enter(&(fil_system->mutex));
-
- if (fil_system->n_open_pending == fil_system->max_n_open) {
- /* It is not sure we can open the file if it is closed: wait */
- os_event_reset(fil_system->can_open);
- mutex_exit(&(fil_system->mutex));
- os_event_wait(fil_system->can_open);
- goto loop;
- }
- fil_system->max_n_open--;
- mutex_exit(&(fil_system->mutex));
- }
- /***********************************************************************
- Releases a right to open a single file. */
- void
- fil_release_right_to_open(void)
- /*===========================*/
- {
- mutex_enter(&(fil_system->mutex));
-
- if (fil_system->n_open_pending == fil_system->max_n_open) {
- os_event_set(fil_system->can_open);
- }
- fil_system->max_n_open++;
- mutex_exit(&(fil_system->mutex));
- }
- /***********************************************************************
- Returns the latch of a file space. */
- rw_lock_t*
- fil_space_get_latch(
- /*================*/
- /* out: latch protecting storage allocation */
- ulint id) /* in: space id */
- {
- fil_space_t* space;
- fil_system_t* system = fil_system;
- ut_ad(system);
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- mutex_exit(&(system->mutex));
- return(&(space->latch));
- }
- /***********************************************************************
- Returns the type of a file space. */
- ulint
- fil_space_get_type(
- /*===============*/
- /* out: FIL_TABLESPACE or FIL_LOG */
- ulint id) /* in: space id */
- {
- fil_space_t* space;
- fil_system_t* system = fil_system;
- ut_ad(system);
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- mutex_exit(&(system->mutex));
- return(space->purpose);
- }
- /***********************************************************************
- Returns the ibuf data of a file space. */
- ibuf_data_t*
- fil_space_get_ibuf_data(
- /*====================*/
- /* out: ibuf data for this space */
- ulint id) /* in: space id */
- {
- fil_space_t* space;
- fil_system_t* system = fil_system;
- ut_ad(system);
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- mutex_exit(&(system->mutex));
- return(space->ibuf_data);
- }
- /***********************************************************************
- Appends a new file to the chain of files of a space. File must be closed. */
- void
- fil_node_create(
- /*============*/
- char* name, /* in: file name (file must be closed) */
- ulint size, /* in: file size in database blocks, rounded downwards
- to an integer */
- ulint id) /* in: space id where to append */
- {
- fil_node_t* node;
- fil_space_t* space;
- char* name2;
- fil_system_t* system = fil_system;
- ut_a(system);
- ut_a(name);
- ut_a(size > 0);
- mutex_enter(&(system->mutex));
- node = mem_alloc(sizeof(fil_node_t));
- name2 = mem_alloc(ut_strlen(name) + 1);
- ut_strcpy(name2, name);
- node->name = name2;
- node->open = FALSE;
- node->size = size;
- node->magic_n = FIL_NODE_MAGIC_N;
- node->n_pending = 0;
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- space->size += size;
- UT_LIST_ADD_LAST(chain, space->chain, node);
-
- mutex_exit(&(system->mutex));
- }
- /**************************************************************************
- Closes a file. */
- static
- void
- fil_node_close(
- /*===========*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system) /* in: file system */
- {
- ibool ret;
- ut_ad(node && system);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->open);
- ut_a(node->n_pending == 0);
- ret = os_file_close(node->handle);
- ut_a(ret);
- node->open = FALSE;
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
- }
- /***********************************************************************
- Frees a file node object from a file system. */
- static
- void
- fil_node_free(
- /*==========*/
- fil_node_t* node, /* in, own: file node */
- fil_system_t* system, /* in: file system */
- fil_space_t* space) /* in: space where the file node is chained */
- {
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->magic_n == FIL_NODE_MAGIC_N);
- if (node->open) {
- fil_node_close(node, system);
- }
- space->size -= node->size;
-
- UT_LIST_REMOVE(chain, space->chain, node);
- mem_free(node->name);
- mem_free(node);
- }
- /********************************************************************
- Drops files from the start of a file space, so that its size is cut by
- the amount given. */
- void
- fil_space_truncate_start(
- /*=====================*/
- ulint id, /* in: space id */
- ulint trunc_len) /* in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
- {
- fil_node_t* node;
- fil_space_t* space;
- fil_system_t* system = fil_system;
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- ut_a(space);
-
- while (trunc_len > 0) {
- node = UT_LIST_GET_FIRST(space->chain);
- ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
- trunc_len -= node->size * UNIV_PAGE_SIZE;
- fil_node_free(node, system, space);
- }
-
- mutex_exit(&(system->mutex));
- }
- /********************************************************************
- Creates a file system object. */
- static
- fil_system_t*
- fil_system_create(
- /*==============*/
- /* out, own: file system object */
- ulint hash_size, /* in: hash table size */
- ulint max_n_open) /* in: maximum number of open files */
- {
- fil_system_t* system;
- ut_a(hash_size > 0);
- ut_a(max_n_open > 0);
- system = mem_alloc(sizeof(fil_system_t));
- mutex_create(&(system->mutex));
- mutex_set_level(&(system->mutex), SYNC_ANY_LATCH);
- system->spaces = hash_create(hash_size);
- UT_LIST_INIT(system->LRU);
- system->n_open_pending = 0;
- system->max_n_open = max_n_open;
- system->can_open = os_event_create(NULL);
- UT_LIST_INIT(system->space_list);
- return(system);
- }
- /********************************************************************
- Initializes the file system of this module. */
- void
- fil_init(
- /*=====*/
- ulint max_n_open) /* in: max number of open files */
- {
- ut_a(fil_system == NULL);
- fil_system = fil_system_create(FIL_SYSTEM_HASH_SIZE, max_n_open);
- }
- /********************************************************************
- Writes the flushed lsn to the header of each file space. */
- void
- fil_ibuf_init_at_db_start(void)
- /*===========================*/
- {
- fil_space_t* space;
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- if (space->purpose == FIL_TABLESPACE) {
- space->ibuf_data = ibuf_data_init_for_space(space->id);
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
- }
- /********************************************************************
- Writes the flushed lsn and the latest archived log number to the page
- header of the first page of a data file. */
- static
- ulint
- fil_write_lsn_and_arch_no_to_file(
- /*==============================*/
- ulint space_id, /* in: space number */
- ulint sum_of_sizes, /* in: combined size of previous files in space,
- in database pages */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no) /* in: archived log number to write */
- {
- byte* buf1;
- byte* buf;
- buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
- buf = ut_align(buf1, UNIV_PAGE_SIZE);
- fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
- mach_write_to_4(buf + FIL_PAGE_ARCH_LOG_NO, arch_log_no);
- fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
- return(DB_SUCCESS);
- }
- /********************************************************************
- Writes the flushed lsn and the latest archived log number to the page
- header of the first page of each data file. */
- ulint
- fil_write_flushed_lsn_to_data_files(
- /*================================*/
- /* out: DB_SUCCESS or error number */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no) /* in: latest archived log file number */
- {
- fil_space_t* space;
- fil_node_t* node;
- ulint sum_of_sizes;
- ulint err;
- mutex_enter(&(fil_system->mutex));
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- if (space->purpose == FIL_TABLESPACE) {
- sum_of_sizes = 0;
- node = UT_LIST_GET_FIRST(space->chain);
- while (node) {
- mutex_exit(&(fil_system->mutex));
- err = fil_write_lsn_and_arch_no_to_file(
- space->id,
- sum_of_sizes,
- lsn, arch_log_no);
- if (err != DB_SUCCESS) {
- return(err);
- }
- mutex_enter(&(fil_system->mutex));
- sum_of_sizes += node->size;
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
- mutex_exit(&(fil_system->mutex));
- return(DB_SUCCESS);
- }
- /***********************************************************************
- Reads the flushed lsn and arch no fields from a data file at database
- startup. */
- void
- fil_read_flushed_lsn_and_arch_log_no(
- /*=================================*/
- os_file_t data_file, /* in: open data file */
- ibool one_read_already, /* in: TRUE if min and max parameters
- below already contain sensible data */
- dulint* min_flushed_lsn, /* in/out: */
- ulint* min_arch_log_no, /* in/out: */
- dulint* max_flushed_lsn, /* in/out: */
- ulint* max_arch_log_no) /* in/out: */
- {
- byte* buf;
- dulint flushed_lsn;
- ulint arch_log_no;
- buf = ut_malloc(UNIV_PAGE_SIZE);
- os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
- flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
- arch_log_no = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO);
- ut_free(buf);
- if (!one_read_already) {
- *min_flushed_lsn = flushed_lsn;
- *max_flushed_lsn = flushed_lsn;
- *min_arch_log_no = arch_log_no;
- *max_arch_log_no = arch_log_no;
- return;
- }
- if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
- *min_flushed_lsn = flushed_lsn;
- }
- if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
- *max_flushed_lsn = flushed_lsn;
- }
- if (*min_arch_log_no > arch_log_no) {
- *min_arch_log_no = arch_log_no;
- }
- if (*max_arch_log_no < arch_log_no) {
- *max_arch_log_no = arch_log_no;
- }
- }
- /***********************************************************************
- Creates a space object and puts it to the file system. */
- void
- fil_space_create(
- /*=============*/
- char* name, /* in: space name */
- ulint id, /* in: space id */
- ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
- {
- fil_space_t* space;
- char* name2;
- fil_system_t* system = fil_system;
-
- ut_a(system);
- ut_a(name);
- #ifndef UNIV_BASIC_LOG_DEBUG
- /* Spaces with an odd id number are reserved to replicate spaces
- used in log debugging */
-
- ut_a((purpose == FIL_LOG) || (id % 2 == 0));
- #endif
- mutex_enter(&(system->mutex));
- space = mem_alloc(sizeof(fil_space_t));
- name2 = mem_alloc(ut_strlen(name) + 1);
- ut_strcpy(name2, name);
- space->name = name2;
- space->id = id;
- space->purpose = purpose;
- space->size = 0;
- space->n_reserved_extents = 0;
-
- UT_LIST_INIT(space->chain);
- space->magic_n = FIL_SPACE_MAGIC_N;
- space->ibuf_data = NULL;
-
- rw_lock_create(&(space->latch));
- rw_lock_set_level(&(space->latch), SYNC_FSP);
-
- HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
- UT_LIST_ADD_LAST(space_list, system->space_list, space);
-
- mutex_exit(&(system->mutex));
- }
- /***********************************************************************
- Frees a space object from a file system. Closes the files in the chain
- but does not delete them. */
- void
- fil_space_free(
- /*===========*/
- ulint id) /* in: space id */
- {
- fil_space_t* space;
- fil_node_t* fil_node;
- fil_system_t* system = fil_system;
-
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
- UT_LIST_REMOVE(space_list, system->space_list, space);
- ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
- fil_node = UT_LIST_GET_FIRST(space->chain);
- ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
- while (fil_node != NULL) {
- fil_node_free(fil_node, system, space);
- fil_node = UT_LIST_GET_FIRST(space->chain);
- }
-
- ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
- ut_ad(0 == UT_LIST_GET_LEN(space->chain));
- mutex_exit(&(system->mutex));
- mem_free(space->name);
- mem_free(space);
- }
- /***********************************************************************
- Returns the size of the space in pages. */
- ulint
- fil_space_get_size(
- /*===============*/
- /* out: space size */
- ulint id) /* in: space id */
- {
- fil_space_t* space;
- fil_system_t* system = fil_system;
- ulint size;
- ut_ad(system);
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- size = space->size;
-
- mutex_exit(&(system->mutex));
- return(size);
- }
- /***********************************************************************
- Tries to reserve free extents in a file space. */
- ibool
- fil_space_reserve_free_extents(
- /*===========================*/
- /* out: TRUE if succeed */
- ulint id, /* in: space id */
- ulint n_free_now, /* in: number of free extents now */
- ulint n_to_reserve) /* in: how many one wants to reserve */
- {
- fil_space_t* space;
- fil_system_t* system = fil_system;
- ibool success;
- ut_ad(system);
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- if (space->n_reserved_extents + n_to_reserve > n_free_now) {
- success = FALSE;
- } else {
- space->n_reserved_extents += n_to_reserve;
- success = TRUE;
- }
-
- mutex_exit(&(system->mutex));
- return(success);
- }
- /***********************************************************************
- Releases free extents in a file space. */
- void
- fil_space_release_free_extents(
- /*===========================*/
- ulint id, /* in: space id */
- ulint n_reserved) /* in: how many one reserved */
- {
- fil_space_t* space;
- fil_system_t* system = fil_system;
- ut_ad(system);
- mutex_enter(&(system->mutex));
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
- ut_a(space->n_reserved_extents >= n_reserved);
-
- space->n_reserved_extents -= n_reserved;
-
- mutex_exit(&(system->mutex));
- }
- /************************************************************************
- Prepares a file node for i/o. Opens the file if it is closed. Updates the
- pending i/o's field in the node and the system appropriately. Takes the node
- off the LRU list if it is in the LRU list. */
- static
- void
- fil_node_prepare_for_io(
- /*====================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: file system */
- fil_space_t* space) /* in: space */
- {
- ibool ret;
- fil_node_t* last_node;
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
-
- if (node->open == FALSE) {
- /* File is closed */
- ut_a(node->n_pending == 0);
- /* If too many files are open, close one */
- if (system->n_open_pending + UT_LIST_GET_LEN(system->LRU)
- == system->max_n_open) {
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
- last_node = UT_LIST_GET_LAST(system->LRU);
- fil_node_close(last_node, system);
- }
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, &ret);
- ut_a(ret);
-
- node->open = TRUE;
- system->n_open_pending++;
- node->n_pending = 1;
- /* File was closed: the node was not in the LRU list */
- return;
- }
- /* File is open */
- if (node->n_pending == 0) {
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
- system->n_open_pending++;
- node->n_pending = 1;
- } else {
- /* There is already a pending i/o-op on the file: the node is
- not in the LRU list */
- node->n_pending++;
- }
- }
- /************************************************************************
- Updates the data structures when an i/o operation finishes. Updates the
- pending i/os field in the node and the system appropriately. Puts the node
- in the LRU list if there are no other pending i/os. */
- static
- void
- fil_node_complete_io(
- /*=================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system) /* in: file system */
- {
- ut_ad(node);
- ut_ad(system);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->n_pending > 0);
-
- node->n_pending--;
- if (node->n_pending == 0) {
- /* The node must be put back to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
- ut_a(system->n_open_pending > 0);
- system->n_open_pending--;
- if (system->n_open_pending == system->max_n_open - 1) {
- os_event_set(system->can_open);
- }
- }
- }
-
- /************************************************************************
- Reads or writes data. This operation is asynchronous (aio). */
- void
- fil_io(
- /*===*/
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /* in: how many bytes to read; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /* in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
- {
- ulint mode;
- fil_space_t* space;
- fil_node_t* node;
- ulint offset_high;
- ulint offset_low;
- fil_system_t* system;
- os_event_t event;
- ibool ret;
- ulint is_log;
- ulint wake_later;
- is_log = type & OS_FILE_LOG;
- type = type & ~OS_FILE_LOG;
- wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
- type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
-
- ut_ad(byte_offset < UNIV_PAGE_SIZE);
- ut_ad(buf);
- ut_ad(len > 0);
- ut_ad((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
- ut_ad(fil_validate());
- #ifndef UNIV_LOG_DEBUG
- /* ibuf bitmap pages must be read in the sync aio mode: */
- ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
- || !ibuf_bitmap_page(block_offset) || sync || is_log);
- #ifdef UNIV_SYNC_DEBUG
- ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
- || ibuf_page(space_id, block_offset));
- #endif
- #endif
- if (sync) {
- mode = OS_AIO_SYNC;
- } else if ((type == OS_FILE_READ) && !is_log
- && ibuf_page(space_id, block_offset)) {
- mode = OS_AIO_IBUF;
- } else if (is_log) {
- mode = OS_AIO_LOG;
- } else {
- mode = OS_AIO_NORMAL;
- }
- system = fil_system;
- loop:
- mutex_enter(&(system->mutex));
-
- if (system->n_open_pending == system->max_n_open) {
- /* It is not sure we can open the file if it is closed: wait */
- event = system->can_open;
- os_event_reset(event);
- mutex_exit(&(system->mutex));
- os_event_wait(event);
- goto loop;
- }
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- ut_a(space);
- ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
- node = UT_LIST_GET_FIRST(space->chain);
- for (;;) {
- ut_a(node);
- if (node->size > block_offset) {
- /* Found! */
- break;
- } else {
- block_offset -= node->size;
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
-
- /* Open file if closed */
- fil_node_prepare_for_io(node, system, space);
- /* Now we have made the changes in the data structures of system */
- mutex_exit(&(system->mutex));
- /* Calculate the low 32 bits and the high 32 bits of the file offset */
- offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
- offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF)
- + byte_offset;
- ut_a(node->size - block_offset >=
- (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
- /* Do aio */
- ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
- /* Queue the aio request */
- ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset_low, offset_high, len, node, message);
- ut_a(ret);
- if (mode == OS_AIO_SYNC) {
- /* The i/o operation is already completed when we return from
- os_aio: */
-
- mutex_enter(&(system->mutex));
- fil_node_complete_io(node, system);
- mutex_exit(&(system->mutex));
- ut_ad(fil_validate());
- }
- }
- /************************************************************************
- Reads data from a space to a buffer. Remember that the possible incomplete
- blocks at the end of file are ignored: they are not taken into account when
- calculating the byte offset within a space. */
- void
- fil_read(
- /*=====*/
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /* in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
- {
- fil_io(OS_FILE_READ, sync, space_id, block_offset, byte_offset, len,
- buf, message);
- }
- /************************************************************************
- Writes data to a space from a buffer. Remember that the possible incomplete
- blocks at the end of file are ignored: they are not taken into account when
- calculating the byte offset within a space. */
- void
- fil_write(
- /*======*/
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /* in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
- {
- fil_io(OS_FILE_WRITE, sync, space_id, block_offset, byte_offset, len,
- buf, message);
- }
- /**************************************************************************
- Waits for an aio operation to complete. This function is used to write the
- handler for completed requests. The aio array of pending requests is divided
- into segments (see os0file.c for more info). The thread specifies which
- segment it wants to wait for. */
- void
- fil_aio_wait(
- /*=========*/
- ulint segment) /* in: the number of the segment in the aio
- array to wait for */
- {
- ibool ret;
- fil_node_t* fil_node;
- fil_system_t* system = fil_system;
- void* message;
-
- ut_ad(fil_validate());
- if (os_aio_use_native_aio) {
- #ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(segment, 0, &fil_node, &message);
- #elif defined(POSIX_ASYNC_IO)
- ret = os_aio_posix_handle(segment, &fil_node, &message);
- #else
- ut_a(0);
- #endif
- } else {
- ret = os_aio_simulated_handle(segment, (void**) &fil_node,
- &message);
- }
-
- ut_a(ret);
-
- mutex_enter(&(system->mutex));
- fil_node_complete_io(fil_node, fil_system);
- mutex_exit(&(system->mutex));
- ut_ad(fil_validate());
- /* Do the i/o handling */
- if (buf_pool_is_block(message)) {
-
- buf_page_io_complete(message);
- } else {
- log_io_complete(message);
- }
- }
- /**************************************************************************
- Flushes to disk possible writes cached by the OS. */
- void
- fil_flush(
- /*======*/
- ulint space_id) /* in: file space id (this can be a group of
- log files or a tablespace of the database) */
- {
- fil_system_t* system = fil_system;
- fil_space_t* space;
- fil_node_t* node;
- os_file_t file;
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- ut_a(space);
- node = UT_LIST_GET_FIRST(space->chain);
- while (node) {
- if (node->open) {
- file = node->handle;
-
- mutex_exit(&(system->mutex));
- /* Note that it is not certain, when we have
- released the mutex above, that the file of the
- handle is still open: we assume that the OS
- will not crash or trap even if we pass a handle
- to a closed file below in os_file_flush! */
-
- os_file_flush(file);
- mutex_enter(&(system->mutex));
- }
- node = UT_LIST_GET_NEXT(chain, node);
- }
- mutex_exit(&(system->mutex));
- }
- /**************************************************************************
- Flushes to disk writes in file spaces of the given type possibly cached by
- the OS. */
- void
- fil_flush_file_spaces(
- /*==================*/
- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */
- {
- fil_system_t* system = fil_system;
- fil_space_t* space;
- mutex_enter(&(system->mutex));
- space = UT_LIST_GET_FIRST(system->space_list);
- while (space) {
- if (space->purpose == purpose) {
- mutex_exit(&(system->mutex));
- fil_flush(space->id);
- mutex_enter(&(system->mutex));
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&(system->mutex));
- }
- /**********************************************************************
- Checks the consistency of the file system. */
- ibool
- fil_validate(void)
- /*==============*/
- /* out: TRUE if ok */
- {
- fil_space_t* space;
- fil_node_t* fil_node;
- ulint pending_count = 0;
- fil_system_t* system;
- ulint i;
- system = fil_system;
-
- mutex_enter(&(system->mutex));
- /* Look for spaces in the hash table */
- for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
- space = HASH_GET_FIRST(system->spaces, i);
-
- while (space != NULL) {
- UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
- fil_node = UT_LIST_GET_FIRST(space->chain);
- while (fil_node != NULL) {
- if (fil_node->n_pending > 0) {
- pending_count++;
- ut_a(fil_node->open);
- }
- fil_node = UT_LIST_GET_NEXT(chain, fil_node);
- }
- space = HASH_GET_NEXT(hash, space);
- }
- }
- ut_a(pending_count == system->n_open_pending);
- UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
- fil_node = UT_LIST_GET_FIRST(system->LRU);
- while (fil_node != NULL) {
- ut_a(fil_node->n_pending == 0);
- ut_a(fil_node->open);
- fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
- }
-
- mutex_exit(&(system->mutex));
- return(TRUE);
- }
- /************************************************************************
- Returns TRUE if file address is undefined. */
- ibool
- fil_addr_is_null(
- /*=============*/
- /* out: TRUE if undefined */
- fil_addr_t addr) /* in: address */
- {
- if (addr.page == FIL_NULL) {
- return(TRUE);
- }
- return(FALSE);
- }
- /************************************************************************
- Accessor functions for a file page */
- ulint
- fil_page_get_prev(byte* page)
- {
- return(mach_read_from_4(page + FIL_PAGE_PREV));
- }
- ulint
- fil_page_get_next(byte* page)
- {
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
- }
- /*************************************************************************
- Sets the file page type. */
- void
- fil_page_set_type(
- /*==============*/
- byte* page, /* in: file page */
- ulint type) /* in: type */
- {
- ut_ad(page);
- ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_INDEX));
- mach_write_to_2(page + FIL_PAGE_TYPE, type);
- }
- /*************************************************************************
- Gets the file page type. */
- ulint
- fil_page_get_type(
- /*==============*/
- /* out: type; NOTE that if the type has not been
- written to page, the return value not defined */
- byte* page) /* in: file page */
- {
- ut_ad(page);
- return(mach_read_from_2(page + FIL_PAGE_TYPE));
- }