blast_inline.h
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:12k
- /*
- * ===========================================================================
- * PRODUCTION $Log: blast_inline.h,v $
- * PRODUCTION Revision 1000.0 2004/06/01 18:13:43 gouriano
- * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: blast_inline.h,v 1000.0 2004/06/01 18:13:43 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- */
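- /** @file blast_inline.h
- * Inline helper functions for computing nucleotide lookup table indices
- * (contiguous and discontiguous words) and for short exact-match checks and
- * extensions against a compressed subject sequence.
- */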
- #include <algo/blast/core/mb_lookup.h>
- #include <algo/blast/core/blast_util.h>
- /** Compute the discontiguous word lookup index of a word that has already
-  * been packed into an integer.
-  * For the 16-base templates the index depends on the packed word only. For
-  * the 18- and 21-base templates an extra code obtained from the sequence
-  * bytes at subject is OR-ed into the index. When USE_HASH_TABLE is defined
-  * the index is additionally passed through CRC32 and reduced with
-  * hash_shift and hash_mask; none of the hash variables are declared here.
-  * @param subject Pointer to the next byte of the sequence after the end of
-  *                the word (only used by templates longer than 16 bases) [in]
-  * @param word A piece of the sequence packed into an integer [in]
-  * @param template_type What type of discontiguous word template to use [in]
-  * @return The lookup table index of the discontiguous word; an unrecognized
-  *         template type gives index 0 (before the optional hashing step).
-  */
- static NCBI_INLINE Int4 ComputeDiscontiguousIndex(Uint1* subject, Int4 word,
-                                                   Uint1 template_type)
- {
-     Int4 index = 0; /* value kept when template_type matches no case */
- 
-     switch (template_type) {
-     /* 16-base templates: the packed word alone determines the index */
-     case TEMPL_11_16:
-         index = GET_WORD_INDEX_11_16(word);
-         break;
-     case TEMPL_12_16:
-         index = GET_WORD_INDEX_12_16(word);
-         break;
-     case TEMPL_11_16_OPT:
-         index = GET_WORD_INDEX_11_16_OPT(word);
-         break;
-     case TEMPL_12_16_OPT:
-         index = GET_WORD_INDEX_12_16_OPT(word);
-         break;
- 
-     /* 18-base templates: add the extra code read from subject */
-     case TEMPL_11_18:
-         index = GET_WORD_INDEX_11_18(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_18(subject));
-         break;
-     case TEMPL_12_18:
-         index = GET_WORD_INDEX_12_18(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_18(subject));
-         break;
-     case TEMPL_11_18_OPT:
-         index = GET_WORD_INDEX_11_18_OPT(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_18_OPT(subject));
-         break;
-     case TEMPL_12_18_OPT:
-         index = GET_WORD_INDEX_12_18_OPT(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_18_OPT(subject));
-         break;
- 
-     /* 21-base templates: add the extra code read from subject */
-     case TEMPL_11_21:
-         index = GET_WORD_INDEX_11_21(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_21(subject));
-         break;
-     case TEMPL_12_21:
-         index = GET_WORD_INDEX_12_21(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_21(subject));
-         break;
-     case TEMPL_11_21_OPT:
-         index = GET_WORD_INDEX_11_21_OPT(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_21_OPT(subject));
-         break;
-     case TEMPL_12_21_OPT:
-         index = GET_WORD_INDEX_12_21_OPT(word) |
-                 ((Int4) GET_EXTRA_CODE_PACKED_4_21_OPT(subject));
-         break;
- 
-     default:
-         break;
-     }
- #ifdef USE_HASH_TABLE
-     hash_buf = (Uint1*)&index;
-     CRC32(crc, hash_buf);
-     index = (crc>>hash_shift) & hash_mask;
- #endif
-     return index;
- }
- /** Compute the discontiguous word lookup index for a word whose real start
- * is shifted by some number of bits within a compressed sequence byte, as
- * is the case for one-base (2 bit) database scanning.
- * The 16-base templates derive the index from the packed word alone; the
- * 18- and 21-base templates also invoke a GET_EXTRA_CODE_PACKED_* macro on
- * the sequence bytes and OR the resulting extra code into the index.
- * An unrecognized template type gives index 0. When USE_HASH_TABLE is
- * defined the index is additionally passed through CRC32 and reduced with
- * hash_shift and hash_mask; hash_buf, crc, hash_shift and hash_mask are not
- * declared in this function.
- * @param word_start Pointer to the start of a word in the sequence [in]
- * @param word The word packed into an integer value [in]
- * @param sequence_bit By how many bits the real word start is shifted within
- * a compressed sequence byte [in]
- * @param template_type What discontiguous word template to use for index
- * computation [in]
- * @return The lookup index for the discontiguous word.
- */
- static NCBI_INLINE Int4 ComputeDiscontiguousIndex_1b(const Uint1* word_start,
- Int4 word, Uint1 sequence_bit, Uint1 template_type)
- {
- Int4 index;
- Uint1* subject = (Uint1 *) word_start; /* non-const alias of word_start; this is the pointer handed to the macros below */
- Uint1 bit;
- Int4 extra_code, tmpval;
- /* Prepare auxiliary variables for extra code calculation: both are passed
- to the GET_EXTRA_CODE_PACKED_* macros, which fill in extra_code */
- tmpval = 0;
- extra_code = 0;
- /* The bits in an integer byte are counted in a reverse order than in a
- sequence byte, so the shift is converted to 6 - sequence_bit */
- bit = 6 - sequence_bit;
- switch (template_type) {
- case TEMPL_11_16: /* 16-base templates: index comes from the packed word only */
- index = GET_WORD_INDEX_11_16(word);
- break;
- case TEMPL_12_16:
- index = GET_WORD_INDEX_12_16(word);
- break;
- case TEMPL_11_16_OPT:
- index = GET_WORD_INDEX_11_16_OPT(word);
- break;
- case TEMPL_12_16_OPT:
- index = GET_WORD_INDEX_12_16_OPT(word);
- break;
- case TEMPL_11_18: /* 18-base templates: extra code is computed first, then OR-ed in */
- GET_EXTRA_CODE_PACKED_18(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_11_18(word) | extra_code);
- break;
- case TEMPL_12_18:
- GET_EXTRA_CODE_PACKED_18(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_12_18(word) | extra_code);
- break;
- case TEMPL_11_18_OPT:
- GET_EXTRA_CODE_PACKED_18_OPT(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_11_18_OPT(word) | extra_code);
- break;
- case TEMPL_12_18_OPT:
- GET_EXTRA_CODE_PACKED_18_OPT(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_12_18_OPT(word) | extra_code);
- break;
- case TEMPL_11_21: /* 21-base templates: same pattern with the 21-base macros */
- GET_EXTRA_CODE_PACKED_21(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_11_21(word) | extra_code);
- break;
- case TEMPL_12_21:
- GET_EXTRA_CODE_PACKED_21(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_12_21(word) | extra_code);
- break;
- case TEMPL_11_21_OPT:
- GET_EXTRA_CODE_PACKED_21_OPT(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_11_21_OPT(word) | extra_code);
- break;
- case TEMPL_12_21_OPT:
- GET_EXTRA_CODE_PACKED_21_OPT(subject, bit, tmpval, extra_code);
- index = (GET_WORD_INDEX_12_21_OPT(word) | extra_code);
- break;
- default: /* unrecognized template type: fall back to index 0 */
- extra_code = 0;
- index = 0;
- break;
- }
- #ifdef USE_HASH_TABLE
- hash_buf = (Uint1*)&index; /* the bytes of the raw index are the CRC32 input */
- CRC32(crc, hash_buf);
- index = (crc>>hash_shift) & hash_mask;
- #endif
- 
- return index;
- }
- /* Forward declarations of the two residue-wise index helpers; both are
-  * defined immediately below. */
- static NCBI_INLINE void _ComputeIndex(Int4 wordsize, Int4 charsize, Int4 mask,
-                                       const Uint1* word, Int4* index);
- static NCBI_INLINE void _ComputeIndexIncremental(Int4 wordsize, Int4 charsize,
-                                                  Int4 mask, const Uint1* word,
-                                                  Int4* index);
- /** Build the index value of a word from scratch, one residue at a time.
-  * The output starts at 0; for every residue it is shifted left by charsize
-  * bits, the residue is OR-ed in, and the result is masked.
-  *
-  * @param wordsize number of residues in the word [in]
-  * @param charsize number of bits taken by one residue [in]
-  * @param mask value used to mask the index so that only the bottom
-  *             wordsize * charsize bits remain [in]
-  * @param word pointer to the first residue of the word [in]
-  * @param index receives the computed index value [out]
-  */
- static NCBI_INLINE void _ComputeIndex(Int4 wordsize,
-                                       Int4 charsize,
-                                       Int4 mask,
-                                       const Uint1* word,
-                                       Int4* index)
- {
-     Int4 remaining;
- 
-     *index = 0;
-     for (remaining = wordsize; remaining > 0; --remaining) {
-         /* make room for one residue, append it, then trim to the mask */
-         *index = ((*index << charsize) | *word++) & mask;
-     }
- }
- /** Update a previously computed index value with one more residue.
-  * The old index is shifted left by charsize bits, the residue found at
-  * word[wordsize - 1] is OR-ed in, and the result is masked.
-  *
-  * @param wordsize one-based position in word of the residue to append,
-  *                 i.e. word[wordsize - 1] is the residue read [in]
-  * @param charsize number of bits taken by one residue [in]
-  * @param mask value used to mask the index so that only the bottom
-  *             wordsize * charsize bits remain [in]
-  * @param word pointer to the beginning of the word [in]
-  * @param index on entry the previous index, on exit the new one [in/out]
-  */
- static NCBI_INLINE void _ComputeIndexIncremental(Int4 wordsize,
-                                                  Int4 charsize,
-                                                  Int4 mask,
-                                                  const Uint1* word,
-                                                  Int4* index)
- {
-     const Int4 shifted = *index << charsize;
-     const Int4 newest = word[wordsize - 1];
- 
-     *index = (shifted | newest) & mask;
- }
- /* Compute the lookup table index of a word in a compressed nucleotide
-  * sequence from its starting position. The index starts at 0 and, for each
-  * of the length bytes at word, is shifted left by FULL_BYTE_SHIFT bits
-  * before the byte is OR-ed in.
-  * Returns a pointer to the byte that follows the last one consumed.
-  */
- static NCBI_INLINE Uint1* BlastNaLookupInitIndex(Int4 length,
-                                                  const Uint1* word, Int4* index)
- {
-     const Uint1* const stop = word + length;
-     const Uint1* cursor;
- 
-     *index = 0;
-     for (cursor = word; cursor < stop; ++cursor)
-         *index = ((*index) << FULL_BYTE_SHIFT) | *cursor;
-     return (Uint1 *) stop;
- }
- /* Recompute the word index from its previous value and the location of the
-  * new last byte of the word: the previous index is shifted left by
-  * scan_shift bits and masked, then the byte at word is OR-ed in.
-  */
- static NCBI_INLINE Int4 BlastNaLookupComputeIndex(Int4 scan_shift, Int4 mask,
-                                                   const Uint1* word, Int4 index)
- {
-     const Int4 carried = (index << scan_shift) & mask;
- 
-     return carried | *word;
- }
- /* Given a word computed from full bytes of a compressed sequence, shift it
-  * by 0-3 bases: the index is shifted left by bit bits and masked, and the
-  * top bit bits of the byte at s are brought in at the low end.
-  */
- static NCBI_INLINE Int4 BlastNaLookupAdjustIndex(Uint1* s, Int4 index,
-                                                  Int4 mask, Uint1 bit)
- {
-     const Int4 high_part = (index << bit) & mask;
-     const Int4 low_part = (*s) >> (FULL_BYTE_SHIFT - bit);
- 
-     return high_part | low_part;
- }
- /* Bits that must all be clear in a sequence byte for
-  * Na_LookupComputeIndex to accept it. */
- #define BLAST2NA_MASK 0xfc
- /** Compute the lookup index for a word in an uncompressed sequence, without
-  * using any previous index information. The word is rejected as soon as a
-  * byte with any BLAST2NA_MASK bit set is met.
-  * @param lookup Pointer to the traditional BLASTn lookup table structure;
-  *               its reduced_wordsize, charsize and mask fields are read [in]
-  * @param word Pointer to the start of the word [in]
-  * @param index The lookup index; reset to 0 when the word is rejected [out]
-  * @return 0 on success, -1 if the word contains a rejected byte.
-  */
- static NCBI_INLINE Int2
- Na_LookupComputeIndex(LookupTable* lookup, Uint1* word, Int4* index)
- {
-     /* number of sequence bytes making up the word, i.e. 8 or 4 */
-     const Int4 num_bases = lookup->reduced_wordsize*COMPRESSION_RATIO;
-     Int4 pos;
- 
-     *index = 0;
-     for (pos = 0; pos < num_bases; ++pos) {
-         const Uint1 base = word[pos];
- 
-         if ((base & BLAST2NA_MASK) != 0) {
-             *index = 0;
-             return -1;
-         }
-         *index = (((*index) << lookup->charsize) & lookup->mask) | base;
-     }
-     return 0;
- }
- /** Pack 4 sequence bytes into a one byte integer, the first byte ending up
-  * in the two highest bits. The result only fits in one byte if every input
-  * value is in the range 0-3, i.e. the sequence contains no ambiguities.
-  * The argument is parenthesized so that pointer expressions such as
-  * (p + 4) expand correctly; it is evaluated four times, so it must not
-  * have side effects.
-  */
- #define PACK_WORD(q) (((q)[0]<<6) + ((q)[1]<<4) + ((q)[2]<<2) + (q)[3])
- /** Compare a given number of bytes of a compressed subject sequence with
-  * the non-compressed query sequence. Each subject byte is checked against
-  * PACK_WORD of the current query position, after which the query pointer
-  * advances by COMPRESSION_RATIO.
-  * @param q Pointer to the first byte to be compared in the query sequence [in]
-  * @param s Pointer to the first byte to be compared in the subject
-  *          sequence [in]
-  * @param extra_bytes Number of compressed bytes to compare [in]
-  * @return TRUE if sequences are identical, FALSE if mismatch is found.
-  */
- static NCBI_INLINE Boolean BlastNaCompareExtraBytes(Uint1* q, Uint1* s,
-                                                     Int4 extra_bytes)
- {
-     Int4 remaining = extra_bytes;
- 
-     while (remaining > 0) {
-         if (*s != PACK_WORD(q))
-             return FALSE;
-         ++s;
-         q += COMPRESSION_RATIO;
-         --remaining;
-     }
-     return TRUE;
- }
- /** Perform mini extension (up to max_left <= 4 bases) to the left.
-  * The query pointer is moved back one base before every comparison, and
-  * the base it then points to is compared with
-  * NCBI2NA_UNPACK_BASE(*s, n), n counting up from 0.
-  * @param q Pointer to the query base right after the ones to be extended [in]
-  * @param s Pointer to a byte in the compressed subject sequence that is to be
-  *          tested for extension [in]
-  * @param max_left Maximal number of bases to compare [in]
-  * @return Number of matched bases
-  */
- static NCBI_INLINE Uint1
- BlastNaMiniExtendLeft(Uint1* q, const Uint1* s, Uint1 max_left)
- {
-     Uint1 matched = 0;
- 
-     while (matched < max_left) {
-         if (NCBI2NA_UNPACK_BASE(*s, matched) != *--q)
-             return matched;
-         ++matched;
-     }
-     return matched;
- }
- /** Perform mini extension (up to max_right <= 4 bases) to the right.
-  * Query bases are read forward from q and compared with
-  * NCBI2NA_UNPACK_BASE(*s, n), n counting down from 3.
-  * @param q Pointer to the start of the extension in the query [in]
-  * @param s Pointer to a byte in the compressed subject sequence that is to be
-  *          tested for extension [in]
-  * @param max_right Maximal number of bases to compare [in]
-  * @return Number of matched bases
-  */
- static NCBI_INLINE Uint1
- BlastNaMiniExtendRight(Uint1* q, const Uint1* s, Uint1 max_right)
- {
-     Uint1 matched = 0;
-     Uint1 slot = 3; /* second argument for NCBI2NA_UNPACK_BASE, counts down */
- 
-     while (matched < max_right) {
-         if (NCBI2NA_UNPACK_BASE(*s, slot) != *q++)
-             return matched;
-         ++matched;
-         --slot;
-     }
-     return matched;
- }