blast_gapalign.c
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:134k
- /*
- * ===========================================================================
- * PRODUCTION $Log: blast_gapalign.c,v $
- * PRODUCTION Revision 1000.5 2004/06/01 18:07:19 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.101
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: blast_gapalign.c,v 1000.5 2004/06/01 18:07:19 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's offical duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Ilya Dondoshansky
- *
- */
- /** @file blast_gapalign.c
- * Functions to perform gapped alignment
- */
- static char const rcsid[] =
- "$Id: blast_gapalign.c,v 1000.5 2004/06/01 18:07:19 gouriano Exp $";
- #include <algo/blast/core/blast_options.h>
- #include <algo/blast/core/blast_def.h>
- #include <algo/blast/core/blast_gapalign.h>
- #include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */
- #include <algo/blast/core/blast_setup.h>
- #include <algo/blast/core/greedy_align.h>
- static Int2 BLAST_DynProgNtGappedAlignment(BLAST_SequenceBlk* query_blk,
- BLAST_SequenceBlk* subject_blk, BlastGapAlignStruct* gap_align,
- const BlastScoringParameters* score_params, BlastInitHSP* init_hsp);
- static Int4 BLAST_AlignPackedNucl(Uint1* B, Uint1* A, Int4 N, Int4 M,
- Int4* pej, Int4* pei, BlastGapAlignStruct* gap_align,
- const BlastScoringParameters* score_params, Boolean reverse_sequence);
- static Int2 BLAST_ProtGappedAlignment(Uint1 program,
- BLAST_SequenceBlk* query_in, BLAST_SequenceBlk* subject_in,
- BlastGapAlignStruct* gap_align,
- const BlastScoringParameters* score_params, BlastInitHSP* init_hsp);
- /** Auxiliary structure for dynamic programming gapped extension */
- typedef struct BlastGapDP {
- Int4 best;
- Int4 best_gap;
- Int4 best_decline;
- } BlastGapDP;
- typedef struct GapData {
- BlastGapDP* CD;
- Int4** v;
- Int4* sapp; /* Current script append ptr */
- Int4 last;
- Int4 h, g;
- } GapData;
- /** Append "Delete k" op */
- #define DEL_(k)
- data.last = (data.last < 0) ? (data.sapp[-1] -= (k)) : (*data.sapp++ = -(k));
- /** Append "Insert k" op */
- #define INS_(k)
- data.last = (data.last > 0) ? (data.sapp[-1] += (k)) : (*data.sapp++ = (k));
- /** Append "Replace" op */
- #define REP_
- {data.last = *data.sapp++ = 0;}
- /* Divide by two to prevent underflows. */
- #define MININT INT4_MIN/2
- #define REPP_
- {*data.sapp++ = MININT; data.last = 0;}
- /** Are the two HSPs within a given number of diagonals from each other? */
- #define MB_HSP_CLOSE(q1, q2, s1, s2, c)
- (ABS((q1-s1) - (q2-s2)) < c)
- /** Is one HSP contained in a diagonal strip around another? */
- #define MB_HSP_CONTAINED(qo1,qo2,qe2,so1,so2,se2,c)
- (qo1>=qo2 && qo1<=qe2 && so1>=so2 && so1<=se2 &&
- MB_HSP_CLOSE(qo1,qo2,so1,so2,c))
- /** TRUE if c is between a and b; f between d and f. Determines if the
- * coordinates are already in an HSP that has been evaluated.
- */
- #define CONTAINED_IN_HSP(a,b,c,d,e,f) (((a <= c && b >= c) && (d <= f && e >= f)) ? TRUE : FALSE)
- /** Callback for sorting HSPs by starting offset in query */
- static int
- query_offset_compare_hsps(const void* v1, const void* v2)
- {
- BlastHSP* h1,* h2;
- BlastHSP** hp1,** hp2;
- hp1 = (BlastHSP**) v1;
- hp2 = (BlastHSP**) v2;
- h1 = *hp1;
- h2 = *hp2;
- if (h1 == NULL || h2 == NULL)
- return 0;
- /* If these are from different contexts, don't compare offsets */
- if (h1->context < h2->context)
- return -1;
- if (h1->context > h2->context)
- return 1;
- if (h1->query.offset < h2->query.offset)
- return -1;
- if (h1->query.offset > h2->query.offset)
- return 1;
- return 0;
- }
- /** Callback for sorting HSPs by ending offset in query */
- static int
- query_end_compare_hsps(const void* v1, const void* v2)
- {
- BlastHSP* h1,* h2;
- BlastHSP** hp1,** hp2;
- hp1 = (BlastHSP**) v1;
- hp2 = (BlastHSP**) v2;
- h1 = *hp1;
- h2 = *hp2;
- if (h1 == NULL || h2 == NULL)
- return 0;
- /* If these are from different contexts, don't compare offsets */
- if (h1->context < h2->context)
- return -1;
- if (h1->context > h2->context)
- return 1;
- if (h1->query.end < h2->query.end)
- return -1;
- if (h1->query.end > h2->query.end)
- return 1;
- return 0;
- }
- /** Callback for sorting HSPs by score */
- static int
- score_compare_hsp(const void* v1, const void* v2)
- {
- BlastHSP* h1,* h2;
- BlastHSP** hp1,** hp2;
- hp1 = (BlastHSP**) v1;
- hp2 = (BlastHSP**) v2;
- h1 = *hp1;
- h2 = *hp2;
-
- /* NULL HSPs are moved to the end */
- if (h1 == NULL)
- return 1;
- if (h2 == NULL)
- return -1;
- if (h1->score < h2->score)
- return 1;
- if (h1->score > h2->score)
- return -1;
- return 0;
- }
- /** Check the gapped alignments for an overlap of two different alignments.
- * A sufficient overlap is when two alignments have the same start values
- * of have the same final values.
- * @param hsp_array Pointer to an array of BlastHSP structures [in]
- * @param hsp_count The size of the hsp_array [in]
- * @param frame What frame these HSPs are from? [in]
- * @return The number of valid alignments remaining.
- */
- static Int4
- CheckGappedAlignmentsForOverlap(BlastHSP* *hsp_array, Int4 hsp_count, Int2 frame)
- {
- Int4 index1, index, increment;
- if (*hsp_array == NULL || hsp_count == 0)
- return 0;
-
- qsort(hsp_array, hsp_count, sizeof(BlastHSP*), query_offset_compare_hsps);
- index=0;
- increment=1;
- while (index < hsp_count-increment) {
- /* Check if both HSP's start on or end on the same digonal. */
- if (hsp_array[index+increment] == NULL) {
- increment++;
- continue;
- }
-
- if (frame != 0 && hsp_array[index+increment]->subject.frame != frame)
- break;
-
- if (hsp_array[index] && hsp_array[index]->query.offset == hsp_array[index+increment]->query.offset &&
- hsp_array[index]->subject.offset == hsp_array[index+increment]->subject.offset &&
- SIGN(hsp_array[index]->query.frame) == SIGN(hsp_array[index+increment]->query.frame))
- {
- if (hsp_array[index]->score > hsp_array[index+increment]->score) {
- sfree(hsp_array[index+increment]);
- increment++;
- } else {
- sfree(hsp_array[index]);
- index++;
- increment = 1;
- }
- } else {
- index++;
- increment = 1;
- }
- }
-
- qsort(hsp_array, hsp_count, sizeof(BlastHSP*), query_end_compare_hsps);
- index=0;
- increment=1;
- while (index < hsp_count-increment)
- { /* Check if both HSP's start on or end on the same digonal. */
- if (hsp_array[index+increment] == NULL)
- {
- increment++;
- continue;
- }
-
- if (frame != 0 && hsp_array[index+increment]->subject.frame != frame)
- break;
-
- if (hsp_array[index] &&
- hsp_array[index]->query.end == hsp_array[index+increment]->query.end &&
- hsp_array[index]->subject.end == hsp_array[index+increment]->subject.end &&
- SIGN(hsp_array[index]->query.frame) == SIGN(hsp_array[index+increment]->query.frame))
- {
- if (hsp_array[index]->score > hsp_array[index+increment]->score)
- {
- sfree(hsp_array[index+increment]);
- increment++;
- } else {
- sfree(hsp_array[index]);
- index++;
- increment = 1;
- }
- } else {
- index++;
- increment = 1;
- }
- }
- qsort(hsp_array,hsp_count,sizeof(BlastHSP*), score_compare_hsp);
-
- index1 = 0;
- for (index=0; index<hsp_count; index++)
- {
- if (hsp_array[index] != NULL)
- index1++;
- }
- return index1;
- }
- /** Retrieve the state structure corresponding to a given length
- * @param head Pointer to the first element of the state structures
- * array [in]
- * @param length The length for which the state structure has to be
- * found [in]
- * @return The found or created state structure
- */
- #define CHUNKSIZE 2097152
- static GapStateArrayStruct*
- GapGetState(GapStateArrayStruct** head, Int4 length)
- {
- GapStateArrayStruct* retval,* var,* last;
- Int4 chunksize = MAX(CHUNKSIZE, length + length/3);
- length += length/3; /* Add on about 30% so the end will get reused. */
- retval = NULL;
- if (*head == NULL) {
- retval = (GapStateArrayStruct*)
- malloc(sizeof(GapStateArrayStruct));
- retval->state_array = (Uint1*) malloc(chunksize*sizeof(Uint1));
- retval->length = chunksize;
- retval->used = 0;
- retval->next = NULL;
- *head = retval;
- } else {
- var = *head;
- last = *head;
- while (var) {
- if (length < (var->length - var->used)) {
- retval = var;
- break;
- } else if (var->used == 0) {
- /* If it's empty and too small, replace. */
- sfree(var->state_array);
- var->state_array = (Uint1*) malloc(chunksize*sizeof(Uint1));
- var->length = chunksize;
- retval = var;
- break;
- }
- last = var;
- var = var->next;
- }
-
- if (var == NULL)
- {
- retval = (GapStateArrayStruct*) malloc(sizeof(GapStateArrayStruct));
- retval->state_array = (Uint1*) malloc(chunksize*sizeof(Uint1));
- retval->length = chunksize;
- retval->used = 0;
- retval->next = NULL;
- last->next = retval;
- }
- }
- #ifdef ERR_POST_EX_DEFINED
- if (retval->state_array == NULL)
- ErrPostEx(SEV_ERROR, 0, 0, "state array is NULL");
- #endif
-
- return retval;
- }
- /** Remove a state from a state structure */
- static Boolean
- GapPurgeState(GapStateArrayStruct* state_struct)
- {
- while (state_struct)
- {
- /*
- memset(state_struct->state_array, 0, state_struct->used);
- */
- state_struct->used = 0;
- state_struct = state_struct->next;
- }
-
- return TRUE;
- }
- /** Greatest common divisor */
- static Int4 gcd(Int4 a, Int4 b)
- {
- Int4 c;
- if (a < b) {
- c = a;
- a = b; b = c;
- }
- while ((c = a % b) != 0) {
- a = b; b = c;
- }
- return b;
- }
- /** Divide 3 numbers by their greatest common divisor
- * @return The GCD
- */
- static Int4 gdb3(Int4* a, Int4* b, Int4* c)
- {
- Int4 g;
- if (*b == 0) g = gcd(*a, *c);
- else g = gcd(*a, gcd(*b, *c));
- if (g > 1) {
- *a /= g;
- *b /= g;
- *c /= g;
- }
- return g;
- }
- /** Deallocate the memory for greedy gapped alignment */
- static SGreedyAlignMem* BLAST_GreedyAlignsfree(SGreedyAlignMem* gamp)
- {
- if (gamp->flast_d) {
- sfree(gamp->flast_d[0]);
- sfree(gamp->flast_d);
- } else {
- if (gamp->flast_d_affine) {
- sfree(gamp->flast_d_affine[0]);
- sfree(gamp->flast_d_affine);
- }
- sfree(gamp->uplow_free);
- }
- sfree(gamp->max_row_free);
- if (gamp->space)
- MBSpaceFree(gamp->space);
- sfree(gamp);
- return NULL;
- }
- /** Allocate memory for the greedy gapped alignment algorithm
- * @param score_params Parameters related to scoring [in]
- * @param ext_params Options and parameters related to the extension [in]
- * @param max_dbseq_length The length of the longest sequence in the
- * database [in]
- * @return The allocated SGreedyAlignMem structure
- */
- static SGreedyAlignMem*
- BLAST_GreedyAlignMemAlloc(const BlastScoringParameters* score_params,
- const BlastExtensionParameters* ext_params,
- Int4 max_dbseq_length)
- {
- #define ERROR_FRACTION 2 /* N.B.: This value should match the value of
- ERROR_FRACTION in the anonymous enum in
- greedy_align.c */
- #define ICEIL(x,y) ((((x)-1)/(y))+1) /* FIXME: duplicated from greedy_align.c */
- SGreedyAlignMem* gamp;
- Int4 max_d, max_d_1, Xdrop, d_diff, max_cost, gd, i;
- Int4 reward, penalty, gap_open, gap_extend;
- Int4 Mis_cost, GE_cost;
- Boolean do_traceback;
-
- if (score_params == NULL || ext_params == NULL)
- return NULL;
-
- do_traceback =
- (ext_params->options->ePrelimGapExt != eGreedyExt);
- if (score_params->reward % 2 == 1) {
- reward = 2*score_params->reward;
- penalty = -2*score_params->penalty;
- Xdrop = 2*ext_params->gap_x_dropoff;
- gap_open = 2*score_params->gap_open;
- gap_extend = 2*score_params->gap_extend;
- } else {
- reward = score_params->reward;
- penalty = -score_params->penalty;
- Xdrop = ext_params->gap_x_dropoff;
- gap_open = score_params->gap_open;
- gap_extend = score_params->gap_extend;
- }
- if (gap_open == 0 && gap_extend == 0)
- gap_extend = reward / 2 + penalty;
- max_d = (Int4) (max_dbseq_length / ERROR_FRACTION + 1);
- gamp = (SGreedyAlignMem*) calloc(1, sizeof(SGreedyAlignMem));
- if (score_params->gap_open==0 && score_params->gap_extend==0) {
- d_diff = ICEIL(Xdrop+reward/2, penalty+reward);
-
- gamp->flast_d = (Int4**) malloc((max_d + 2) * sizeof(Int4*));
- if (gamp->flast_d == NULL) {
- sfree(gamp);
- return NULL;
- }
- gamp->flast_d[0] =
- (Int4*) malloc((max_d + max_d + 6) * sizeof(Int4) * 2);
- if (gamp->flast_d[0] == NULL) {
- #ifdef ERR_POST_EX_DEFINED
- ErrPostEx(SEV_WARNING, 0, 0,
- "Failed to allocate %ld bytes for greedy alignment",
- (max_d + max_d + 6) * sizeof(Int4) * 2);
- #endif
- BLAST_GreedyAlignsfree(gamp);
- return NULL;
- }
- gamp->flast_d[1] = gamp->flast_d[0] + max_d + max_d + 6;
- gamp->flast_d_affine = NULL;
- gamp->uplow_free = NULL;
- } else {
- gamp->flast_d = NULL;
- Mis_cost = reward + penalty;
- GE_cost = gap_extend+reward/2;
- max_d_1 = max_d;
- max_d *= GE_cost;
- max_cost = MAX(Mis_cost, gap_open+GE_cost);
- gd = gdb3(&Mis_cost, &gap_open, &GE_cost);
- d_diff = ICEIL(Xdrop+reward/2, gd);
- gamp->uplow_free = (Int4*) calloc(2*(max_d+1+max_cost), sizeof(Int4));
- gamp->flast_d_affine = (SThreeVal**)
- malloc((MAX(max_d, max_cost) + 2) * sizeof(SThreeVal*));
- if (!gamp->uplow_free || !gamp->flast_d_affine) {
- BLAST_GreedyAlignsfree(gamp);
- return NULL;
- }
- gamp->flast_d_affine[0] = (SThreeVal*)
- calloc((2*max_d_1 + 6) , sizeof(SThreeVal) * (max_cost+1));
- for (i = 1; i <= max_cost; i++)
- gamp->flast_d_affine[i] =
- gamp->flast_d_affine[i-1] + 2*max_d_1 + 6;
- if (!gamp->flast_d_affine || !gamp->flast_d_affine[0])
- BLAST_GreedyAlignsfree(gamp);
- }
- gamp->max_row_free = (Int4*) malloc(sizeof(Int4) * (max_d + 1 + d_diff));
- if (do_traceback)
- gamp->space = MBSpaceNew();
- if (!gamp->max_row_free || (do_traceback && !gamp->space))
- /* Failure in one of the memory allocations */
- BLAST_GreedyAlignsfree(gamp);
- return gamp;
- }
- /* Documented in blast_gapalign.h */
- BlastGapAlignStruct*
- BLAST_GapAlignStructFree(BlastGapAlignStruct* gap_align)
- {
- GapEditBlockDelete(gap_align->edit_block);
- if (gap_align->greedy_align_mem)
- BLAST_GreedyAlignsfree(gap_align->greedy_align_mem);
- GapStateFree(gap_align->state_struct);
- sfree(gap_align);
- return NULL;
- }
- /* Documented in blast_gapalign.h */
- Int2
- BLAST_GapAlignStructNew(const BlastScoringParameters* score_params,
- const BlastExtensionParameters* ext_params,
- Uint4 max_subject_length, Int4 query_length,
- BlastScoreBlk* sbp, BlastGapAlignStruct** gap_align_ptr)
- {
- Int2 status = 0;
- BlastGapAlignStruct* gap_align;
- /* If pointer to output structure is NULL, just don't do anything */
- if (!gap_align_ptr)
- return 0;
- if (!gap_align_ptr || !sbp || !score_params || !ext_params)
- return -1;
- gap_align = (BlastGapAlignStruct*) calloc(1, sizeof(BlastGapAlignStruct));
- *gap_align_ptr = gap_align;
- gap_align->sbp = sbp;
- gap_align->gap_x_dropoff = ext_params->gap_x_dropoff;
- if (ext_params->options->ePrelimGapExt != eDynProgExt) {
- max_subject_length = MIN(max_subject_length, MAX_DBSEQ_LEN);
- gap_align->greedy_align_mem =
- BLAST_GreedyAlignMemAlloc(score_params, ext_params,
- max_subject_length);
- if (!gap_align->greedy_align_mem)
- gap_align = BLAST_GapAlignStructFree(gap_align);
- }
- if (!gap_align)
- return -1;
- gap_align->positionBased = (sbp->posMatrix != NULL);
- return status;
- }
- /** Low level function to perform dynamic programming gapped extension
- * with traceback.
- * @param A The query sequence [in]
- * @param B The subject sequence [in]
- * @param M Maximal extension length in query [in]
- * @param N Maximal extension length in subject [in]
- * @param S The traceback information from previous extension [in]
- * @param a_offset Resulting starting offset in query [out]
- * @param b_offset Resulting starting offset in subject [out]
- * @param sapp The traceback information [out]
- * @param gap_align Structure holding various information and allocated
- * memory for the gapped alignment [in]
- * @param score_params Parameters related to scoring [in]
- * @param query_offset The starting offset in query [in]
- * @param reversed Has the sequence been reversed? Used for psi-blast [in]
- * @param reverse_sequence Do reverse the sequence [in]
- * @return The best alignment score found.
- */
- #define SCRIPT_SUB 0x00
- #define SCRIPT_INS 0x01
- #define SCRIPT_DEL 0x02
- #define SCRIPT_DECLINE 0x03
- #define SCRIPT_OP_MASK 0x03
- #define SCRIPT_OPEN_GAP 0x04
- #define SCRIPT_INS_ROW 0x10
- #define SCRIPT_DEL_ROW 0x20
- #define SCRIPT_INS_COL 0x40
- #define SCRIPT_DEL_COL 0x80
- Int4
- ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N, Int4* S, Int4* a_offset,
- Int4* b_offset, Int4** sapp, BlastGapAlignStruct* gap_align,
- const BlastScoringParameters* score_params, Int4 query_offset,
- Boolean reversed, Boolean reverse_sequence)
-
- {
- /* See SEMI_G_ALIGN_EX for more general comments on
- what this code is doing; comments in this function
- only apply to the traceback computations */
- BlastGapDP* score_array;
- Int4 i;
- Int4 a_index;
- Int4 b_index, b_size, first_b_index, last_b_index, b_increment;
- Uint1* b_ptr;
-
- Int4 gap_open;
- Int4 gap_extend;
- Int4 gap_open_extend;
- Int4 decline_penalty;
- Int4 x_dropoff;
- Int4 best_score;
-
- Int4* *matrix;
- Int4* matrix_row;
-
- Int4 score;
- Int4 score_gap_row;
- Int4 score_gap_col;
- Int4 score_decline;
- Int4 next_score;
- Int4 next_score_decline;
-
- GapData data; /* traceback variables */
- GapStateArrayStruct* state_struct;
- Uint1** edit_script;
- Uint1* edit_script_row;
- Int4 orig_b_index;
- Uint1 script, next_script, script_row, script_col;
- Int4 align_len;
- matrix = gap_align->sbp->matrix;
- *a_offset = 0;
- *b_offset = 0;
- gap_open = score_params->gap_open;
- gap_extend = score_params->gap_extend;
- gap_open_extend = gap_open + gap_extend;
- decline_penalty = score_params->decline_align;
- x_dropoff = gap_align->gap_x_dropoff;
-
- if (x_dropoff < gap_open_extend)
- x_dropoff = gap_open_extend;
-
- if(N <= 0 || M <= 0)
- return 0;
-
- /* Initialize traceback information. edit_script[] is
- a 2-D array which is filled in row by row as the
- dynamic programming takes place */
- data.sapp = S;
- data.last = 0;
- *sapp = S;
- GapPurgeState(gap_align->state_struct);
- edit_script = (Uint1**) malloc(sizeof(Uint1*)*(M+1));
- if (gap_extend > 0)
- state_struct = GapGetState(&gap_align->state_struct,
- x_dropoff / gap_extend + 5);
- else
- state_struct = GapGetState(&gap_align->state_struct, N + 3);
- edit_script[0] = state_struct->state_array;
- edit_script_row = state_struct->state_array;
- score_array = (BlastGapDP*)malloc((N + 2) * sizeof(BlastGapDP));
- score = -gap_open_extend;
- score_array[0].best = 0;
- score_array[0].best_gap = -gap_open_extend;
- score_array[0].best_decline = -gap_open_extend - decline_penalty;
-
- for (i = 1; i <= N; i++) {
- if (score < -x_dropoff)
- break;
- score_array[i].best = score;
- score_array[i].best_gap = score - gap_open_extend;
- score_array[i].best_decline = score - gap_open_extend - decline_penalty;
- score -= gap_extend;
- edit_script_row[i] = SCRIPT_INS;
- }
- state_struct->used = i + 1;
-
- b_size = i;
- best_score = 0;
- first_b_index = 0;
- if (reverse_sequence)
- b_increment = -1;
- else
- b_increment = 1;
-
- for (a_index = 1; a_index <= M; a_index++) {
- /* Set up the next row of the edit script; this involves
- allocating memory for the row, then pointing to it */
- if (gap_extend > 0)
- state_struct = GapGetState(&gap_align->state_struct,
- b_size - first_b_index + x_dropoff / gap_extend + 5);
- else
- state_struct = GapGetState(&gap_align->state_struct,
- N + 3 - first_b_index);
- edit_script[a_index] = state_struct->state_array +
- state_struct->used + 1;
- edit_script_row = edit_script[a_index];
- orig_b_index = first_b_index;
- if (!(gap_align->positionBased)) {
- if(reverse_sequence)
- matrix_row = matrix[ A[ M - a_index ] ];
- else
- matrix_row = matrix[ A[ a_index ] ];
- }
- else {
- if(reversed || reverse_sequence)
- matrix_row = gap_align->sbp->posMatrix[M - a_index];
- else
- matrix_row = gap_align->sbp->posMatrix[a_index + query_offset];
- }
- if(reverse_sequence)
- b_ptr = &B[N - first_b_index];
- else
- b_ptr = &B[first_b_index];
- score = MININT;
- score_gap_row = MININT;
- score_decline = MININT;
- last_b_index = first_b_index;
- for (b_index = first_b_index; b_index < b_size; b_index++) {
- b_ptr += b_increment;
- score_gap_col = score_array[b_index].best_gap;
- next_score = score_array[b_index].best + matrix_row[ *b_ptr ];
- next_score_decline = score_array[b_index].best_decline;
- /* script, script_row and script_col contain the
- actions specified by the dynamic programming.
- when the inner loop has finished, 'script' con-
- tains all of the actions to perform, and is
- written to edit_script[a_index][b_index]. Otherwise,
- this inner loop is exactly the same as the one
- in SEMI_G_ALIGN_EX() */
- if (score_decline > score) {
- script = SCRIPT_DECLINE;
- score = score_decline;
- }
- else {
- script = SCRIPT_SUB;
- }
- if (score_gap_col < score_decline) {
- score_gap_col = score_decline;
- script_col = SCRIPT_DEL_COL;
- }
- else {
- script_col = SCRIPT_INS_COL;
- if (score < score_gap_col) {
- script = SCRIPT_DEL;
- score = score_gap_col;
- }
- }
- if (score_gap_row < score_decline) {
- score_gap_row = score_decline;
- script_row = SCRIPT_DEL_ROW;
- }
- else {
- script_row = SCRIPT_INS_ROW;
- if (score < score_gap_row) {
- script = SCRIPT_INS;
- score = score_gap_row;
- }
- }
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index++;
- else
- score_array[b_index].best = MININT;
- }
- else {
- last_b_index = b_index;
- if (score > best_score) {
- best_score = score;
- *a_offset = a_index;
- *b_offset = b_index;
- }
- score_gap_row -= gap_extend;
- score_gap_col -= gap_extend;
- if (score_gap_col < (score - gap_open_extend)) {
- score_array[b_index].best_gap = score - gap_open_extend;
- }
- else {
- score_array[b_index].best_gap = score_gap_col;
- script += script_col;
- }
- if (score_gap_row < (score - gap_open_extend))
- score_gap_row = score - gap_open_extend;
- else
- script += script_row;
- if (score_decline < (score - gap_open)) {
- score_array[b_index].best_decline = score - gap_open - decline_penalty;
- }
- else {
- score_array[b_index].best_decline = score_decline - decline_penalty;
- script += SCRIPT_OPEN_GAP;
- }
- score_array[b_index].best = score;
- }
- score = next_score;
- score_decline = next_score_decline;
- edit_script_row[b_index] = script;
- }
-
- if (first_b_index == b_size) {
- a_index++;
- break;
- }
- if (last_b_index < b_size - 1) {
- b_size = last_b_index + 1;
- }
- else {
- while (score_gap_row >= (best_score - x_dropoff) && b_size <= N) {
- score_array[b_size].best = score_gap_row;
- score_array[b_size].best_gap = score_gap_row - gap_open_extend;
- score_array[b_size].best_decline = score_gap_row - gap_open -
- decline_penalty;
- score_gap_row -= gap_extend;
- edit_script_row[b_size] = SCRIPT_INS;
- b_size++;
- }
- }
- if (b_size <= N) {
- score_array[b_size].best = MININT;
- score_array[b_size].best_gap = MININT;
- score_array[b_size].best_decline = MININT;
- b_size++;
- }
- state_struct->used += MAX(b_index, b_size) - orig_b_index + 1;
- }
-
- /* Pick the optimal path through the now complete
- edit_script[][]. This is equivalent to flattening
- the 2-D array into a 1-D list of actions. */
- a_index = *a_offset;
- b_index = *b_offset;
- script = 0;
- edit_script_row = (Uint1 *)malloc(a_index + b_index);
- for (i = 0; a_index > 0 || b_index > 0; i++) {
- next_script = edit_script[a_index][b_index];
- switch(script) {
- case SCRIPT_INS:
- script = next_script & SCRIPT_OP_MASK;
- if (next_script & SCRIPT_INS_ROW)
- script = SCRIPT_INS;
- else if (next_script & SCRIPT_DEL_ROW)
- script = SCRIPT_DECLINE;
- break;
- case SCRIPT_DEL:
- script = next_script & SCRIPT_OP_MASK;
- if (next_script & SCRIPT_INS_COL)
- script = SCRIPT_DEL;
- else if (next_script & SCRIPT_DEL_COL)
- script = SCRIPT_DECLINE;
- break;
- case SCRIPT_DECLINE:
- script = next_script & SCRIPT_OP_MASK;
- if (next_script & SCRIPT_OPEN_GAP)
- script = SCRIPT_DECLINE;
- break;
- default:
- script = next_script & SCRIPT_OP_MASK;
- break;
- }
- if (script == SCRIPT_INS)
- b_index--;
- else if (script == SCRIPT_DEL)
- a_index--;
- else {
- a_index--;
- b_index--;
- }
- edit_script_row[i] = script;
- }
- /* Traceback proceeded backwards through edit_script,
- so the output traceback information is written
- in reverse order */
- align_len = i;
- for (i--; i >= 0; i--) {
- if (edit_script_row[i] == SCRIPT_SUB)
- REP_
- else if (edit_script_row[i] == SCRIPT_INS)
- INS_(1)
- else if (edit_script_row[i] == SCRIPT_DECLINE)
- REPP_
- else
- DEL_(1)
- }
-
- sfree(edit_script_row);
- sfree(edit_script);
- sfree(score_array);
- align_len -= data.sapp - S;
- if (align_len > 0)
- memset(data.sapp, 0, align_len);
- *sapp = data.sapp;
- return best_score;
- }
- /** Low level function to perform gapped extension in one direction with
- * or without traceback.
- * @param A The query sequence [in]
- * @param B The subject sequence [in]
- * @param M Maximal extension length in query [in]
- * @param N Maximal extension length in subject [in]
- * @param S The traceback information from previous extension [in]
- * @param a_offset Resulting starting offset in query [out]
- * @param b_offset Resulting starting offset in subject [out]
- * @param score_only Only find the score, without saving traceback [in]
- * @param sapp The traceback information [out]
- * @param gap_align Structure holding various information and allocated
- * memory for the gapped alignment [in]
- * @param score_params Parameters related to scoring [in]
- * @param query_offset The starting offset in query [in]
- * @param reversed Has the sequence been reversed? Used for psi-blast [in]
- * @param reverse_sequence Do reverse the sequence [in]
- * @return The best alignment score found.
- */
- static Int4 SEMI_G_ALIGN_EX(Uint1* A, Uint1* B, Int4 M, Int4 N,
- Int4* S, Int4* a_offset, Int4* b_offset, Boolean score_only, Int4** sapp,
- BlastGapAlignStruct* gap_align, const BlastScoringParameters* score_params,
- Int4 query_offset, Boolean reversed, Boolean reverse_sequence)
- {
- BlastGapDP* score_array; /* sequence pointers and indices */
- Int4 i;
- Int4 a_index;
- Int4 b_index, b_size, first_b_index, last_b_index, b_increment;
- Uint1* b_ptr;
-
- Int4 gap_open; /* alignment penalty variables */
- Int4 gap_extend;
- Int4 gap_open_extend;
- Int4 decline_penalty;
- Int4 x_dropoff;
-
- Int4* *matrix; /* pointers to the score matrix */
- Int4* matrix_row;
-
- Int4 score; /* score tracking variables */
- Int4 score_gap_row;
- Int4 score_gap_col;
- Int4 score_decline;
- Int4 next_score;
- Int4 next_score_decline;
- Int4 best_score;
-
- if (!score_only) {
- return ALIGN_EX(A, B, M, N, S, a_offset, b_offset, sapp, gap_align,
- score_params, query_offset, reversed, reverse_sequence);
- }
-
- /* do initialization and sanity-checking */
- matrix = gap_align->sbp->matrix;
- *a_offset = 0;
- *b_offset = 0;
- gap_open = score_params->gap_open;
- gap_extend = score_params->gap_extend;
- gap_open_extend = gap_open + gap_extend;
- decline_penalty = score_params->decline_align;
- x_dropoff = gap_align->gap_x_dropoff;
-
- if (x_dropoff < gap_open_extend)
- x_dropoff = gap_open_extend;
-
- if(N <= 0 || M <= 0)
- return 0;
-
- /* Allocate and fill in the auxiliary
- bookeeping structures (one struct
- per letter of B) */
- score_array = (BlastGapDP*)malloc((N + 2) * sizeof(BlastGapDP));
- score = -gap_open_extend;
- score_array[0].best = 0;
- score_array[0].best_gap = -gap_open_extend;
- score_array[0].best_decline = -gap_open_extend - decline_penalty;
-
- for (i = 1; i <= N; i++) {
- if (score < -x_dropoff)
- break;
- score_array[i].best = score;
- score_array[i].best_gap = score - gap_open_extend;
- score_array[i].best_decline = score - gap_open_extend - decline_penalty;
- score -= gap_extend;
- }
-
- /* The inner loop below examines letters of B from
- index 'first_b_index' to 'b_size' */
- b_size = i;
- best_score = 0;
- first_b_index = 0;
- if (reverse_sequence)
- b_increment = -1;
- else
- b_increment = 1;
-
- for (a_index = 1; a_index <= M; a_index++) {
- /* pick out the row of the score matrix
- appropriate for A[a_index] */
- if (!(gap_align->positionBased)) {
- if(reverse_sequence)
- matrix_row = matrix[ A[ M - a_index ] ];
- else
- matrix_row = matrix[ A[ a_index ] ];
- }
- else {
- if(reversed || reverse_sequence)
- matrix_row = gap_align->sbp->posMatrix[M - a_index];
- else
- matrix_row = gap_align->sbp->posMatrix[a_index + query_offset];
- }
- if(reverse_sequence)
- b_ptr = &B[N - first_b_index];
- else
- b_ptr = &B[first_b_index];
- /* initialize running-score variables */
- score = MININT;
- score_gap_row = MININT;
- score_decline = MININT;
- last_b_index = first_b_index;
- for (b_index = first_b_index; b_index < b_size; b_index++) {
- b_ptr += b_increment;
- score_gap_col = score_array[b_index].best_gap;
- next_score = score_array[b_index].best + matrix_row[ *b_ptr ];
- next_score_decline = score_array[b_index].best_decline;
- /* decline the alignment if that improves the score */
- score = MAX(score, score_decline);
-
- /* decline the best row score if that improves it;
- if not, make it the new high score if it's
- an improvement */
- if (score_gap_col < score_decline)
- score_gap_col = score_decline;
- else if (score < score_gap_col)
- score = score_gap_col;
- /* decline the best column score if that improves it;
- if not, make it the new high score if it's
- an improvement */
- if (score_gap_row < score_decline)
- score_gap_row = score_decline;
- else if (score < score_gap_row)
- score = score_gap_row;
- if (best_score - score > x_dropoff) {
- /* the current best score failed the X-dropoff
- criterion. Note that this does not stop the
- inner loop, only forces future iterations to
- skip this column of B.
- Also, if the very first letter of B that was
- tested failed the X dropoff criterion, make
- sure future inner loops start one letter to
- the right */
- if (b_index == first_b_index)
- first_b_index++;
- else
- score_array[b_index].best = MININT;
- }
- else {
- last_b_index = b_index;
- if (score > best_score) {
- best_score = score;
- *a_offset = a_index;
- *b_offset = b_index;
- }
- /* If starting a gap at this position will improve
- the best row, column, or declined alignment score,
- update them to reflect that. */
- score_gap_row -= gap_extend;
- score_gap_col -= gap_extend;
- score_array[b_index].best_gap = MAX(score - gap_open_extend,
- score_gap_col);
- score_gap_row = MAX(score - gap_open_extend, score_gap_row);
- score_array[b_index].best_decline =
- MAX(score_decline, score - gap_open) - decline_penalty;
- score_array[b_index].best = score;
- }
- score = next_score;
- score_decline = next_score_decline;
- }
-
- /* Finish aligning if the best scores for all positions
- of B will fail the X-dropoff test, i.e. the inner loop
- bounds have converged to each other */
- if (first_b_index == b_size)
- break;
- if (last_b_index < b_size - 1) {
- /* This row failed the X-dropoff test earlier than
- the last row did; just shorten the loop bounds
- before doing the next row */
- b_size = last_b_index + 1;
- }
- else {
- /* The inner loop finished without failing the X-dropoff
- test; initialize extra bookkeeping structures until
- the X dropoff test fails or we run out of letters in B.
- The next inner loop will have larger bounds */
- while (score_gap_row >= (best_score - x_dropoff) && b_size <= N) {
- score_array[b_size].best = score_gap_row;
- score_array[b_size].best_gap = score_gap_row - gap_open_extend;
- score_array[b_size].best_decline = score_gap_row - gap_open -
- decline_penalty;
- score_gap_row -= gap_extend;
- b_size++;
- }
- }
- if (b_size <= N) {
- score_array[b_size].best = MININT;
- score_array[b_size].best_gap = MININT;
- score_array[b_size].best_decline = MININT;
- b_size++;
- }
- }
-
- sfree(score_array);
- return best_score;
- }
- /** Low level function to perform gapped extension with out-of-frame
- * gapping with traceback
- * @param A The query sequence [in]
- * @param B The subject sequence [in]
- * @param M Maximal extension length in query [in]
- * @param N Maximal extension length in subject [in]
- * @param S The traceback information from previous extension [in]
- * @param a_offset Resulting starting offset in query [out]
- * @param b_offset Resulting starting offset in subject [out]
- * @param sapp The traceback information [out]
- * @param gap_align Structure holding various information and allocated
- * memory for the gapped alignment [in]
- * @param score_params Parameters related to scoring [in]
- * @param query_offset The starting offset in query [in]
- * @param reversed Has the sequence been reversed? Used for psi-blast [in]
- * @return The best alignment score found.
- */
- #define SCRIPT_GAP_IN_A 0
- #define SCRIPT_AHEAD_ONE_FRAME 1
- #define SCRIPT_AHEAD_TWO_FRAMES 2
- #define SCRIPT_NEXT_IN_FRAME 3
- #define SCRIPT_NEXT_PLUS_ONE_FRAME 4
- #define SCRIPT_NEXT_PLUS_TWO_FRAMES 5
- #define SCRIPT_GAP_IN_B 6
- #define SCRIPT_OOF_OP_MASK 0x07
- #define SCRIPT_OOF_OPEN_GAP 0x10
- #define SCRIPT_EXTEND_GAP_IN_A 0x20
- #define SCRIPT_EXTEND_GAP_IN_B 0x40
- static Int4 OOF_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
- Int4* S, Int4* a_offset, Int4* b_offset, Int4** sapp,
- BlastGapAlignStruct* gap_align, const BlastScoringParameters* score_params,
- Int4 query_offset, Boolean reversed)
- {
- BlastGapDP* score_array; /* sequence pointers and indices */
- Int4 i, increment;
- Int4 a_index;
- Int4 b_index, b_size, first_b_index, last_b_index;
-
- Int4 gap_open; /* alignment penalty variables */
- Int4 gap_extend;
- Int4 gap_open_extend;
- Int4 shift_penalty;
- Int4 x_dropoff;
-
- Int4* *matrix; /* pointers to the score matrix */
- Int4* matrix_row;
-
- Int4 score; /* score tracking variables */
- Int4 score_row1;
- Int4 score_row2;
- Int4 score_row3;
- Int4 score_gap_col;
- Int4 score_col1;
- Int4 score_col2;
- Int4 score_col3;
- Int4 score_other_frame1;
- Int4 score_other_frame2;
- Int4 best_score;
-
- GapData data; /* traceback variables */
- GapStateArrayStruct* state_struct;
- Uint1** edit_script;
- Uint1* edit_script_row;
- Int4 orig_b_index;
- Int1 script, next_script;
- Int4 align_len;
- /* do initialization and sanity-checking */
- matrix = gap_align->sbp->matrix;
- *a_offset = 0;
- *b_offset = -2;
- gap_open = score_params->gap_open;
- gap_extend = score_params->gap_extend;
- gap_open_extend = gap_open + gap_extend;
- shift_penalty = score_params->shift_pen;
- x_dropoff = gap_align->gap_x_dropoff;
-
- if (x_dropoff < gap_open_extend)
- x_dropoff = gap_open_extend;
-
- if(N <= 0 || M <= 0)
- return 0;
-
- /* Initialize traceback information. edit_script[] is
- a 2-D array which is filled in row by row as the
- dynamic programming takes place */
- data.sapp = S;
- data.last = 0;
- *sapp = S;
- GapPurgeState(gap_align->state_struct);
- edit_script = (Uint1**) malloc(sizeof(Uint1*)*(M+1));
- state_struct = GapGetState(&gap_align->state_struct, N + 5);
- edit_script[0] = state_struct->state_array;
- edit_script_row = state_struct->state_array;
- /* Allocate and fill in the auxiliary
- bookeeping structures (one struct
- per letter of B) */
- score_array = (BlastGapDP*)calloc((N + 4), sizeof(BlastGapDP));
- score = -gap_open_extend;
- score_array[0].best = 0;
- score_array[0].best_gap = -gap_open_extend;
-
- for (i = 3; i <= N + 2; i += 3) {
- score_array[i].best = score;
- score_array[i].best_gap = score - gap_open_extend;
- edit_script_row[i] = SCRIPT_GAP_IN_B;
- score_array[i-1].best = MININT;
- score_array[i-1].best_gap = MININT;
- edit_script_row[i-1] = SCRIPT_GAP_IN_B;
- score_array[i-2].best = MININT;
- score_array[i-2].best_gap = MININT;
- edit_script_row[i-2] = SCRIPT_GAP_IN_B;
- if (score < -x_dropoff)
- break;
- score -= gap_extend;
- }
-
- /* The inner loop below examines letters of B from
- index 'first_b_index' to 'b_size' */
- b_size = i - 2;
- state_struct->used = b_size + 1;
- score_array[b_size].best = MININT;
- score_array[b_size].best_gap = MININT;
- best_score = 0;
- first_b_index = 0;
- if (reversed) {
- increment = -1;
- }
- else {
- /* Allow for a backwards frame shift */
- B -= 2;
- increment = 1;
- }
-
- for (a_index = 1; a_index <= M; a_index++) {
- /* Set up the next row of the edit script; this involves
- allocating memory for the row, then pointing to it */
- state_struct = GapGetState(&gap_align->state_struct,
- N + 5 - first_b_index);
- edit_script[a_index] = state_struct->state_array +
- state_struct->used + 1;
- edit_script_row = edit_script[a_index];
- orig_b_index = first_b_index;
- if (!(gap_align->positionBased)) {
- matrix_row = matrix[ A[ a_index * increment ] ];
- }
- else {
- if(reversed)
- matrix_row = gap_align->sbp->posMatrix[M - a_index];
- else
- matrix_row = gap_align->sbp->posMatrix[a_index + query_offset];
- }
- score = MININT;
- score_row1 = MININT;
- score_row2 = MININT;
- score_row3 = MININT;
- score_gap_col = MININT;
- score_col1 = MININT;
- score_col2 = MININT;
- score_col3 = MININT;
- score_other_frame1 = MININT;
- score_other_frame2 = MININT;
- last_b_index = first_b_index;
- b_index = first_b_index;
- /* The inner loop is identical to that of OOF_SEMI_G_ALIGN,
- except that traceback operations are sprinkled throughout. */
- while (b_index < b_size) {
- /* FRAME 0 */
- score = MAX(score_other_frame1, score_other_frame2) - shift_penalty;
- score = MAX(score, score_col1);
- if (score == score_col1) {
- script = SCRIPT_NEXT_IN_FRAME;
- }
- else if (score + shift_penalty == score_other_frame1) {
- if (score_other_frame1 == score_col2)
- script = SCRIPT_AHEAD_TWO_FRAMES;
- else
- script = SCRIPT_NEXT_PLUS_TWO_FRAMES;
- }
- else {
- if (score_other_frame2 == score_col3)
- script = SCRIPT_AHEAD_ONE_FRAME;
- else
- script = SCRIPT_NEXT_PLUS_ONE_FRAME;
- }
- score += matrix_row[ B[ b_index * increment ] ];
- score_other_frame1 = MAX(score_col1, score_array[b_index].best);
- score_col1 = score_array[b_index].best;
- score_gap_col = score_array[b_index].best_gap;
- if (score < MAX(score_gap_col, score_row1)) {
- if (score_gap_col > score_row1) {
- score = score_gap_col;
- script = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_A;
- }
- else {
- score = score_row1;
- script = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_B;
- }
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- score_array[b_index].best_gap = score_gap_col - gap_extend;
- score_row1 -= gap_extend;
- }
- }
- else {
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- if (score > best_score) {
- best_score = score;
- *a_offset = a_index;
- *b_offset = b_index;
- }
- score -= gap_open_extend;
- score_row1 -= gap_extend;
- if (score > score_row1)
- score_row1 = score;
- else
- script |= SCRIPT_EXTEND_GAP_IN_B;
- score_gap_col -= gap_extend;
- if (score < score_gap_col) {
- score_array[b_index].best_gap = score_gap_col;
- script |= SCRIPT_EXTEND_GAP_IN_A;
- }
- else {
- score_array[b_index].best_gap = score;
- }
- }
- }
- edit_script_row[b_index] = script;
- if (++b_index >= b_size) {
- score = score_row1;
- score_row1 = score_row2;
- score_row2 = score_row3;
- score_row3 = score;
- break;
- }
- /* FRAME 1 */
- score = MAX(score_other_frame1, score_other_frame2) - shift_penalty;
- score = MAX(score, score_col2);
- if (score == score_col2) {
- script = SCRIPT_NEXT_IN_FRAME;
- }
- else if (score + shift_penalty == score_other_frame1) {
- if (score_other_frame1 == score_col1)
- script = SCRIPT_AHEAD_ONE_FRAME;
- else
- script = SCRIPT_NEXT_PLUS_ONE_FRAME;
- }
- else {
- if (score_other_frame2 == score_col3)
- script = SCRIPT_AHEAD_TWO_FRAMES;
- else
- script = SCRIPT_NEXT_PLUS_TWO_FRAMES;
- }
- score += matrix_row[ B[ b_index * increment ] ];
- score_other_frame2 = MAX(score_col2, score_array[b_index].best);
- score_col2 = score_array[b_index].best;
- score_gap_col = score_array[b_index].best_gap;
- if (score < MAX(score_gap_col, score_row2)) {
- score = MAX(score_gap_col, score_row2);
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- if (score == score_gap_col)
- script = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_A;
- else
- script = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_B;
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- score_array[b_index].best_gap = score_gap_col - gap_extend;
- score_row2 -= gap_extend;
- }
- }
- else {
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- if (score > best_score) {
- best_score = score;
- *a_offset = a_index;
- *b_offset = b_index;
- }
- score -= gap_open_extend;
- score_row2 -= gap_extend;
- if (score > score_row2)
- score_row2 = score;
- else
- script |= SCRIPT_EXTEND_GAP_IN_B;
- score_gap_col -= gap_extend;
- if (score < score_gap_col) {
- score_array[b_index].best_gap = score_gap_col;
- script |= SCRIPT_EXTEND_GAP_IN_A;
- }
- else {
- score_array[b_index].best_gap = score;
- }
- }
- }
- edit_script_row[b_index] = script;
- ++b_index;
- if (b_index >= b_size) {
- score = score_row2;
- score_row2 = score_row1;
- score_row1 = score_row3;
- score_row3 = score;
- break;
- }
- /* FRAME 2 */
- score = MAX(score_other_frame1, score_other_frame2) - shift_penalty;
- score = MAX(score, score_col3);
- if (score == score_col3) {
- script = SCRIPT_NEXT_IN_FRAME;
- }
- else if (score + shift_penalty == score_other_frame1) {
- if (score_other_frame1 == score_col1)
- script = SCRIPT_AHEAD_TWO_FRAMES;
- else
- script = SCRIPT_NEXT_PLUS_TWO_FRAMES;
- }
- else {
- if (score_other_frame2 == score_col2)
- script = SCRIPT_AHEAD_ONE_FRAME;
- else
- script = SCRIPT_NEXT_PLUS_ONE_FRAME;
- }
- score += matrix_row[ B[ b_index * increment ] ];
- score_other_frame1 = score_other_frame2;
- score_other_frame2 = MAX(score_col3, score_array[b_index].best);
- score_col3 = score_array[b_index].best;
- score_gap_col = score_array[b_index].best_gap;
- if (score < MAX(score_gap_col, score_row3)) {
- score = MAX(score_gap_col, score_row3);
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- if (score == score_gap_col)
- script = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_A;
- else
- script = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_B;
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- score_array[b_index].best_gap = score_gap_col - gap_extend;
- score_row3 -= gap_extend;
- }
- }
- else {
- if (best_score - score > x_dropoff) {
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- if (score > best_score) {
- best_score = score;
- *a_offset = a_index;
- *b_offset = b_index;
- }
- score -= gap_open_extend;
- score_row3 -= gap_extend;
- if (score > score_row3)
- score_row3 = score;
- else
- script |= SCRIPT_EXTEND_GAP_IN_B;
- score_gap_col -= gap_extend;
- if (score < score_gap_col) {
- score_array[b_index].best_gap = score_gap_col;
- script |= SCRIPT_EXTEND_GAP_IN_A;
- }
- else {
- score_array[b_index].best_gap = score;
- }
- }
- }
- edit_script_row[b_index] = script;
- b_index++;
- }
-
- /* Finish aligning if the best scores for all positions
- of B will fail the X-dropoff test, i.e. the inner loop
- bounds have converged to each other */
- if (first_b_index == b_size)
- break;
- if (last_b_index < b_size) {
- /* This row failed the X-dropoff test earlier than
- the last row did; just shorten the loop bounds
- before doing the next row */
- b_size = last_b_index;
- }
- else {
- /* The inner loop finished without failing the X-dropoff
- test; initialize extra bookkeeping structures until
- the X dropoff test fails or we run out of letters in B.
- The next inner loop will have larger bounds.
-
- Keep initializing extra structures until the X dropoff
- test fails in all frames for this row */
- score = MAX(score_row1, MAX(score_row2, score_row3));
- last_b_index = b_size + (score - (best_score - x_dropoff)) /
- gap_extend;
- last_b_index = MIN(last_b_index, N + 2);
- while (b_size < last_b_index) {
- score_array[b_size].best = score_row1;
- score_array[b_size].best_gap = score_row1 - gap_open_extend;
- score_row1 -= gap_extend;
- edit_script_row[b_size] = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_B;
- if (++b_size > last_b_index)
- break;
- score_array[b_size].best = score_row2;
- score_array[b_size].best_gap = score_row2 - gap_open_extend;
- score_row2 -= gap_extend;
- edit_script_row[b_size] = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_B;
- if (++b_size > last_b_index)
- break;
- score_array[b_size].best = score_row3;
- score_array[b_size].best_gap = score_row3 - gap_open_extend;
- score_row3 -= gap_extend;
- edit_script_row[b_size] = SCRIPT_OOF_OPEN_GAP | SCRIPT_GAP_IN_B;
- if (++b_size > last_b_index)
- break;
- }
- }
- /* chop off the best score in each frame */
- last_b_index = MIN(b_size + 4, N + 3);
- while (b_size < last_b_index) {
- score_array[b_size].best = MININT;
- score_array[b_size].best_gap = MININT;
- b_size++;
- }
- state_struct->used += MAX(b_index, b_size) - orig_b_index + 1;
- }
- a_index = *a_offset;
- b_index = *b_offset;
- script = 1;
- align_len = 0;
- edit_script_row = (Uint1 *)malloc(a_index + b_index);
- while (a_index > 0 || b_index > 0) {
- next_script = edit_script[a_index][b_index];
- switch (script) {
- case SCRIPT_GAP_IN_A:
- script = next_script & SCRIPT_OOF_OP_MASK;
- if (next_script & (SCRIPT_OOF_OPEN_GAP | SCRIPT_EXTEND_GAP_IN_A))
- script = SCRIPT_GAP_IN_A;
- break;
- case SCRIPT_GAP_IN_B:
- script = next_script & SCRIPT_OOF_OP_MASK;
- if (next_script & (SCRIPT_OOF_OPEN_GAP | SCRIPT_EXTEND_GAP_IN_B))
- script = SCRIPT_GAP_IN_B;
- break;
- default:
- script = next_script & SCRIPT_OOF_OP_MASK;
- break;
- }
- if (script == SCRIPT_GAP_IN_B) {
- b_index -= 3;
- }
- else {
- b_index -= script;
- a_index--;
- }
- edit_script_row[align_len++] = script;
- }
- /* Traceback proceeded backwards through edit_script,
- so the output traceback information is written
- in reverse order */
- for (align_len--; align_len >= 0; align_len--)
- *data.sapp++ = edit_script_row[align_len];
-
- sfree(edit_script_row);
- sfree(edit_script);
- sfree(score_array);
- *sapp = data.sapp;
- if (!reversed)
- *b_offset -= 2;
- return best_score;
- }
- /** Low level function to perform gapped extension with out-of-frame
- * gapping with or without traceback.
- * @param A The query sequence [in]
- * @param B The subject sequence [in]
- * @param M Maximal extension length in query [in]
- * @param N Maximal extension length in subject [in]
- * @param S The traceback information from previous extension [in]
- * @param a_offset Resulting starting offset in query [out]
- * @param b_offset Resulting starting offset in subject [out]
- * @param score_only Only find the score, without saving traceback [in]
- * @param sapp the traceback information [out]
- * @param gap_align Structure holding various information and allocated
- * memory for the gapped alignment [in]
- * @param score_params Parameters related to scoring [in]
- * @param query_offset The starting offset in query [in]
- * @param reversed Has the sequence been reversed? Used for psi-blast [in]
- * @return The best alignment score found.
- */
- static Int4 OOF_SEMI_G_ALIGN(Uint1* A, Uint1* B, Int4 M, Int4 N,
- Int4* S, Int4* a_offset, Int4* b_offset, Boolean score_only, Int4** sapp,
- BlastGapAlignStruct* gap_align, const BlastScoringParameters* score_params,
- Int4 query_offset, Boolean reversed)
- {
- BlastGapDP* score_array; /* sequence pointers and indices */
- Int4 i, increment;
- Int4 a_index;
- Int4 b_index, b_size, first_b_index, last_b_index;
-
- Int4 gap_open; /* alignment penalty variables */
- Int4 gap_extend;
- Int4 gap_open_extend;
- Int4 shift_penalty;
- Int4 x_dropoff;
-
- Int4* *matrix; /* pointers to the score matrix */
- Int4* matrix_row;
-
- Int4 score; /* score tracking variables */
- Int4 score_row1;
- Int4 score_row2;
- Int4 score_row3;
- Int4 score_gap_col;
- Int4 score_col1;
- Int4 score_col2;
- Int4 score_col3;
- Int4 score_other_frame1;
- Int4 score_other_frame2;
- Int4 best_score;
-
- if (!score_only) {
- return OOF_ALIGN(A, B, M, N, S, a_offset, b_offset, sapp, gap_align,
- score_params, query_offset, reversed);
- }
-
- /* do initialization and sanity-checking */
- matrix = gap_align->sbp->matrix;
- *a_offset = 0;
- *b_offset = -2;
- gap_open = score_params->gap_open;
- gap_extend = score_params->gap_extend;
- gap_open_extend = gap_open + gap_extend;
- shift_penalty = score_params->shift_pen;
- x_dropoff = gap_align->gap_x_dropoff;
-
- if (x_dropoff < gap_open_extend)
- x_dropoff = gap_open_extend;
-
- if(N <= 0 || M <= 0)
- return 0;
-
- /* Allocate and fill in the auxiliary
- bookeeping structures (one struct
- per letter of B) */
- score_array = (BlastGapDP*)calloc((N + 5), sizeof(BlastGapDP));
- score = -gap_open_extend;
- score_array[0].best = 0;
- score_array[0].best_gap = -gap_open_extend;
-
- for (i = 3; i <= N + 2; i += 3) {
- score_array[i].best = score;
- score_array[i].best_gap = score - gap_open_extend;
- score_array[i-1].best = MININT;
- score_array[i-1].best_gap = MININT;
- score_array[i-2].best = MININT;
- score_array[i-2].best_gap = MININT;
- if (score < -x_dropoff)
- break;
- score -= gap_extend;
- }
-
- /* The inner loop below examines letters of B from
- index 'first_b_index' to 'b_size' */
- b_size = i - 2;
- score_array[b_size].best = MININT;
- score_array[b_size].best_gap = MININT;
- best_score = 0;
- first_b_index = 0;
- if (reversed) {
- increment = -1;
- }
- else {
- /* Allow for a backwards frame shift */
- B -= 2;
- increment = 1;
- }
-
- for (a_index = 1; a_index <= M; a_index++) {
- /* XXX Why is this here? */
- score_array[2].best = MININT;
- score_array[2].best_gap = MININT;
- /* pick out the row of the score matrix
- appropriate for A[a_index] */
- if (!(gap_align->positionBased)) {
- matrix_row = matrix[ A[ a_index * increment ] ];
- }
- else {
- if(reversed)
- matrix_row = gap_align->sbp->posMatrix[M - a_index];
- else
- matrix_row = gap_align->sbp->posMatrix[a_index + query_offset];
- }
- /* initialize running-score variables */
- score = MININT;
- score_row1 = MININT;
- score_row2 = MININT;
- score_row3 = MININT;
- score_gap_col = MININT;
- score_col1 = MININT;
- score_col2 = MININT;
- score_col3 = MININT;
- score_other_frame1 = MININT;
- score_other_frame2 = MININT;
- last_b_index = first_b_index;
- b_index = first_b_index;
- while (b_index < b_size) {
- /* FRAME 0 */
- /* Pick the best score among all frames */
- score = MAX(score_other_frame1, score_other_frame2) - shift_penalty;
- score = MAX(score, score_col1) +
- matrix_row[ B[ b_index * increment ] ];
- score_other_frame1 = MAX(score_col1, score_array[b_index].best);
- score_col1 = score_array[b_index].best;
- score_gap_col = score_array[b_index].best_gap;
- /* Use the row and column scores if they improve
- the score overall */
- if (score < MAX(score_gap_col, score_row1)) {
- score = MAX(score_gap_col, score_row1);
- if (best_score - score > x_dropoff) {
- /* the current best score failed the X-dropoff
- criterion. Note that this does not stop the
- inner loop, only forces future iterations to
- skip this column of B.
-
- Also, if the very first letter of B that was
- tested failed the X dropoff criterion, make
- sure future inner loops start one letter to
- the right */
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- /* update the row and column running scores */
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- score_array[b_index].best_gap = score_gap_col - gap_extend;
- score_row1 -= gap_extend;
- }
- }
- else {
- if (best_score - score > x_dropoff) {
- /* the current best score failed the X-dropoff
- criterion. */
- if (first_b_index == b_index)
- first_b_index = b_index + 1;
- else
- score_array[b_index].best = MININT;
- }
- else {
- /* The current best score exceeds the
- row and column scores, and thus may
- improve on the current optimal score */
- last_b_index = b_index + 1;
- score_array[b_index].best = score;
- if (score > best_score) {
- best_score = score;
- *a_offset = a_index;
- *b_offset = b_index;
- }
- /* compute the best scores that include gaps
- or gap extensions */
- score -= gap_open_extend;
- score_row1 -= gap_extend;
- score_row1 = MAX(score, score_row1);
- score_array[b_index].best_gap = MAX(score,
- score_gap_col - gap_extend);
- }
- }
- /* If this was the last letter of B checked, rotate
- the row scores so that code beyond the inner loop
- works correctly */
- if (++b_index >= b_size) {
- score = score_row1;
- score_row1 = score_row2;
- score_row2 = score_row3;
- score_row3 = score;
- break;
- }
- /* FRAME 1 */
- /* This code, and that for Frame 2, are essentially the
- same as the preceeding code. The only real difference
- is the updating of the other_frame best scores */
- score = MAX(score_other_frame1, score_other_frame2) - shift_penalty;
- score = MAX(score, score_col2) +
- matrix_row[ B[ b_index * increment ] ];
- score_other_frame2 = MAX(score_col2, score_array[b_index].best);
- score_col2 = score_array[b_index].best;
- score_gap_col = score_array[b_index].best_gap;
- if (score < MAX(score_gap_col, score_row2)) {
- score = MAX(score_gap_col, score_row2);