blast_psi.c
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:11k
源码类别:

生物技术

开发平台:

C/C++

  1. /*
  2.  * ===========================================================================
  3.  * PRODUCTION $Log: blast_psi.c,v $
  4.  * PRODUCTION Revision 1000.0  2004/06/01 18:13:53  gouriano
  5.  * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.1
  6.  * PRODUCTION
  7.  * ===========================================================================
  8.  */
  9. static char const rcsid[] =
  10.     "$Id: blast_psi.c,v 1000.0 2004/06/01 18:13:53 gouriano Exp $";
  11. /* ===========================================================================
  12.  *
  13.  *                            PUBLIC DOMAIN NOTICE
  14.  *               National Center for Biotechnology Information
  15.  *
  16.  *  This software/database is a "United States Government Work" under the
  17.  *  terms of the United States Copyright Act.  It was written as part of
  18.  *  the author's offical duties as a United States Government employee and
  19.  *  thus cannot be copyrighted.  This software/database is freely available
  20.  *  to the public for use. The National Library of Medicine and the U.S.
  21.  *  Government have not placed any restriction on its use or reproduction.
  22.  *
  23.  *  Although all reasonable efforts have been taken to ensure the accuracy
  24.  *  and reliability of the software and data, the NLM and the U.S.
  25.  *  Government do not and cannot warrant the performance or results that
  26.  *  may be obtained by using this software or data. The NLM and the U.S.
  27.  *  Government disclaim all warranties, express or implied, including
  28.  *  warranties of performance, merchantability or fitness for any particular
  29.  *  purpose.
  30.  *
  31.  *  Please cite the author in any work or product based on this material.
  32.  *
  33.  * ===========================================================================
  34.  *
  35.  * Author:  Christiam Camacho
  36.  *
  37.  */
  38. /** @file blast_psi.c
  39.  * Implementation of the high level PSI-BLAST API
  40.  */
  41.     
  42. #include <algo/blast/core/blast_psi.h>
  43. #include <algo/blast/core/blast_stat.h>
  44. #include <algo/blast/core/blast_encoding.h>
  45. #include "blast_psi_priv.h"
  46. /* FIXME: document all local variables */
  47. /****************************************************************************/
  48. /* Use the following #define's to enable/disable functionality */
  49. /* Taking gaps into account when constructing a PSSM was introduced in the 
  50.  * 2001 paper "Improving the accuracy of PSI-BLAST protein database searches
  51.  * with composition based-statistics and other refinements". This feature 
  52.  * can be disabled by defining the PSI_IGNORE_GAPS_IN_COLUMNS symbol below */
  53. /* #define PSI_IGNORE_GAPS_IN_COLUMNS */
  54. /****************************************************************************/
  55. PsiMatrix*
  56. PSICreatePSSM(PsiAlignmentData* alignment,      /* [in] */
  57.               const PSIBlastOptions* options,   /* [in] */
  58.               BlastScoreBlk* sbp,               /* [in] */
  59.               PsiDiagnostics* diagnostics)      /* [out] */
  60. {
  61.     PsiMatrix* retval = NULL;
  62.     PsiAlignedBlock* aligned_block = NULL;
  63.     PsiSequenceWeights* seq_weights = NULL; 
  64.     aligned_block = _PSIAlignedBlockNew(alignment->dimensions->query_sz);
  65.     seq_weights = _PSISequenceWeightsNew(alignment->dimensions, sbp);
  66.     retval = PSIMatrixNew(alignment->dimensions->query_sz, sbp->alphabet_size);
  67.     PSIPurgeBiasedSegments(alignment);
  68.     PSIComputeAlignmentBlocks(alignment, aligned_block);
  69.     PSIComputeSequenceWeights(alignment, aligned_block, seq_weights);
  70.     PSIComputeResidueFrequencies(alignment, seq_weights, sbp, aligned_block,
  71.                                  options, retval);
  72.     PSIConvertResidueFreqsToPSSM(retval, alignment->query, sbp, 
  73.                                  seq_weights->std_prob);
  74.     PSIScaleMatrix(alignment->query, alignment->dimensions->query_sz, 
  75.                    seq_weights->std_prob, NULL, retval, sbp);
  76.     if (diagnostics) {
  77.         diagnostics = _PSISaveDiagnostics(alignment, aligned_block,
  78.                                           seq_weights);
  79.     } else {
  80.         /* FIXME: Deallocate structures selectively as some of these will be
  81.          * copied into the diagnostics structure */
  82.         _PSIAlignedBlockFree(aligned_block);
  83.         _PSISequenceWeightsFree(seq_weights);
  84.     }
  85.     return retval;
  86. }
  87. /****************************************************************************/
  88. /** Initializes the alignment data structure with the query sequence
  89.  * information. 
  90.  */
  91. static void
  92. PSIExtractQuerySequenceInfo(PsiAlignmentData* alignment);
  93. PsiAlignmentData*
  94. PSIAlignmentDataNew(const Uint1* query, const PsiInfo* info)
  95. {
  96.     PsiAlignmentData* retval = NULL;        /* the return value */
  97.     Uint4 s = 0;                            /* index in sequences */
  98.     Uint4 p = 0;                            /* index on positions */
  99.     if ( !query || !info ) {
  100.         return NULL;
  101.     }
  102.     if ( !(retval = (PsiAlignmentData*) calloc(1, sizeof(PsiAlignmentData)))) {
  103.          return NULL;
  104.     }
  105.     /* This doesn't need to be query_sz + 1 (posC) */
  106.     retval->res_counts = (Uint4**) _PSIAllocateMatrix(info->query_sz,
  107.                                                       PSI_ALPHABET_SIZE,
  108.                                                       sizeof(Uint4));
  109.     if ( !(retval->res_counts) ) {
  110.         return PSIAlignmentDataFree(retval);
  111.     }
  112.     retval->match_seqs = (Uint4*) calloc(info->query_sz, sizeof(int));
  113.     if ( !(retval->match_seqs)) {
  114.         return PSIAlignmentDataFree(retval);
  115.     }
  116.     retval->desc_matrix = (PsiDesc**) _PSIAllocateMatrix(info->num_seqs + 1,
  117.                                                          info->query_sz,
  118.                                                          sizeof(PsiDesc));
  119.     if (!retval->desc_matrix) {
  120.         return PSIAlignmentDataFree(retval);
  121.     }
  122.     for (s = 0; s < info->num_seqs + 1; s++) {
  123.         for (p = 0; p < info->query_sz; p++) {
  124.             retval->desc_matrix[s][p].letter = -1;
  125.             retval->desc_matrix[s][p].used = FALSE;
  126.             retval->desc_matrix[s][p].e_value = kDefaultEvalueForPosition;
  127.             retval->desc_matrix[s][p].extents.left = -1;
  128.             retval->desc_matrix[s][p].extents.right = info->query_sz;
  129.         }
  130.     }
  131.     retval->use_sequences = (Boolean*) calloc(info->num_seqs + 1, 
  132.                                               sizeof(Boolean));
  133.     if (!retval->use_sequences) {
  134.         return PSIAlignmentDataFree(retval);
  135.     }
  136.     /* All sequences are valid candidates for taking part in 
  137.        PSSM construction */
  138.     for (s = 0; s < info->num_seqs + 1; s++) {
  139.         retval->use_sequences[s] = TRUE;
  140.     }
  141.     if ( !(retval->dimensions = (PsiInfo*) calloc(1, sizeof(PsiInfo)))) {
  142.         return PSIAlignmentDataFree(retval);
  143.     }
  144.     memcpy((void*) retval->dimensions, (void*) info, sizeof(*info));
  145.     retval->query = (Uint1*) malloc(info->query_sz * sizeof(Uint1));
  146.     if ( !retval->query ) {
  147.         return PSIAlignmentDataFree(retval);
  148.     }
  149.     memcpy((void*) retval->query, (void*) query, info->query_sz);
  150.     PSIExtractQuerySequenceInfo(retval);
  151.     return retval;
  152. }
  153. PsiAlignmentData*
  154. PSIAlignmentDataFree(PsiAlignmentData* alignment)
  155. {
  156.     if ( !alignment ) {
  157.         return NULL;
  158.     }
  159.     if (alignment->res_counts) {
  160.         _PSIDeallocateMatrix((void**) alignment->res_counts,
  161.                              alignment->dimensions->query_sz);
  162.         alignment->res_counts = NULL;
  163.     }
  164.     if (alignment->match_seqs) {
  165.         sfree(alignment->match_seqs);
  166.     }
  167.     if (alignment->desc_matrix) {
  168.         _PSIDeallocateMatrix((void**) alignment->desc_matrix,
  169.                              alignment->dimensions->num_seqs + 1);
  170.         alignment->desc_matrix = NULL;
  171.     }
  172.     if (alignment->use_sequences) {
  173.         sfree(alignment->use_sequences);
  174.     }
  175.     if (alignment->dimensions) {
  176.         sfree(alignment->dimensions);
  177.     }
  178.     if (alignment->query) {
  179.         sfree(alignment->query);
  180.     }
  181.     sfree(alignment);
  182.     return NULL;
  183. }
  184. PsiMatrix*
  185. PSIMatrixNew(Uint4 query_sz, Uint4 alphabet_size)
  186. {
  187.     PsiMatrix* retval = NULL;
  188.     if ( !(retval = (PsiMatrix*) calloc(1, sizeof(PsiMatrix)))) {
  189.         return NULL;
  190.     }
  191.     retval->ncols = query_sz + 1;
  192.     retval->pssm = (int**) _PSIAllocateMatrix(query_sz + 1, alphabet_size,
  193.                                               sizeof(int));
  194.     if ( !(retval->pssm) ) {
  195.         return PSIMatrixFree(retval);
  196.     }
  197.     retval->scaled_pssm = (int**) _PSIAllocateMatrix(query_sz + 1, 
  198.                                                      alphabet_size,
  199.                                                      sizeof(int));
  200.     if ( !(retval->scaled_pssm) ) {
  201.         return PSIMatrixFree(retval);
  202.     }
  203.     retval->res_freqs = (double**) _PSIAllocateMatrix(query_sz + 1, 
  204.                                                       alphabet_size, 
  205.                                                       sizeof(double));
  206.     if ( !(retval->res_freqs) ) {
  207.         return PSIMatrixFree(retval);
  208.     }
  209.     return retval;
  210. }
  211. PsiMatrix*
  212. PSIMatrixFree(PsiMatrix* matrix)
  213. {
  214.     if ( !matrix ) {
  215.         return NULL;
  216.     }
  217.     if (matrix->pssm) {
  218.         _PSIDeallocateMatrix((void**) matrix->pssm, matrix->ncols);
  219.     }
  220.     if (matrix->scaled_pssm) {
  221.         _PSIDeallocateMatrix((void**) matrix->scaled_pssm, matrix->ncols);
  222.     }
  223.     if (matrix->res_freqs) {
  224.         _PSIDeallocateMatrix((void**) matrix->res_freqs, matrix->ncols);
  225.     }
  226.     sfree(matrix);
  227.     return NULL;
  228. }
  229. PsiDiagnostics*
  230. PSIDiagnosticsNew(Uint4 query_sz, Uint4 alphabet_size)
  231. {
  232.     PsiDiagnostics* retval = NULL;
  233.     if ( !(retval = (PsiDiagnostics*) calloc(1, sizeof(PsiDiagnostics)))) {
  234.         return NULL;
  235.     }
  236.     retval->info_content = (double**) _PSIAllocateMatrix(query_sz,
  237.                                                          alphabet_size,
  238.                                                          sizeof(double));
  239.     if ( !(retval->info_content) ) {
  240.         return PSIDiagnosticsFree(retval);
  241.     }
  242.     retval->ncols = query_sz;
  243.     return retval;
  244. }
  245. PsiDiagnostics*
  246. PSIDiagnosticsFree(PsiDiagnostics* diags)
  247. {
  248.     if ( !diags )
  249.         return NULL;
  250.     if (diags->info_content) {
  251.         _PSIDeallocateMatrix((void**) diags->info_content, diags->ncols);
  252.     }
  253.     sfree(diags);
  254.     return NULL;
  255. }
  256. /****************************************************************************/
  257. /* Auxiliary functions to populate PsiAlignmentData structure */
  258. static void
  259. PSIExtractQuerySequenceInfo(PsiAlignmentData* alignment)
  260. {
  261.     Uint4 i = 0;
  262.     ASSERT(alignment);
  263.     for (i = 0; i < alignment->dimensions->query_sz; i++) {
  264.         alignment->desc_matrix[kQueryIndex][i].letter = alignment->query[i];
  265.         alignment->desc_matrix[kQueryIndex][i].used = TRUE;
  266.         alignment->desc_matrix[kQueryIndex][i].e_value = 
  267.             PSI_INCLUSION_ETHRESH / 2;
  268.         alignment->desc_matrix[kQueryIndex][i].extents.left = 0;
  269.         alignment->desc_matrix[kQueryIndex][i].extents.right =
  270.             alignment->dimensions->query_sz;
  271.         alignment->res_counts[i][alignment->query[i]]++;
  272.         alignment->match_seqs[i]++;
  273.     }
  274. }