scoremat.asn
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:8k
- --$Id: scoremat.asn,v 1000.1 2004/04/12 17:26:09 gouriano Exp $
- -- ===========================================================================
- --
- -- PUBLIC DOMAIN NOTICE
- -- National Center for Biotechnology Information
- --
- -- This software/database is a "United States Government Work" under the
- -- terms of the United States Copyright Act. It was written as part of
- -- the author's official duties as a United States Government employee and
- -- thus cannot be copyrighted. This software/database is freely available
- -- to the public for use. The National Library of Medicine and the U.S.
- -- Government have not placed any restriction on its use or reproduction.
- --
- -- Although all reasonable efforts have been taken to ensure the accuracy
- -- and reliability of the software and data, the NLM and the U.S.
- -- Government do not and cannot warrant the performance or results that
- -- may be obtained by using this software or data. The NLM and the U.S.
- -- Government disclaim all warranties, express or implied, including
- -- warranties of performance, merchantability or fitness for any particular
- -- purpose.
- --
- -- Please cite the author in any work or product based on this material.
- --
- -- ===========================================================================
- --
- -- Author: Christiam Camacho
- --
- -- File Description:
- -- ASN.1 definitions for scoring matrix
- --
- -- ===========================================================================
- -- $Log: scoremat.asn,v $
- -- Revision 1000.1 2004/04/12 17:26:09 gouriano
- -- PRODUCTION: UPGRADED [CATCHUP_003] Dev-tree R1.7
- --
- -- Revision 1.7 2004/02/23 15:42:24 camacho
- -- 1. Gave default value to Score-matrix::byrow field
- -- 2. Updated documentation
- --
- -- Revision 1.6 2003/08/25 19:03:28 bauer
- -- added raw frequencies and weights as optional score-matrix components
- --
- -- Revision 1.5 2003/06/16 12:19:56 madden
- -- Do not use Blast4-ka-block, other changes worked out with structure group
- --
- -- Revision 1.2 2002/12/03 14:01:45 camacho
- -- Data type changes
- --
- -- Revision 1.1 2002/12/02 22:15:27 camacho
- -- Initial revision
- --
- -- ===========================================================================
- NCBI-ScoreMat DEFINITIONS ::= BEGIN
- EXPORTS Score-matrix, Score-matrix-parameters;
-
- IMPORTS Object-id FROM NCBI-General
- Seq-entry FROM NCBI-Seqset;
- -- a rudimentary block/core-model, to be used with block-based alignment routines
- -- and threading
- BlockProperty ::= SEQUENCE { -- one typed property of a block: a kind tag plus an optional integer and/or text payload
- type INTEGER { unassigned (0), -- kind of property; named values follow
- threshold (1), -- score threshold for heuristics
- minscore (2), -- observed minimum score in CD (NOTE(review): CD presumably means conserved domain; confirm)
- maxscore (3), -- observed maximum score in CD
- meanscore (4), -- observed mean score in CD
- variance (5), -- observed score variance
- name (10), -- just name the block
- is-optional(20), -- block may not have to be used
- other (255) }, -- any kind of property not listed above
- intvalue INTEGER OPTIONAL, -- integer payload; presumably used by the score-like types (TODO confirm with consumers)
- textvalue VisibleString OPTIONAL -- text payload; presumably holds the block name for type "name" (TODO confirm)
- }
- CoreBlock ::= SEQUENCE { -- one block of the core model, located on the query sequence
- start INTEGER, -- begin of block on query (NOTE(review): 0- or 1-based origin is not stated here; confirm)
- stop INTEGER, -- end of block on query (NOTE(review): inclusive or exclusive is not stated here; confirm)
- minstart INTEGER OPTIONAL, -- optional N-terminal extension; presumably the lowest position start may move to (TODO confirm)
- maxstop INTEGER OPTIONAL, -- optional C-terminal extension; presumably the highest position stop may move to (TODO confirm)
- property SEQUENCE OF BlockProperty OPTIONAL -- optional list of BlockProperty annotations for this block
- }
- LoopConstraint ::= SEQUENCE { -- allowed length range of one unaligned (loop) region
- minlength INTEGER DEFAULT 0, -- minimum length of unaligned region; 0 when the field is absent
- maxlength INTEGER DEFAULT 100000 -- maximum length of unaligned region; 100000 when the field is absent
- }
- CoreDef ::= SEQUENCE { -- core model: a list of blocks plus length constraints for the loops around them
- nblocks INTEGER, -- number of core elements/blocks; the schema itself does not tie this to the list lengths below
- blocks SEQUENCE OF CoreBlock, -- nblocks locations
- loops SEQUENCE OF LoopConstraint -- (nblocks+1) constraints; presumably before, between and after the blocks (TODO confirm order)
- }
- -- Scoring matrix type that can store both symmetric matrices and
- -- non-symmetric matrices (PSSMs).
- --
- -- Column indices on the PSSM refer to the positions corresponding to the
- -- query/master sequence, i.e. the number of columns (N) is the same
- -- as the length of the query/master sequence.
- -- Row indices refer to individual amino acid types, i.e. the number of
- -- rows (M) is the same as the number of different residues in the
- -- alphabet we use. Consequently, row labels are amino acid identifiers.
- --
- -- PSSMs are stored as linear arrays of integers. By default, we store
- -- them column-by-column, M values for the first column followed by M
- -- values for the second column, and so on. In order to provide
- -- flexibility for external applications, the boolean field "byrow" is
- -- provided to specify the storage order.
- Score-matrix ::= SEQUENCE { -- scoring matrix or PSSM held as linear integer arrays, with its dimensions and optional metadata
- is-protein BOOLEAN, -- TRUE if the matrix is for proteins (FALSE presumably means nucleotide; confirm)
- identifier Object-id, -- name of matrix (e.g., "blosum62") or number
- comments SEQUENCE OF VisibleString OPTIONAL, -- free-text comments on the matrix
- -- The dimensions of the matrix are returned so the client can
- -- verify that all data was received. Both dimensions are returned for
- -- non-symmetric matrices (psi-blast uses these).
- nrows INTEGER, -- number of rows
- ncolumns INTEGER, -- number of columns
- -- row-labels is given to note the order of residue types so that it can
- -- be cross-checked between applications.
- -- If this field is not given, the matrix values are presented in
- -- alphabet order: ncbistdaa for protein, ncbi4na for nucleotide.
- -- For proteins the values returned correspond to
- -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
- row-labels SEQUENCE OF VisibleString OPTIONAL,
- -- values of the scoring matrix as one linear array. Non-symmetric matrices
- -- are laid out in the order given by the byrow field
- scores SEQUENCE OF INTEGER OPTIONAL,
- -- are matrices stored row by row? Defaults to FALSE (column-by-column storage)
- byrow BOOLEAN DEFAULT FALSE,
- -- pseudocount constant used for PSSM
- pseudocounts INTEGER OPTIONAL,
- -- PSSM representative sequence (master)
- query Seq-entry OPTIONAL,
- -- frequencies observed in alignment, stored as integers (see rawFreqs-scale-factor)
- rawFreqs SEQUENCE OF INTEGER OPTIONAL,
- -- residue frequencies, stored as integers (see posFreqs-scale-factor)
- posFreqs SEQUENCE OF INTEGER OPTIONAL,
- -- sequence weights for individual cells, stored as integers (see weights-scale-factor)
- weights SEQUENCE OF INTEGER OPTIONAL,
- -- frequencies used in pseudocount method; presumably names the standard
- -- matrix whose frequency ratios were used (TODO confirm)
- freq-Ratios INTEGER { unassigned (0),
- blosum62 (1),
- blosum45 (2),
- blosum80 (3),
- blosum50 (4),
- blosum90 (5),
- pam30 (6),
- pam70 (7),
- pam250 (8),
- other (255)
- } OPTIONAL,
- -- scaling factor for the scores (NOTE(review): direction of scaling is not stated here; confirm)
- score-scale-factor INTEGER OPTIONAL,
- -- scaling factor for the posFreqs values
- posFreqs-scale-factor INTEGER OPTIONAL,
- -- scaling factor for the rawFreqs values
- rawFreqs-scale-factor INTEGER OPTIONAL,
- -- scaling factor for the weights values
- weights-scale-factor INTEGER OPTIONAL,
- -- presumably the gap opening penalty (TODO confirm); scaled by score-scale-factor (above)
- gapOpen INTEGER OPTIONAL,
- -- presumably the gap extension penalty (TODO confirm); scaled by score-scale-factor (above)
- gapExtend INTEGER OPTIONAL
- }
- -- Envelope containing matrix or pssm, Karlin-Altschul parameters, and block constraints.
- Score-matrix-parameters ::= SEQUENCE { -- envelope: one required matrix plus optional statistics and block constraints
- matrix Score-matrix, -- the matrix or PSSM itself; the only required member
- lambda REAL OPTIONAL, -- presumably the Karlin-Altschul lambda parameter (TODO confirm)
- kappa REAL OPTIONAL, -- presumably the Karlin-Altschul K parameter (TODO confirm)
- h REAL OPTIONAL, -- presumably the Karlin-Altschul H parameter (TODO confirm)
- constraints CoreDef OPTIONAL -- optional block constraints expressed as a CoreDef
- }
- END