all.asn
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:140k
- enum Num-enum , -- enumerated names for residues
- ref Num-ref , -- by reference to another sequence
- real Num-real } -- supports mapping to a float system
-
- Num-cont ::= SEQUENCE { -- continuous display numbering system
- refnum INTEGER DEFAULT 1, -- number assigned to first residue (1 if omitted)
- has-zero BOOLEAN DEFAULT FALSE , -- is 0 used as a number? (no if omitted)
- ascending BOOLEAN DEFAULT TRUE } -- do the numbers ascend? (yes if omitted)
- Num-enum ::= SEQUENCE { -- enumerated names: arbitrary text tags for residues
- num INTEGER , -- number of tags to follow
- names SEQUENCE OF VisibleString } -- the tags themselves, in order
- Num-ref ::= SEQUENCE { -- numbering by reference to other sequences
- type ENUMERATED { -- type of reference
- not-set (0) , -- no reference type chosen
- sources (1) , -- by segmented or const seq sources
- aligns (2) } , -- by alignments given below
- aligns Seq-align OPTIONAL } -- the alignment used when type = aligns (presumably omitted otherwise; confirm)
- Num-real ::= SEQUENCE { -- linear mapping to a floating point system
- a REAL , -- scale factor applied to the integer position used by Bioseq
- b REAL , -- offset: position = (a * int_position) + b
- units VisibleString OPTIONAL } -- optional text label for the units (presumably of the mapped position; confirm)
- Pubdesc ::= SEQUENCE { -- how the sequence was presented in a publication
- pub Pub-equiv , -- the citation(s)
- name VisibleString OPTIONAL , -- name used in paper
- fig VisibleString OPTIONAL , -- figure in paper
- num Numbering OPTIONAL , -- numbering from paper
- numexc BOOLEAN OPTIONAL , -- numbering problem with paper
- poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure?
- maploc VisibleString OPTIONAL , -- map location reported in paper
- seq-raw StringStore OPTIONAL , -- original sequence from paper
- align-group INTEGER OPTIONAL , -- this seq aligned with others in paper
- comment VisibleString OPTIONAL, -- any comment on this pub in context
- reftype INTEGER { -- type of reference in a GenBank record
- seq (0) , -- refers to sequence
- sites (1) , -- refers to unspecified features
- feats (2) , -- refers to specified features
- no-target (3) } -- nothing specified (EMBL)
- DEFAULT seq } -- an omitted reftype means the reference refers to the sequence
- Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc
- --*** Instances of sequences *******************************
- --*
- Seq-inst ::= SEQUENCE { -- the sequence data itself
- repr ENUMERATED { -- representation class
- not-set (0) , -- empty
- virtual (1) , -- no seq data
- raw (2) , -- continuous sequence
- seg (3) , -- segmented sequence (see Seg-ext)
- const (4) , -- constructed sequence (uses Seq-hist.assembly)
- ref (5) , -- reference to another sequence (see Ref-ext)
- consen (6) , -- consensus sequence or pattern
- map (7) , -- ordered map of any kind (see Map-ext)
- delta (8) , -- sequence made by changes (delta) to others (see Delta-ext)
- other (255) } ,
- mol ENUMERATED { -- molecule class in living organism
- not-set (0) , -- > cdna = rna
- dna (1) ,
- rna (2) ,
- aa (3) , -- amino acid
- na (4) , -- just a nucleic acid
- other (255) } ,
- length INTEGER OPTIONAL , -- length of sequence in residues
- fuzz Int-fuzz OPTIONAL , -- length uncertainty
- topology ENUMERATED { -- topology of molecule
- not-set (0) ,
- linear (1) ,
- circular (2) ,
- tandem (3) , -- some part of tandem repeat
- other (255) } DEFAULT linear , -- linear when omitted
- strand ENUMERATED { -- strandedness in living organism
- not-set (0) ,
- ss (1) , -- single strand
- ds (2) , -- double strand
- mixed (3) ,
- other (255) } OPTIONAL , -- default ds for DNA, ss for RNA, pept
- seq-data Seq-data OPTIONAL , -- the sequence
- ext Seq-ext OPTIONAL , -- extensions for special types
- hist Seq-hist OPTIONAL } -- sequence history
- --*** Sequence Extensions **********************************
- --* for representing more complex types
- --* const type uses Seq-hist.assembly
- Seq-ext ::= CHOICE { -- extension data; exactly one alternative is present
- seg Seg-ext , -- segmented sequences
- ref Ref-ext , -- hot link to another sequence (a view)
- map Map-ext , -- ordered map of markers
- delta Delta-ext } -- sequence built from a series of Delta-seq pieces
- Seg-ext ::= SEQUENCE OF Seq-loc -- segmented sequence: ordered list of locations
- Ref-ext ::= Seq-loc -- a single location on another sequence
- Map-ext ::= SEQUENCE OF Seq-feat -- ordered map of markers, each a Seq-feat
- Delta-ext ::= SEQUENCE OF Delta-seq -- ordered list of delta pieces
- Delta-seq ::= CHOICE { -- one piece of a Delta-ext
- loc Seq-loc , -- point to a sequence
- literal Seq-literal } -- a piece of sequence
- Seq-literal ::= SEQUENCE { -- a piece of sequence given inline
- length INTEGER , -- must give a length in residues
- fuzz Int-fuzz OPTIONAL , -- could be unsure
- seq-data Seq-data OPTIONAL } -- may have the data
- --*** Sequence History Record ***********************************
- --** assembly = records how seq was assembled from others
- --** replaces = records sequences made obsolete by this one
- --** replaced-by = this seq is made obsolete by another(s)
- Seq-hist ::= SEQUENCE { -- sequence history record
- assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
- replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete
- replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
- deleted CHOICE { -- marks the sequence as deleted, in one of two forms
- bool BOOLEAN , -- a bare flag
- date Date } OPTIONAL } -- a date (presumably the deletion date; confirm)
- Seq-hist-rec ::= SEQUENCE { -- one replaces / replaced-by entry of a Seq-hist
- date Date OPTIONAL , -- presumably the date of the replacement; confirm
- ids SET OF Seq-id } -- ids of the other sequences involved
-
- --*** Various internal sequence representations ************
- --* all are controlled, fixed length forms
- Seq-data ::= CHOICE { -- sequence representations; exactly one encoding is present
- iupacna IUPACna , -- IUPAC 1 letter nuc acid code
- iupacaa IUPACaa , -- IUPAC 1 letter amino acid code
- ncbi2na NCBI2na , -- 2 bit nucleic acid code
- ncbi4na NCBI4na , -- 4 bit nucleic acid code
- ncbi8na NCBI8na , -- 8 bit extended nucleic acid code
- ncbipna NCBIpna , -- nucleic acid probabilities
- ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes
- ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes
- ncbipaa NCBIpaa , -- amino acid probabilities
- ncbistdaa NCBIstdaa } -- consecutive codes for std aas
- IUPACna ::= StringStore -- IUPAC 1 letter nucleic acid codes, no spaces
- IUPACaa ::= StringStore -- IUPAC 1 letter amino acid codes, no spaces
- NCBI2na ::= OCTET STRING -- 2 bit code: 00=A, 01=C, 10=G, 11=T
- NCBI4na ::= OCTET STRING -- 4 bit code, 1 bit each for agct
- -- 0001=A, 0010=C, 0100=G, 1000=T/U
- -- 0101=Purine (A|G), 1010=Pyrimidine (C|T), etc
- NCBI8na ::= OCTET STRING -- 8 bit code, for modified nucleic acids
- NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n
- -- probabilities are coded 0-255 = 0.0-1.0
- NCBI8aa ::= OCTET STRING -- 8 bit code, for modified amino acids
- NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes
- -- IUPAC codes + U=selenocysteine
- NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order:
- -- A-Y,B,Z,X,(ter),anything
- -- probabilities are coded 0-255 = 0.0-1.0
- NCBIstdaa ::= OCTET STRING -- consecutive codes 0-25 for std aas, 1 per byte
- --*** Sequence Annotation *************************************
- --*
- Annot-id ::= CHOICE { -- identifier for a Seq-annot, in one of three forms
- local Object-id , -- presumably a locally assigned id; confirm
- ncbi INTEGER , -- presumably an NCBI-assigned integer id; confirm
- general Dbtag } -- id expressed as a Dbtag
-
- Annot-descr ::= SET OF Annotdesc -- unordered collection of descriptors for a Seq-annot
- Annotdesc ::= CHOICE { -- one descriptor; exactly one alternative is present
- name VisibleString , -- a short name for this collection
- title VisibleString , -- a title for this collection
- comment VisibleString , -- a more extensive comment
- pub Pubdesc , -- a reference to the publication
- user User-object , -- user defined object
- create-date Date , -- date entry first created/released
- update-date Date , -- date of last update
- src Seq-id , -- source sequence from which annot came
- align Align-def, -- definition of the SeqAligns
- region Seq-loc } -- all contents cover this region
- Align-def ::= SEQUENCE { -- describes the alignments held in a Seq-annot
- align-type INTEGER { -- class of align Seq-annot
- ref (1) , -- set of alignments to the same sequence
- alt (2) , -- set of alternate alignments of the same seqs
- blocks (3) , -- set of aligned blocks in the same seqs
- other (255) } ,
- ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
- Seq-annot ::= SEQUENCE { -- a set of annotations of one kind
- id SET OF Annot-id OPTIONAL , -- optional identifiers for this annotation set
- db INTEGER { -- source of annotation
- genbank (1) ,
- embl (2) ,
- ddbj (3) ,
- pir (4) ,
- sp (5) ,
- bbone (6) ,
- pdb (7) ,
- other (255) } OPTIONAL ,
- name VisibleString OPTIONAL ,-- source if "other" above
- desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots
- data CHOICE { -- the payload; exactly one kind per Seq-annot
- ftable SET OF Seq-feat , -- features
- align SET OF Seq-align , -- alignments
- graph SET OF Seq-graph , -- graphs
- ids SET OF Seq-id , -- used for communication between tools
- locs SET OF Seq-loc } } -- used for communication between tools
- END
- --$Revision: 1000.1 $
- --********************************************************************
- --
- -- Direct Submission of Sequence Data
- -- James Ostell, 1991
- --
- -- This is a trial specification for direct submission of sequence
- -- data worked out between NCBI and EMBL
- -- Later revised to reflect work with GenBank and Integrated database
- --
- -- Version 3.0, 1994
- -- This is the official NCBI sequence submission format now.
- --
- --********************************************************************
- NCBI-Submit DEFINITIONS ::=
- BEGIN
- EXPORTS Seq-submit, Contact-info;
- IMPORTS Cit-sub, Author FROM NCBI-Biblio
- Date, Object-id FROM NCBI-General
- Seq-annot FROM NCBI-Sequence
- Seq-id FROM NCBI-Seqloc
- Seq-entry FROM NCBI-Seqset;
- Seq-submit ::= SEQUENCE { -- top-level object of a direct submission
- sub Submit-block , -- contact, citation and handling information
- data CHOICE { -- what is being submitted; exactly one kind
- entrys SET OF Seq-entry , -- sequence(s)
- annots SET OF Seq-annot , -- annotation(s)
- delete SET OF Seq-id } } -- deletions of entries
- Submit-block ::= SEQUENCE { -- administrative information about a submission
- contact Contact-info , -- who to contact
- cit Cit-sub , -- citation for this submission
- hup BOOLEAN DEFAULT FALSE , -- hold until publish (not held if omitted)
- reldate Date OPTIONAL , -- release by date
- subtype INTEGER { -- type of submission
- new (1) , -- new data
- update (2) , -- update by author
- revision (3) , -- 3rd party (non-author) update
- other (255) } OPTIONAL ,
- tool VisibleString OPTIONAL, -- tool used to make submission
- user-tag VisibleString OPTIONAL, -- user supplied id for this submission
- comment VisibleString OPTIONAL } -- user comments/advice to database
- Contact-info ::= SEQUENCE { -- who to contact to discuss the submission
- name VisibleString OPTIONAL , -- OBSOLETE: will be removed
- address SEQUENCE OF VisibleString OPTIONAL , -- address as an ordered list of text lines
- phone VisibleString OPTIONAL ,
- fax VisibleString OPTIONAL ,
- email VisibleString OPTIONAL ,
- telex VisibleString OPTIONAL ,
- owner-id Object-id OPTIONAL , -- for owner accounts
- password OCTET STRING OPTIONAL , -- NOTE(review): schema does not say whether this is hashed or encrypted; confirm
- last-name VisibleString OPTIONAL , -- structured to replace name above
- first-name VisibleString OPTIONAL , -- part of the structured replacement for name
- middle-initial VisibleString OPTIONAL , -- part of the structured replacement for name
- contact Author OPTIONAL } -- WARNING: this will replace the above
- END
- --$Revision: 1000.1 $
- --****************************************************************
- --
- -- NCBI Project Definition Module
- -- by Jim Ostell and Jonathan Kans, 1998
- --
- --****************************************************************
- NCBI-Project DEFINITIONS ::=
- BEGIN
- EXPORTS Project, Project-item;
- IMPORTS Date FROM NCBI-General
- PubMedId FROM NCBI-Biblio
- Seq-id, Seq-loc FROM NCBI-Seqloc
- Seq-annot, Pubdesc FROM NCBI-Sequence
- Seq-entry FROM NCBI-Seqset
- Pubmed-entry FROM NCBI-PubMed;
- Project ::= SEQUENCE { -- a project: optional description plus its contents
- descr Project-descr OPTIONAL , -- ids, name and descriptors of the project
- data Project-item } -- the contents; one homogeneous set (see Project-item)
- Project-item ::= CHOICE { -- project contents; exactly one alternative is present
- pmuid SET OF INTEGER , -- *uid alternatives hold bare integer uids (pm presumably = PubMed, cf. pmid; confirm)
- protuid SET OF INTEGER ,
- nucuid SET OF INTEGER ,
- sequid SET OF INTEGER ,
- genomeuid SET OF INTEGER ,
- structuid SET OF INTEGER ,
- pmid SET OF PubMedId , -- *id alternatives hold typed identifiers
- protid SET OF Seq-id ,
- nucid SET OF Seq-id ,
- seqid SET OF Seq-id ,
- genomeid SET OF Seq-id ,
- structid NULL , -- carries no data: no id type is defined for this alternative here
- pment SET OF Pubmed-entry , -- *ent alternatives hold complete entries
- protent SET OF Seq-entry ,
- nucent SET OF Seq-entry ,
- seqent SET OF Seq-entry ,
- genomeent SET OF Seq-entry ,
- structent NULL , -- carries no data: no entry type is defined for this alternative here
- seqannot SET OF Seq-annot , -- a set of annotations
- loc SET OF Seq-loc , -- a set of sequence locations
- proj SET OF Project -- nested projects
- }
- Project-descr ::= SEQUENCE { -- description of a Project
- id SET OF Project-id , -- one or more identifiers (required)
- name VisibleString OPTIONAL , -- optional name of the project
- descr SET OF Projdesc OPTIONAL } -- optional descriptors (see Projdesc)
- Projdesc ::= CHOICE { -- one project descriptor; exactly one alternative is present
- pub Pubdesc , -- a publication
- date Date , -- a date (its meaning is not stated in this module)
- comment VisibleString , -- free-text comment
- title VisibleString -- a title
- }
- Project-id ::= VisibleString -- project identifiers are plain text strings
- END
- --$Revision: 1000.1 $
- --**********************************************************************
- --
- -- Biological Macromolecule 3-D Structure Data Types for MMDB,
- -- A Molecular Modeling Database
- --
- -- Definitions for a biomolecular assembly and the MMDB database
- --
- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- July 1995
- --
- --**********************************************************************
- -- Contents of the MMDB database are currently based on files distributed by
- -- the Protein Data Bank, PDB. These data are changed in form, as described
- -- in this specification. To some extent they are also changed in content, in
- -- that many data items implicit in PDB are made explicit, and others are
- -- corrected or omitted as a consequence of validation checks. The semantics
- -- of MMDB data items are indicated by comments within the specification below.
- -- These comments explain in detail the manner in which data items from PDB
- -- have been mapped into MMDB.
- MMDB DEFINITIONS ::=
- BEGIN
- EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
- Biostruc-residue-graph-set;
- IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph
- Biostruc-model FROM MMDB-Structural-model
- Biostruc-feature-set FROM MMDB-Features
- Pub FROM NCBI-Pub
- Date, Object-id, Dbtag FROM NCBI-General;
- -- A structure report or "biostruc" describes the components of a biomolecular
- -- assembly in terms of their names and descriptions, and a chemical graph
- -- giving atomic formula, connectivity and chirality. It also gives one or more
- -- three-dimensional model structures, literally a mapping of the atoms,
- -- residues and/or molecules of each component into a measured three-
- -- dimensional space. Structure may also be described by named features, which
- -- associate nodes in the chemical graph, or regions in space, with text or
- -- numeric descriptors.
- -- Note that a biostruc may also contain cross references to other databases,
- -- including citations to relevant scientific literature. These cross
- -- references use object types from other NCBI data specifications, which are
- -- "imported" into MMDB, and not repeated in this specification.
- Biostruc ::= SEQUENCE { -- a structure report for one biomolecular assembly
- id SEQUENCE OF Biostruc-id, -- identifiers for the assembly
- descr SEQUENCE OF Biostruc-descr OPTIONAL, -- names, comments, history and citations
- chemical-graph Biostruc-graph, -- atomic formula, connectivity and chirality
- features SEQUENCE OF Biostruc-feature-set OPTIONAL, -- named features on graph nodes or regions in space
- model SEQUENCE OF Biostruc-model OPTIONAL } -- one or more three-dimensional model structures
- -- A Biostruc-id is a collection of identifiers for the molecular assembly.
- -- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable
- -- identifiers. Other-id's are synonyms.
- Biostruc-id ::= CHOICE { -- one identifier for a molecular assembly
- mmdb-id Mmdb-id, -- NCBI-assigned; intended to be unique and stable
- other-database Dbtag, -- synonym: id in another database
- local-id Object-id } -- synonym: local id
- Mmdb-id ::= INTEGER -- MMDB ids are plain integers
- -- The description of a biostruc refers to both the reported chemical and
- -- spatial structure of a biomolecular assembly. PDB-derived descriptors
- -- which refer specifically to the chemical components or spatial structure
- -- are not provided here, but instead as descriptors of the biostruc-graph or
- -- biostruc-model. For PDB-derived structures the biostruc name is the PDB
- -- id-code. PDB-derived citations appear as publications within the biostruc
- -- description, and include a data-submission citation derived from PDB AUTHOR
- -- records. Citations are described using the NCBI Pub specification.
- Biostruc-descr ::= CHOICE { -- one descriptor of a biostruc
- name VisibleString, -- for PDB-derived structures, the PDB id-code
- pdb-comment VisibleString, -- comment text originating from PDB
- other-comment VisibleString, -- comment text from another source
- history Biostruc-history, -- origin and update history within MMDB
- attribution Pub } -- citation, described using the NCBI Pub specification
- -- The history of a biostruc indicates its origin and its update history
- -- within MMDB, the NCBI-maintained molecular structure database.
- Biostruc-history ::= SEQUENCE { -- origin and update history of a biostruc
- replaces Biostruc-replace OPTIONAL, -- the biostruc this one replaces
- replaced-by Biostruc-replace OPTIONAL, -- the biostruc that replaces this one
- data-source Biostruc-source OPTIONAL } -- the database this biostruc came from
- Biostruc-replace ::= SEQUENCE { -- one replacement event
- id Biostruc-id, -- id of the other biostruc
- date Date } -- presumably the date of the replacement; confirm
- -- The origin of a biostruc is a reference to another database. PDB release
- -- date and PDB-assigned id codes are recorded here, as are the PDB-assigned
- -- entry date and replacement history.
- Biostruc-source ::= SEQUENCE { -- reference to the database a biostruc originated from
- name-of-database VisibleString, -- name of the source database
- version-of-database CHOICE { -- identifies the database release, by date or by code
- release-date Date, -- e.g. the PDB release date
- release-code VisibleString } OPTIONAL,
- database-entry-id Biostruc-id, -- id assigned by the source database (e.g. PDB-assigned id code)
- database-entry-date Date, -- entry date assigned by the source database
- database-entry-history SEQUENCE OF VisibleString OPTIONAL} -- replacement history as recorded by the source database
- -- A biostruc set is a means to collect ASN.1 data for many biostrucs in
- -- one file, as convenient for application programs. The object type is not
- -- intended to imply similarity of the biostrucs grouped together.
- Biostruc-set ::= SEQUENCE { -- many biostrucs collected into one file
- id SEQUENCE OF Biostruc-id OPTIONAL, -- optional identifiers for the set itself
- descr SEQUENCE OF Biostruc-descr OPTIONAL, -- optional descriptors for the set itself
- biostrucs SEQUENCE OF Biostruc } -- the member biostrucs; grouping implies no similarity
- -- A biostruc annotation set is a means to collect ASN.1 data for biostruc
- -- features into one file. The object type is intended as a means to store
- -- feature annotation of similar type, such as "core" definitions for a
- -- threading program, or structure-structure alignments for a structure-
- -- similarity browser.
- Biostruc-annot-set ::= SEQUENCE { -- biostruc features collected into one file
- id SEQUENCE OF Biostruc-id OPTIONAL, -- optional identifiers for the set itself
- descr SEQUENCE OF Biostruc-descr OPTIONAL, -- optional descriptors for the set itself
- features SEQUENCE OF Biostruc-feature-set } -- the feature annotation, intended to be of similar type
- -- A biostruc residue graph set is a collection of residue graphs. The object
- -- type is intended as a means to record dictionaries containing the chemical
- -- subgraphs of "standard" residue types, which are used as a means to
- -- simplify description of the covalent structure of a biomolecular assembly.
- -- The standard residue graph dictionary supplied with the MMDB database
- -- contains 20 standard L amino acids and 8 standard ribonucleotide groups.
- -- These graphs are complete, including explicit hydrogen atoms and separate
- -- instances for the terminal polypeptide and polynucleotide residues.
- Biostruc-residue-graph-set ::= SEQUENCE { -- a dictionary of residue graphs
- id SEQUENCE OF Biostruc-id OPTIONAL, -- optional identifiers for the dictionary
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- optional descriptors for the dictionary
- residue-graphs SEQUENCE OF Residue-graph } -- the chemical subgraphs of the residue types
- END
- --**********************************************************************
- --
- -- Biological Macromolecule 3-D Structure Data Types for MMDB,
- -- A Molecular Modeling Database
- --
- -- Definitions for a chemical graph
- --
- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- July, 1995
- --
- --**********************************************************************
- MMDB-Chemical-graph DEFINITIONS ::=
- BEGIN
- EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
- Molecule-id, Residue-id, Atom-id;
- IMPORTS Pub FROM NCBI-Pub
- BioSource FROM NCBI-BioSource
- Seq-id FROM NCBI-Seqloc
- Biostruc-id FROM MMDB;
- -- A biostruc graph contains the complete chemical graph of the biomolecular
- -- assembly. The assembly graph is defined hierarchically, in terms of
- -- subgraphs of component molecules. For PDB-derived biostrucs,
- -- the molecules forming the assembly are the individual biopolymer chains and
- -- any non-polymer or "heterogen" groups which are present.
- -- The PDB-derived "compound name" field appears as the name within the
- -- biostruc-graph description. PDB "class" and "source" fields appear as
- -- explicit attributes. PDB-derived structures are assigned an assembly type
- -- of "other" unless they have been further classified as the "physiological
- -- form" or "crystallographic cell" contents. If they have, the source of the
- -- type classification appears as a citation within the assembly description.
- -- Note that the biostruc-graph also includes as literals the subgraphs of
- -- any nonstandard residues present within it. For PDB-derived biostrucs these
- -- subgraphs are constructed automatically, with validation as described below.
- Biostruc-graph ::= SEQUENCE { -- complete chemical graph of the biomolecular assembly
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- assembly-level descriptors (name = PDB compound name)
- molecule-graphs SEQUENCE OF Molecule-graph, -- subgraphs of the component molecules
- inter-molecule-bonds SEQUENCE OF Inter-residue-bond OPTIONAL, -- bonds joining atoms of different molecules
- residue-graphs SEQUENCE OF Residue-graph OPTIONAL } -- literal subgraphs of any nonstandard residues present
- -- A biomolecule description refers to the chemical structure of a molecule or
- -- component substructures. This descriptor type is used at the level of
- -- assemblies, molecules and residues, and also for residue-graph dictionaries.
- -- The BioSource object type is drawn from NCBI taxonomy data specifications,
- -- and is not repeated here.
- Biomol-descr ::= CHOICE { -- one descriptor of an assembly, molecule, residue or residue-graph dictionary
- name VisibleString, -- name of the described object
- pdb-class VisibleString, -- PDB "class" field
- pdb-source VisibleString, -- PDB "source" field
- pdb-comment VisibleString, -- comment text originating from PDB
- other-comment VisibleString, -- comment text from another source
- organism BioSource, -- BioSource, from the NCBI taxonomy data specifications
- attribution Pub, -- citation
- assembly-type INTEGER { physiological-form(1), -- PDB-derived structures get other unless further classified
- crystallographic-cell(2),
- other(255) },
- molecule-type INTEGER { dna(1), -- for PDB-derived molecules, assigned by "majority rule" over residue types
- rna(2),
- protein(3),
- other-biopolymer(4),
- solvent(5),
- other-nonpolymer(6),
- other(255) } }
- -- A molecule chemical graph is defined by a sequence of residues. Nonpolymers
- -- are described in the same way, but may contain only a single residue.
- -- Biopolymer molecules are identified within PDB entries according to their
- -- appearance on SEQRES records, which formally define a biopolymer as such.
- -- Biopolymers are defined by the distinction between ATOM and HETATM
- -- coordinate records only in cases where the chemical sequence from SEQRES
- -- is in conflict with coordinate data. The PDB-assigned chain code appears as
- -- the name within the molecule descriptions of the biopolymers.
- -- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups,
- -- excluding any HETEROGEN groups which represent modified biopolymer residues.
- -- These molecules are named according to the chain, residue type and residue
- -- number fields as assigned by PDB. Any description appearing in the PDB HET
- -- record appears as a pdb-comment within the molecule description.
- -- Molecule types for PDB-derived molecule graphs are assigned by matching
- -- residue and atom names against the PDB-documented standard types for protein,
- -- DNA and RNA, and against residue codes commonly used to indicate solvent.
- -- Classification is by "majority rule". If more than half of the residues in
- -- a biopolymer are standard groups of one type, then the molecule is of that
- -- type, and otherwise classified as "other". Note that this classification does
- -- not preclude the presence of modified residues, but insists they constitute
- -- less than half the biopolymer. Non-polymers are classified only as "solvent"
- -- or "other".
- -- Note that a molecule graph may also contain a set of cross references
- -- to biopolymer sequence databases. All biopolymer molecules in MMDB contain
- -- appropriate identifiers for the corresponding entry in the NCBI-Sequences
- -- database, in particular the NCBI "gi" number, which may be used for sequence
- -- retrieval. The Seq-id object type is defined in the NCBI molecular sequence
- -- specification, and not repeated here.
- Molecule-graph ::= SEQUENCE { -- chemical graph of one molecule, as a sequence of residues
- id Molecule-id, -- id of this molecule
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- for PDB biopolymers the name is the PDB-assigned chain code
- seq-id Seq-id OPTIONAL, -- cross reference to a biopolymer sequence database entry
- residue-sequence SEQUENCE OF Residue, -- the residues; nonpolymers may contain only a single residue
- inter-residue-bonds SEQUENCE OF Inter-residue-bond OPTIONAL } -- bonds joining atoms of different residues
-
- Molecule-id ::= INTEGER -- molecule ids are plain integers
- -- Residues may be assigned a text-string name as well as an id number. PDB
- -- assigned residue numbers appear as the residue name.
- Residue ::= SEQUENCE { -- one residue of a molecule
- id Residue-id, -- id number of this residue
- name VisibleString OPTIONAL, -- text name; PDB-assigned residue numbers appear here
- residue-graph Residue-graph-pntr } -- pointer to the residue graph describing this residue
- Residue-id ::= INTEGER -- residue ids are plain integers
- -- Residue graphs from different sources may be referenced within a molecule
- -- graph. The allowed choices are the nonstandard residue graphs included in
- -- the present biostruc, residue graphs within other biostrucs, or residue
- -- graphs within tables of standard residue definitions.
- Residue-graph-pntr ::= CHOICE { -- reference to a residue graph from one of three sources
- local Residue-graph-id, -- a nonstandard residue graph included in the present biostruc
- biostruc Biostruc-graph-pntr, -- a residue graph within another biostruc
- standard Biostruc-residue-graph-set-pntr } -- a residue graph within a table of standard residue definitions
-
- Biostruc-graph-pntr ::= SEQUENCE { -- residue graph located in another biostruc
- biostruc-id Biostruc-id, -- which biostruc
- residue-graph-id Residue-graph-id } -- which residue graph within it
- Biostruc-residue-graph-set-pntr ::= SEQUENCE { -- residue graph located in a residue graph set
- biostruc-residue-graph-set-id Biostruc-id, -- which residue graph set
- residue-graph-id Residue-graph-id } -- which residue graph within it
- -- Residue graphs define atomic formulae, connectivity, chirality, and names.
- -- For standard residue graphs from the MMDB dictionary the PDB-assigned
- -- residue-type code appears as the name within the residue graph description,
- -- and the full trivial name of the residue as a comment within that
- -- description. For any nonstandard residue graphs provided with an MMDB
- -- biostruc the PDB-assigned residue-type code similarly appears as the name
- -- within the description, and any information provided on PDB HET records as
- -- a pdb-comment within that description.
- -- Note that nonstandard residue graphs for a PDB-derived biostruc may be
- -- incomplete. Current PDB format cannot represent connectivity for groups
- -- which are disordered, and for which no coordinates are given. In these
- -- cases the residue graph defined in MMDB represents only the subgraph that
- -- could be identified from available ATOM, HETATM and CONECT records.
- Residue-graph ::= SEQUENCE { -- atomic formula, connectivity, chirality and names of a residue
- id Residue-graph-id, -- id of this residue graph
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- name is the PDB-assigned residue-type code
- residue-type INTEGER { deoxyribonucleotide(1), -- chemical class of the residue
- ribonucleotide(2),
- amino-acid(3),
- other(255) } OPTIONAL,
- iupac-code SEQUENCE OF VisibleString OPTIONAL, -- presumably IUPAC code(s) for the residue; confirm
- atoms SEQUENCE OF Atom, -- the atoms of the residue
- bonds SEQUENCE OF Intra-residue-bond, -- connectivity between those atoms
- chiral-centers SEQUENCE OF Chiral-center OPTIONAL } -- not assigned automatically for PDB-derived nonstandard residues
-
- Residue-graph-id ::= INTEGER -- residue graph ids are plain integers
- -- Atoms in residue graphs are defined by elemental symbols and names. PDB-
- -- assigned atom names appear here in the name field, except in cases of known
- -- PDB synonyms. In these cases atom names are mapped to the names used in the
- -- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where
- -- PDB practice allows synonyms for several atom types. For PDB atoms the
- -- elemental symbol is obtained by parsing the PDB atom name field, allowing
- -- for known special-semantics cases where the atom name does not follow the
- -- documented encoding rule. Ionizable protons are identified within standard
- -- residue graphs in the MMDB dictionary, but not within automatically-defined
- -- nonstandard graphs.
- Atom ::= SEQUENCE { -- one atom of a residue graph
- id Atom-id, -- id of this atom
- name VisibleString OPTIONAL, -- PDB-assigned atom name, or the MMDB dictionary name for known PDB synonyms
- iupac-code SEQUENCE OF VisibleString OPTIONAL, -- presumably IUPAC name(s) for the atom; confirm
- element ENUMERATED { -- elemental symbol; values 1-103 are the atomic numbers
- h(1), he(2), li(3), be(4), b(5),
- c(6), n(7), o(8), f(9), ne(10),
- na(11), mg(12), al(13), si(14), p(15),
- s(16), cl(17), ar(18), k(19), ca(20),
- sc(21), ti(22), v(23), cr(24), mn(25),
- fe(26), co(27), ni(28), cu(29), zn(30),
- ga(31), ge(32), as(33), se(34), br(35),
- kr(36), rb(37), sr(38), y(39), zr(40),
- nb(41), mo(42), tc(43), ru(44), rh(45),
- pd(46), ag(47), cd(48), in(49), sn(50),
- sb(51), te(52), i(53), xe(54), cs(55),
- ba(56), la(57), ce(58), pr(59), nd(60),
- pm(61), sm(62), eu(63), gd(64), tb(65),
- dy(66), ho(67), er(68), tm(69), yb(70),
- lu(71), hf(72), ta(73), w(74), re(75),
- os(76), ir(77), pt(78), au(79), hg(80),
- tl(81), pb(82), bi(83), po(84), at(85),
- rn(86), fr(87), ra(88), ac(89), th(90),
- pa(91), u(92), np(93), pu(94), am(95),
- cm(96), bk(97), cf(98), es(99),
- fm(100), md(101), no(102), lr(103),
- other(254), unknown(255) },
- ionizable-proton ENUMERATED { -- identified only within standard residue graphs of the MMDB dictionary
- true(1),
- false(2),
- unknown(255) } OPTIONAL }
-
- Atom-id ::= INTEGER -- atom ids are plain integers
- -- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
- -- Unlike Inter-residue-bond defined later, its participating atoms are part of
- -- a residue subgraph dictionary, not part of a specific biostruc-graph.
- -- For residue graphs in the standard MMDB dictionary bonds are defined from
- -- the known chemical structures of amino acids and nucleotides. For
- -- nonstandard residue graphs bonds are defined from PDB CONECT records, with
- -- validation for consistency with coordinate data, and from stereochemical
- -- calculation to identify unreported bonds. Validation and bond identification
- -- are based on comparison of inter-atomic distances to the sum of covalent
- -- radii for the corresponding elements.
- Intra-residue-bond ::= SEQUENCE { -- a bond between two atoms of the same residue graph
- atom-id-1 Atom-id, -- first bonded atom
- atom-id-2 Atom-id, -- second bonded atom
- bond-order INTEGER { -- kind of bond, when given
- single(1),
- partial-double(2),
- aromatic(3),
- double(4),
- triple(5),
- other(6),
- unknown(255)} OPTIONAL }
- -- Chiral centers are atoms with tetrahedral geometry. Chirality is defined
- -- by a chiral volume involving the chiral center and 3 other atoms bonded to
- -- it. For any coordinates assigned to atoms c, n1, n2, and n3, the vector
- -- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
- -- sign. The calculation assumes an orthogonal right-handed coordinate system
- -- as is used for MMDB model structures.
- -- Chirality is defined for standard residues in the MMDB dictionary, but is
- -- not assigned automatically for PDB-derived nonstandard residues. If assigned
- -- for nonstandard residues, the source of chirality information is described
- -- by a citation within the residue description.
- Chiral-center ::= SEQUENCE { -- chirality of one tetrahedral atom, given as a chiral volume
- c Atom-id, -- the chiral center
- n1 Atom-id, -- first of 3 other atoms bonded to c
- n2 Atom-id, -- second of 3 other atoms bonded to c
- n3 Atom-id, -- third of 3 other atoms bonded to c
- sign ENUMERATED { positive(1), -- required sign of (n1-c) dot ( (n2-c) cross (n3-c) )
- negative(2) } }
- -- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived
- -- structures bonds are identified from biopolymer connectivity according to
- -- SEQRES and from other connectivity information on SSBOND and CONECT
- -- records. These data are validated and unreported bonds identified by
- -- stereochemical calculation, using the same criteria as for intra-residue
- -- bonds.
- Inter-residue-bond ::= SEQUENCE { -- a bond defined by reference to two atoms
- atom-id-1 Atom-pntr, -- first bonded atom, as a hierarchical pointer
- atom-id-2 Atom-pntr, -- second bonded atom, as a hierarchical pointer
- bond-order INTEGER { -- kind of bond, when given; same values as Intra-residue-bond
- single(1),
- partial-double(2),
- aromatic(3),
- double(4),
- triple(5),
- other(6),
- unknown(255)} OPTIONAL }
- -- Atoms, residues and molecules within the current biostruc are referenced
- -- by hierarchical pointers.
- Atom-pntr ::= SEQUENCE { -- hierarchical pointer to one atom within the current biostruc
- molecule-id Molecule-id, -- which molecule
- residue-id Residue-id, -- which residue of that molecule
- atom-id Atom-id } -- which atom of that residue
- Atom-pntr-set ::= SEQUENCE OF Atom-pntr -- an ordered list of atom pointers
- END
- --$Revision: 1000.1 $
- --**********************************************************************
- --
- -- Biological Macromolecule 3-D Structure Data Types for MMDB,
- -- A Molecular Modeling Database
- --
- -- Definitions for structural models
- --
- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- July, 1996
- --
- --**********************************************************************
- MMDB-Structural-model DEFINITIONS ::=
- BEGIN
- EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id;
- IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment,
- Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features
- Biostruc-id FROM MMDB
- Pub FROM NCBI-Pub;
- -- A structural model maps chemical components into a measured three-
- -- dimensional space. PDB-derived biostrucs generally contain 4 models,
- -- corresponding to "views" of the structure of a biomolecular assembly with
- -- increasing levels of complexity. Model types indicate the complexity of the
- -- view.
- -- The model named "NCBI all atom" represents a view suitable for most
- -- computational biology applications. It provides complete atomic coordinate
- -- data for a "single best" model, omitting statistical disorder information
- -- and/or ensemble structure descriptions provided in the source PDB file.
- -- Construction of the single best model is based on the assumption that the
- -- contents of the "alternate conformation" field from pdb imply no correlation
- -- among the occupancies of multiple sites assigned to sets of atoms: the best
- -- site is chosen only on the basis of highest occupancy. Note, however, that
- -- alternate conformation sets where correlation is implied are generally
- -- constrained in crystallographic refinement to have uniform occupancy, and
- -- will thus be selected as a set. For ensemble models the model which assigns
- -- coordinates to the most atoms is chosen. If numbers of coordinates are the
- -- same, the model occurring first in the PDB file is selected. The single
- -- best model includes complete coordinates for all nonpolymer components, but
- -- omits those classified as "solvent". Model type is 3 for this model.
- -- The model named "NCBI backbone" represents a simple view intended for
- -- graphic displays and rapid transmission over a network. It includes only
- -- alpha carbon or backbone phosphate coordinates for biopolymers. It is based
- -- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI
- -- all atom" model. The model type is set to 2. An even simpler model gives
- -- only a cartoon representation, using cylinders corresponding to secondary
- -- structure elements. This is named "NCBI vector", and has model type 1.
- -- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete
- -- information provided by PDB, including full descriptions of statistical
- -- disorder. The name of the model is based on the contents of the PDB MODEL
- -- record, with a default name of "PDB Model 1" for PDB files which contain
- -- only a single model. Construction of these models is based on the
- -- assumption that contents of the PDB "alternate conformation" field are
- -- intended to imply correlation among the occupancies of atom sets flagged by
- -- the same identifier. The special flag " " (blank) is assumed to indicate
- -- sites occupied in all alternate conformations, and sites flagged otherwise,
- -- together with " ", to indicate a distinct member of an ensemble of
- -- alternate conformations. Note that construction of ensemble members
- -- according to these assumptions requires two validation checks on PDB
- -- "alternate conformation" flags: they must be unique among sites assigned to
- -- the same atom, and the special " " flag must occur only for unique
- -- sites. Sites which violate the first check are flagged as "u", for
- -- "unknown"; they are omitted from all ensemble definitions but are
- -- nonetheless retained in the coordinate list. Sites which violate the second
- -- check are flagged "b" for "blank", and are included in an appropriately
- -- named ensemble. The model type for pdb all models is 4.
- -- Note that in the MMDB database models are stored in the ASN.1 stream in
- -- order of increasing model type value. Since models occur as the last item
- -- in a biostruc, parsers may avoid reading the entire stream if the desired
- -- model is one of the simplified types, which occur first in the stream. This
- -- can save considerable I/O time, particularly for large ensemble models from
- -- NMR determinations.
- -- One structural model: an id and a model type, plus optional descriptors,
- -- model space and coordinate sets.
- Biostruc-model ::= SEQUENCE {
-     id                 Model-id,
-     type               Model-type,
-     descr              SEQUENCE OF Model-descr OPTIONAL,
-     model-space        Model-space OPTIONAL,
-     model-coordinates  SEQUENCE OF Model-coordinate-set OPTIONAL
- }
- -- Integer identifier of a model.
- Model-id ::= INTEGER
- -- Integer model type with named values for the NCBI and PDB views.
- Model-type ::= INTEGER {
-     ncbi-vector    (1),
-     ncbi-backbone  (2),
-     ncbi-all-atom  (3),
-     pdb-model      (4),
-     other          (255)
- }
- -- A single descriptor: one of five text items or a publication.
- Model-descr ::= CHOICE {
-     name           VisibleString,
-     pdb-reso       VisibleString,
-     pdb-method     VisibleString,
-     pdb-comment    VisibleString,
-     other-comment  VisibleString,
-     attribution    Pub
- }
- -- The model space defines measurement units and any external reference frame.
- -- Coordinates refer to a right-handed orthogonal system defined on axes
- -- tagged x, y and z in the coordinate and feature definitions of a biostruc.
- -- Coordinates from PDB-derived structures are reported without change, in
- -- angstrom units. The units of temperature and occupancy factors are not
- -- defined explicitly in PDB, but are inferred from their value range.
- -- Measurement units of the model space and an optional external reference
- -- frame. Only coordinate-units is mandatory.
- Model-space ::= SEQUENCE {
-     coordinate-units ENUMERATED {
-         angstroms   (1),
-         nanometers  (2),
-         other       (3),
-         unknown     (255)
-     },
-     thermal-factor-units ENUMERATED {
-         b        (1),
-         u        (2),
-         other    (3),
-         unknown  (255)
-     } OPTIONAL,
-     occupancy-factor-units ENUMERATED {
-         fractional  (1),
-         electrons   (2),
-         other       (3),
-         unknown     (255)
-     } OPTIONAL,
-     density-units ENUMERATED {
-         electrons-per-unit-volume  (1),
-         arbitrary-scale            (2),
-         other                      (3),
-         unknown                    (255)
-     } OPTIONAL,
-     reference-frame Reference-frame OPTIONAL
- }
- -- An external reference frame is a pointer to another biostruc, with an
- -- optional operator to rotate and translate coordinates into its model space.
- -- This item is intended for representation of homology-derived model
- -- structures, and is not present for structures from PDB.
- -- Identifies another biostruc, with an optional transform.
- Reference-frame ::= SEQUENCE {
-     biostruc-id           Biostruc-id,
-     rotation-translation  Transform OPTIONAL
- }
- -- Atomic coordinates may be assigned literally or by reference to another
- -- biostruc. The reference coordinate type is used to represent homology-
- -- derived model structures. PDB-derived structures have literal coordinates.
- -- Referenced coordinates identify another biostruc, any transformation to be
- -- applied to coordinates from that biostruc, and a mapping of the chemical
- -- graph of the present biostruc onto that of the referenced biostruc. They
- -- give an "alignment" of atoms in the current biostruc with those in another,
- -- from which the coordinates of matched atoms may be retrieved. For non-
- -- atomic models "alignment" may also be represented by molecule and residue
- -- equivalence lists. Referenced coordinates are a data item intended for
- -- representation of homology models, with an explicit pointer to their source
- -- information. They do not occur in PDB-derived models.
- -- One coordinate set of a model: either literal coordinates or a reference
- -- given as a chemical graph alignment. The id and descriptors are optional.
- Model-coordinate-set ::= SEQUENCE {
-     id           Model-coordinate-set-id OPTIONAL,
-     descr        SEQUENCE OF Model-descr OPTIONAL,
-     coordinates  CHOICE {
-         literal    Coordinates,
-         reference  Chem-graph-alignment
-     }
- }
- -- Integer identifier of a coordinate set.
- Model-coordinate-set-id ::= INTEGER
- -- Literal coordinates map chemical components into the model space. Three
- -- mapping types are allowed, atomic coordinate models, density-grid models,
- -- and surface models. A model consists of a sequence of such coordinate sets,
- -- and may thus combine coordinate subsets which have a different source.
- -- PDB-derived models contain a single atomic coordinate set, as they by
- -- definition represent information from a single source.
- -- Literal coordinates: exactly one of atomic, surface or density.
- Coordinates ::= CHOICE {
-     atomic   Atomic-coordinates,
-     surface  Surface-coordinates,
-     density  Density-coordinates
- }
- -- Literal atomic coordinate values give location, occupancy and order
- -- parameters, and a pointer to a specific atom defined in the biostruc graph.
- -- Temperature and occupancy factors have their conventional crystallographic
- -- definitions, with units defined in the model space declaration. Atoms,
- -- sites, temperature-factors, occupancies and alternate-conformation-ids
- -- are parallel arrays, i.e. they have the same number of values as given by
- -- number-of-points. Conformation ensembles represent distinct correlated-
- -- disorder subsets of the coordinates. They will be present only for certain
- -- "views" of PDB structures, as described above. Their derivation from PDB-
- -- supplied "alternate-conformation" ids is described below.
- -- Atomic coordinate data. The point count, atom pointers and sites are
- -- mandatory; the remaining components are optional.
- Atomic-coordinates ::= SEQUENCE {
-     number-of-points     INTEGER,
-     atoms                Atom-pntrs,
-     sites                Model-space-points,
-     temperature-factors  Atomic-temperature-factors OPTIONAL,
-     occupancies          Atomic-occupancies OPTIONAL,
-     alternate-conf-ids   Alternate-conformation-ids OPTIONAL,
-     conf-ensembles       SEQUENCE OF Conformation-ensemble OPTIONAL
- }
- -- The atoms whose location is described by each coordinate are identified
- -- via a hierarchical pointer to the chemical graph of the biomolecular
- -- assembly. Coordinates may be matched with atoms in the chemical structure
- -- by the values of the molecule, residue and atom id's given here, which
- -- match exactly the items of the same type defined in Biostruc-graph.
- -- Coordinates are given as integer values, with a scale factor to convert
- -- to real values for each x, y or z, in the units indicated in model-space.
- -- Integer values must be divided by the scale factor. This use of integer
- -- values reduces the ASN.1 stream size. The scale factors for temperature
- -- factors and occupancies are given separately, but must be used in the same
- -- fashion to produce properly scaled real values.
- -- Integer x, y and z arrays that share a single scale factor.
- Model-space-points ::= SEQUENCE {
-     scale-factor  INTEGER,
-     x             SEQUENCE OF INTEGER,
-     y             SEQUENCE OF INTEGER,
-     z             SEQUENCE OF INTEGER
- }
- -- Temperature factors, in either isotropic or anisotropic form.
- Atomic-temperature-factors ::= CHOICE {
-     isotropic    Isotropic-temperature-factors,
-     anisotropic  Anisotropic-temperature-factors
- }
- -- A scale factor and a single array of integer values.
- Isotropic-temperature-factors ::= SEQUENCE {
-     scale-factor  INTEGER,
-     b             SEQUENCE OF INTEGER
- }
- -- A scale factor and six arrays of integer values, b-11 through b-33.
- Anisotropic-temperature-factors ::= SEQUENCE {
-     scale-factor  INTEGER,
-     b-11          SEQUENCE OF INTEGER,
-     b-12          SEQUENCE OF INTEGER,
-     b-13          SEQUENCE OF INTEGER,
-     b-22          SEQUENCE OF INTEGER,
-     b-23          SEQUENCE OF INTEGER,
-     b-33          SEQUENCE OF INTEGER
- }
- -- A scale factor and an array of integer occupancy values.
- Atomic-occupancies ::= SEQUENCE {
-     scale-factor  INTEGER,
-     o             SEQUENCE OF INTEGER
- }
- -- An alternate conformation id is optionally associated with each coordinate.
- -- Aside from corrections due to the validation checks described above, the
- -- contents of MMDB Alternate-conformation-ids are identical to the PDB
- -- "alternate conformation" field.
- -- Ordered list of alternate conformation ids.
- Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id
- -- A single alternate conformation id, carried as a string.
- Alternate-conformation-id  ::= VisibleString
- -- Correlated disorder ensemble is defined by a set of alternate conformation
- -- id's which identify coordinates relevant to that ensemble. These are
- -- defined from the validated and corrected contents of the PDB "alternate
- -- conformation" field as described above. A given ensemble, for example, may
- -- consist of atom sites flagged by " " and "A" Alternate-conformation-ids.
- -- Names for ensembles are constructed from these flags. This example would be
- -- named, in its description, "PDB Ensemble blank plus A".
- -- Note that this interpretation is consistent with common PDB usage of the
- -- "alternate conformation" field, but that PDB specifications do not formally
- -- distinguish between correlated and uncorrelated disorder in crystallographic
- -- models. Ensembles identified in MMDB thus may not correspond to the meaning
- -- intended by PDB or the depositor. No information is lost, however, and
- -- if the intended meaning is known alternative ensemble descriptions may be
- -- reconstructed directly from the Alternate-conformation-ids.
- -- Note that correlated disorder as defined here is allowed within an atomic
- -- coordinate set but not between the multiple sets which may define a model.
- -- Multiple sets within the same model are intended as a means to represent
- -- assemblies modeled from different experimentally determined structures,
- -- where correlated disorder between coordinate sets is not relevant.
- -- A named ensemble and the alternate conformation ids that belong to it.
- Conformation-ensemble ::= SEQUENCE {
-     name          VisibleString,
-     alt-conf-ids  SEQUENCE OF Alternate-conformation-id
- }
- -- Literal surface coordinates define the chemical components whose structure
- -- is described by a surface, and the surface itself. The surface may be
- -- either a regular geometric solid or a triangle-mesh of arbitrary shape.
- -- Chemical components described by a surface, together with that surface,
- -- which is one of four solids or one of two mesh forms.
- Surface-coordinates ::= SEQUENCE {
-     contents  Chem-graph-pntrs,
-     surface   CHOICE {
-         sphere     Sphere,
-         cone       Cone,
-         cylinder   Cylinder,
-         brick      Brick,
-         tmesh      T-mesh,
-         triangles  Triangles
-     }
- }
- -- Mesh given as a point count, a scale factor, a boolean swap array and
- -- integer x, y and z arrays.
- T-mesh ::= SEQUENCE {
-     number-of-points  INTEGER,
-     scale-factor      INTEGER,
-     swap              SEQUENCE OF BOOLEAN,
-     x                 SEQUENCE OF INTEGER,
-     y                 SEQUENCE OF INTEGER,
-     z                 SEQUENCE OF INTEGER
- }
- -- Mesh given as scaled integer points (x, y, z) followed by a triangle
- -- count and three integer arrays v1, v2 and v3.
- Triangles ::= SEQUENCE {
-     number-of-points     INTEGER,
-     scale-factor         INTEGER,
-     x                    SEQUENCE OF INTEGER,
-     y                    SEQUENCE OF INTEGER,
-     z                    SEQUENCE OF INTEGER,
-     number-of-triangles  INTEGER,
-     v1                   SEQUENCE OF INTEGER,
-     v2                   SEQUENCE OF INTEGER,
-     v3                   SEQUENCE OF INTEGER
- }
- -- Literal density coordinates define the chemical components whose structure
- -- is described by a density grid, parameters of this grid, and density values.
- -- Chemical components described by a density grid, the grid parameters,
- -- and the integer density values with their scale factor.
- Density-coordinates ::= SEQUENCE {
-     contents         Chem-graph-pntrs,
-     grid-corners     Brick,
-     grid-steps-x     INTEGER,
-     grid-steps-y     INTEGER,
-     grid-steps-z     INTEGER,
-     fastest-varying  ENUMERATED {
-         x  (1),
-         y  (2),
-         z  (3)
-     },
-     slowest-varying  ENUMERATED {
-         x  (1),
-         y  (2),
-         z  (3)
-     },
-     scale-factor     INTEGER,
-     density          SEQUENCE OF INTEGER
- }
- END
- --$Revision: 1000.1 $
- --**********************************************************************
- --
- -- Biological Macromolecule 3-D Structure Data Types for MMDB,
- -- A Molecular Modeling Database
- --
- -- Definitions for structural features and biostruc addressing
- --
- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- July, 1996
- --
- --**********************************************************************
- MMDB-Features DEFINITIONS ::=
- BEGIN
- EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
- Chem-graph-alignment, Sphere, Cone, Cylinder, Brick, Transform,
- Biostruc-feature-set-id, Biostruc-feature-id;
- IMPORTS Biostruc-id FROM MMDB
- Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
- Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
- User-object FROM NCBI-General
- Pub FROM NCBI-Pub;
- -- Named model features refer to sets of residues or atoms, or a region in
- -- the model space. A few specific feature types are allowed for compatibility
- -- with PDB usage, but the purpose of a named model feature is simply to
- -- associate various types of information with a set of atoms or
- -- residues, or a spatially-defined region of the model structure. They also
- -- support association of various properties with each residue or atom of a
- -- set.
- -- PDB-derived secondary structure defines a single feature, represented as a
- -- sequence of residue motifs, as are the contents of PDB SITE and
- -- FTNOTE records. NCBI-assigned core and secondary structure descriptions
- -- are also represented as a sequence of residue motifs.
- -- A set of features with a mandatory id and optional descriptors.
- Biostruc-feature-set ::= SEQUENCE {
-     id        Biostruc-feature-set-id,
-     descr     SEQUENCE OF Biostruc-feature-set-descr OPTIONAL,
-     features  SEQUENCE OF Biostruc-feature
- }
- -- Integer identifier of a feature set.
- Biostruc-feature-set-id ::= INTEGER
- -- A single feature-set descriptor: one of three text items or a publication.
- Biostruc-feature-set-descr ::= CHOICE {
-     name           VisibleString,
-     pdb-comment    VisibleString,
-     other-comment  VisibleString,
-     attribution    Pub
- }
- -- An explicitly specified type in Biostruc-feature allows for
- -- efficient extraction and indexing of feature sets of a specific type.
- -- Special types are provided for coloring and rendering,
- -- as needed by molecular graphics programs.
-
- -- A single feature. Every component is optional: an id, a name, a type
- -- code, one property and one location.
- Biostruc-feature ::= SEQUENCE {
-     id    Biostruc-feature-id OPTIONAL,
-     name  VisibleString OPTIONAL,
-     type  INTEGER {
-         helix       (1),
-         strand      (2),
-         sheet       (3),
-         turn        (4),
-         site        (5),
-         footnote    (6),
-         comment     (7),      -- new
-         subgraph    (100),    -- NCBI domain reserved
-         region      (101),
-         core        (102),    -- user core definition
-         supercore   (103),    -- NCBI reserved
-         color       (150),    -- new
-         render      (151),    -- new
-         label       (152),    -- new
-         transform   (153),    -- new
-         camera      (154),    -- new
-         script      (155),    -- for scripts
-         alignment   (200),    -- VAST reserved
-         similarity  (201),
-         multalign   (202),    -- multiple alignment
-         indirect    (203),    -- new
-         cn3dstate   (254),    -- Cn3D reserved
-         other       (255)
-     } OPTIONAL,
-     property  CHOICE {
-         color      Color-prop,
-         render     Render-prop,
-         transform  Transform,
-         camera     Camera,
-         script     Biostruc-script,
-         user       User-object
-     } OPTIONAL,
-     location  CHOICE {
-         subgraph    Chem-graph-pntrs,
-         region      Region-pntrs,
-         alignment   Chem-graph-alignment,
-         similarity  Region-similarity,
-         indirect    Other-feature        -- new
-     } OPTIONAL
- }
- -- Other-feature allows for specifying location via reference to another
- -- Biostruc-feature and its location.
- -- Reference to a feature by biostruc id, feature-set id and feature id.
- Other-feature ::= SEQUENCE {
-     biostruc-id  Biostruc-id,
-     set          Biostruc-feature-set-id,
-     feature      Biostruc-feature-id
- }
- -- Integer identifier of a feature.
- Biostruc-feature-id ::= INTEGER
- -- Atom, residue or molecule motifs describe a substructure defined by a set
- -- of nodes from the chemical graph. PDB secondary structure features are
- -- described as a residue motif, since they are not associated with any one of
- -- the multiple models that may be provided in a PDB file. NCBI-assigned
- -- secondary structure is represented in the same way, even though it is
- -- model specific, since this allows for simple mapping of the structural
- -- feature onto a sequence-only representation. This addressing mode may also
- -- be used to describe features to be associated with particular atoms,
- -- as, for example, the chemical shift observed in an NMR experiment.
- -- Pointers into the chemical graph at atom, residue or molecule level.
- Chem-graph-pntrs ::= CHOICE {
-     atoms      Atom-pntrs,
-     residues   Residue-pntrs,
-     molecules  Molecule-pntrs
- }
- -- Atom pointers: a count followed by molecule, residue and atom id lists.
- Atom-pntrs ::= SEQUENCE {
-     number-of-ptrs  INTEGER,
-     molecule-ids    SEQUENCE OF Molecule-id,
-     residue-ids     SEQUENCE OF Residue-id,
-     atom-ids        SEQUENCE OF Atom-id
- }
- -- Residue pointers, given explicitly or as a list of intervals.
- Residue-pntrs ::= CHOICE {
-     explicit  Residue-explicit-pntrs,
-     interval  SEQUENCE OF Residue-interval-pntr
- }
- -- Explicit form: a count followed by molecule and residue id lists.
- Residue-explicit-pntrs ::= SEQUENCE {
-     number-of-ptrs  INTEGER,
-     molecule-ids    SEQUENCE OF Molecule-id,
-     residue-ids     SEQUENCE OF Residue-id
- }
- -- Interval form: a molecule id with a from and a to residue id.
- Residue-interval-pntr ::= SEQUENCE {
-     molecule-id  Molecule-id,
-     from         Residue-id,
-     to           Residue-id
- }
- -- Molecule pointers: a count followed by a molecule id list.
- Molecule-pntrs ::= SEQUENCE {
-     number-of-ptrs  INTEGER,
-     molecule-ids    SEQUENCE OF Molecule-id
- }
- -- Region motifs describe features defined by spatial location, such as the
- -- site specified by a coordinate value, or a region within a bounding volume.
- -- A region within one model, given as coordinate sites or as boundaries.
- Region-pntrs ::= SEQUENCE {
-     model-id  Model-id,
-     region    CHOICE {
-         site      SEQUENCE OF Region-coordinates,
-         boundary  SEQUENCE OF Region-boundary
-     }
- }
- -- Coordinate sites describe a region in space by reference to individual
- -- coordinates, in a particular model. These coordinates may be either the
- -- x, y and z values of atomic coordinates, the triangles of a surface mesh,
- -- or the grid points of a density model. All are addressed in the same manner,
- -- as coordinate indices which give offsets from the beginning of the
- -- coordinate data arrays. A coordinate-index of 5, for example, refers to
- -- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
- -- values of a triangle mesh, or the 5th value in a density grid.
- -- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
- -- are represented as a region motif with addresses of type Region-coordinates.
- -- Any names or descriptions provided by PDB are thus associated with the
- -- indicated sites, in the indicated model.
- -- Coordinate indices within one coordinate set. The count and the index
- -- list are both optional.
- Region-coordinates ::= SEQUENCE {
-     model-coord-set-id  Model-coordinate-set-id,
-     number-of-coords    INTEGER OPTIONAL,
-     coordinate-indices  SEQUENCE OF INTEGER OPTIONAL
- }
- -- Region boundaries are defined by regular solids located in the model space.
- -- A bounding solid: sphere, cone, cylinder or brick.
- Region-boundary ::= CHOICE {
-     sphere    Sphere,
-     cone      Cone,
-     cylinder  Cylinder,
-     brick     Brick
- }
- -- A biostruc alignment establishes an equivalence of nodes in the chemical
- -- graphs of two or more biostrucs. This may be mapped to a sequence
- -- alignment in the case of biopolymers.
- -- The 'dimension' component indicates the number of participants
- -- in the alignment. For pairwise alignments, such as VAST
- -- structure-structure alignments, the dimension will be always 2, with
- -- biostruc-ids, alignment, and domain each containing two entries for an
- -- aligned pair. The 'alignment' component contains a pair of Chem-graph-pntrs
- -- specifying a like number of corresponding residues in each structure.
- -- The 'domain' component specifies a region of each structure considered
- -- in the alignment. Only one transform (for the second structure) and
- -- one aligndata (for the pair) are provided for each VAST alignment.
- --
- -- For multiple alignments, a set of components are treated as
- -- parallel arrays of length 'dimension'.
- -- The 'transform' component moves each structure to align it with
- -- the structure specified as the first element in the "parallel" array,
- -- so necessarily the first transform is a NULL transform.
- -- Align-stats are placeholders for scores.
- -- Alignment of chemical graphs. dimension defaults to 2; biostruc-ids and
- -- alignment are mandatory, the other components are optional.
- Chem-graph-alignment ::= SEQUENCE {
-     dimension     INTEGER DEFAULT 2,
-     biostruc-ids  SEQUENCE OF Biostruc-id,
-     alignment     SEQUENCE OF Chem-graph-pntrs,
-     domain        SEQUENCE OF Chem-graph-pntrs OPTIONAL,
-     transform     SEQUENCE OF Transform OPTIONAL,
-     aligndata     SEQUENCE OF Align-stats OPTIONAL
- }
- -- Alignment scores. Every component is optional.
- Align-stats ::= SEQUENCE {
-     descr         VisibleString OPTIONAL,
-     scale-factor  INTEGER OPTIONAL,
-     vast-score    INTEGER OPTIONAL,
-     vast-mlogp    INTEGER OPTIONAL,
-     align-res     INTEGER OPTIONAL,
-     rmsd          INTEGER OPTIONAL,
-     blast-score   INTEGER OPTIONAL,
-     blast-mlogp   INTEGER OPTIONAL,
-     other-score   INTEGER OPTIONAL
- }
- -- A biostruc similarity describes spatial features which are similar between
- -- two or more biostrucs. Similarities are model dependent, and the model and
- -- coordinate set ids of the biostrucs must be specified. They do not
- -- necessarily map to a sequence alignment, as the regions referenced may
- -- be pieces of a surface or grid, and thus not uniquely mappable to particular
- -- chemical components.
- -- Similarity of regions across biostrucs. dimension defaults to 2; all
- -- three lists are mandatory.
- Region-similarity ::= SEQUENCE {
-     dimension     INTEGER DEFAULT 2,
-     biostruc-ids  SEQUENCE OF Biostruc-id,
-     similarity    SEQUENCE OF Region-pntrs,
-     transform     SEQUENCE OF Transform
- }
- -- Geometrical primitives are used in the definition of region motifs, and
- -- also non-atomic coordinates. Spheres, cones, cylinders and bricks are
- -- defined by a few points in the model space.
- -- Sphere: a center point and a radius.
- Sphere ::= SEQUENCE {
-     center  Model-space-point,
-     radius  RealValue
- }
- -- Cone: two axis end points and the radius at the bottom.
- Cone ::= SEQUENCE {
-     axis-top       Model-space-point,
-     axis-bottom    Model-space-point,
-     radius-bottom  RealValue
- }
- -- Cylinder: two axis end points and a radius.
- Cylinder ::= SEQUENCE {
-     axis-top     Model-space-point,
-     axis-bottom  Model-space-point,
-     radius       RealValue
- }
- -- A brick is defined by the coordinates of eight corners. These are assumed
- -- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the
- -- digits 0 and 1 refer respectively to the x, y and z axes of a unit cube.
- -- Opposite edges are assumed to be parallel.
- -- Brick: eight corner points, listed from corner-000 to corner-111.
- Brick ::= SEQUENCE {
-     corner-000  Model-space-point,
-     corner-001  Model-space-point,
-     corner-010  Model-space-point,
-     corner-011  Model-space-point,
-     corner-100  Model-space-point,
-     corner-101  Model-space-point,
-     corner-110  Model-space-point,
-     corner-111  Model-space-point
- }
- -- A single point: integer x, y and z with a scale factor.
- Model-space-point ::= SEQUENCE {
-     scale-factor  INTEGER,
-     x             INTEGER,
-     y             INTEGER,
-     z             INTEGER
- }
- -- A single value carried as an integer with a scale factor.
- RealValue ::= SEQUENCE {
-     scale-factor          INTEGER,
-     scaled-integer-value  INTEGER
- }
- -- A transform: an integer id and an ordered list of moves.
- Transform ::= SEQUENCE {
-     id     INTEGER,
-     moves  SEQUENCE OF Move
- }
- -- A single move: either a rotation or a translation.
- Move ::= CHOICE {
-     rotate     Rot-matrix,
-     translate  Trans-matrix
- }
-
- -- A rotation matrix is defined by 9 numbers, given by row, i.e.,
- -- with column indices varying fastest.
- -- Coordinates, as a matrix with columns x, y, and z, are rotated
- -- via multiplication with the rotation matrix.
- -- A translation matrix is defined by 3 numbers, which are added to
- -- the rotated coordinates for the specified amount of translation.
- -- Rotation: a scale factor and nine integers, rot-11 through rot-33.
- Rot-matrix ::= SEQUENCE {
-     scale-factor  INTEGER,
-     rot-11        INTEGER,
-     rot-12        INTEGER,
-     rot-13        INTEGER,
-     rot-21        INTEGER,
-     rot-22        INTEGER,
-     rot-23        INTEGER,
-     rot-31        INTEGER,
-     rot-32        INTEGER,
-     rot-33        INTEGER
- }
- -- Translation: a scale factor and three integers.
- Trans-matrix ::= SEQUENCE {
-     scale-factor  INTEGER,
-     tran-1        INTEGER,
-     tran-2        INTEGER,
-     tran-3        INTEGER
- }
- -- The camera is a position relative to the world coordinates
- -- of the structure referred to by a location.
- -- this is used to set the initial position of the
- -- camera using OpenGL. scale is the value used to scale the
- -- other values from floating point to integer
- -- Camera settings: five integers and a modelview matrix.
- Camera ::= SEQUENCE {
-     x          INTEGER,
-     y          INTEGER,
-     distance   INTEGER,
-     angle      INTEGER,
-     scale      INTEGER,
-     modelview  GL-matrix
- }
-
-
- -- A scale value followed by sixteen integers, m11 through m44.
- GL-matrix ::= SEQUENCE {
-     scale  INTEGER,
-     m11    INTEGER,
-     m12    INTEGER,
-     m13    INTEGER,
-     m14    INTEGER,
-     m21    INTEGER,
-     m22    INTEGER,
-     m23    INTEGER,
-     m24    INTEGER,
-     m31    INTEGER,
-     m32    INTEGER,
-     m33    INTEGER,
-     m34    INTEGER,
-     m41    INTEGER,
-     m42    INTEGER,
-     m43    INTEGER,
-     m44    INTEGER
- }
- -- Color given by optional r, g and b integers and an optional name.
- Color-prop ::= SEQUENCE {
-     r     INTEGER OPTIONAL,
-     g     INTEGER OPTIONAL,
-     b     INTEGER OPTIONAL,
-     name  VisibleString OPTIONAL
- }
- -- Note that Render-prop is compatible with the Annmm specification,
- -- i.e., its numbering schemes do not clash with those in Annmm.
- -- Integer rendering code with named values for drawing styles, labels,
- -- surface object display and color sets.
- Render-prop ::= INTEGER {
-     default           (0),      -- Default view
-     wire              (1),      -- use wireframe
-     space             (2),      -- use spacefill
-     stick             (3),      -- use stick model (thin cylinders)
-     ballNStick        (4),      -- use ball & stick model
-     thickWire         (5),      -- thicker wireframe
-     hide              (9),      -- don't show this
-     name              (10),     -- display its name next to it
-     number            (11),     -- display its number next to it
-     pdbNumber         (12),     -- display its PDB number next to it
-     objWireFrame      (150),    -- display MMDB surface object as wireframe
-     objPolygons       (151),    -- display MMDB surface object as polygons
-     colorsetCPK       (225),    -- color atoms like CPK models
-     colorsetbyChain   (226),    -- color each chain different
-     colorsetbyTemp    (227),    -- color using isotropic Temp factors
-     colorsetbyRes     (228),    -- color using residue properties
-     colorsetbyLen     (229),    -- color changes along chain length
-     colorsetbySStru   (230),    -- color by secondary structure
-     colorsetbyHydro   (231),    -- color by hydrophobicity
-     colorsetbyObject  (246),    -- color each object differently
-     colorsetbyDomain  (247),    -- color each domain differently
-     other             (255)
- }
- -- When a Biostruc-Feature with a Biostruc-script is initiated,
- -- it should play the specified steps one at a time, setting the feature-do
- -- list as the active display.
- -- The camera can be set using a feature-do,
- -- but it may be moved independently with
- -- camera-move, which specifies how to move
- -- the camera dynamically during the step along the path defined (e.g.,
- -- a zoom, a rotate).
- -- Any value of pause (in tenths of a second) will force a pause
- -- after an image is shown.
- -- If waitevent is TRUE, it will await a mouse or keypress and ignore
- -- the pause value.
- -- A script is an ordered list of steps.
- Biostruc-script ::= SEQUENCE OF Biostruc-script-step
- -- One step. pause defaults to 10; step-id, waitevent and extra are
- -- mandatory, the remaining components are optional.
- Biostruc-script-step ::= SEQUENCE {
-     step-id      Step-id,
-     step-name    VisibleString OPTIONAL,
-     feature-do   SEQUENCE OF Other-feature OPTIONAL,
-     camera-move  Transform OPTIONAL,
-     pause        INTEGER DEFAULT 10,
-     waitevent    BOOLEAN,
-     extra        INTEGER,
-     jump         Step-id OPTIONAL
- }
- -- Integer identifier of a script step.
- Step-id ::= INTEGER
- END
- --$Revision: 1000.1 $
- --**********************************************************************
- --
- -- Definitions for CDD's
- --
- -- NCBI Structure Group
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- October 1999
- --
- -- asntool -m cdd.asn -w 100 -o cdd.h
- -- asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h -M asn.all
- --**********************************************************************
- NCBI-Cdd DEFINITIONS ::=
- -- NCBI Conserved Domain Definition
- BEGIN
- EXPORTS Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set;
- -- Types imported by NCBI-Cdd. Each source module is named exactly once:
- -- Bioseq and Seq-annot share a single FROM NCBI-Sequence clause.
- IMPORTS Date                FROM NCBI-General
-         Pub                 FROM NCBI-Pub
-         Biostruc-annot-set  FROM MMDB
-         Bioseq, Seq-annot   FROM NCBI-Sequence
-         Seq-entry           FROM NCBI-Seqset
-         Org-ref             FROM NCBI-Organism
-         Seq-interval        FROM NCBI-Seqloc
-         Score-set           FROM NCBI-Seqalign;
- -- Cdd's should not exist without a unique integer id, but alternative
- -- id's may be present as well.
- -- Accession-based identifier with optional release, version and database.
- Global-id ::= SEQUENCE {
-     accession  VisibleString,
-     release    VisibleString OPTIONAL,
-     version    INTEGER OPTIONAL,         -- version 0 is the seed
-     database   VisibleString OPTIONAL    -- this is NOT the source, rather
-                                          -- the database the object resides in
- }
- -- A CDD identifier: either an integer uid or a Global-id.
- Cdd-id ::= CHOICE {
-     uid  INTEGER,
-     gid  Global-id
- }
- -- Ordered list of CDD identifiers.
- Cdd-id-set ::= SEQUENCE OF Cdd-id
- -- The description of CDD's refers to the specific set of aligned sequences,
- -- the region that is being aligned and the information contained in the
- -- alignment. It may contain a lengthy comment
- -- describing the function of the domain as well as its origin and all
- -- other anecdotal information that can't be pressed into a rigid scheme.
- -- Crosslinks to reference papers available in PubMed are possible as well.
- -- There can be as many of these as you want in the CDD.
- -- One descriptive item of a CDD.
- Cdd-descr ::= CHOICE {
-     othername    VisibleString,    -- alternative names for the CDD
-     category     VisibleString,    -- intracellular, extracellular, etc.
-     comment      VisibleString,    -- this is where annotations go
-     reference    Pub,              -- a citation
-     create-date  Date,             -- valid for the current version
-     tax-source   Org-ref,          -- holds the highest common node
-     source       VisibleString,    -- the database the seeds were created
-                                    -- from, e.g. SMART, PFAM, etc..
-     -- status: to indicate processing status or final type, for CD production
-     status       INTEGER {
-         unassigned       (0),
-         finished-ok      (1),
-         pending-release  (2),
-         other-asis       (3),
-         matrix-only      (4),
-         other            (255)
-     }
- }
- -- Unordered collection of CDD descriptors.
- Cdd-descr-set ::= SET OF Cdd-descr
- -- the Cdd-tree contains the hierarchy of CDDs. This object is separate from
- -- the Cdd's themselves to allow it to be retrieved separately and to
- -- operate as an index.
- -- One node of the CDD hierarchy: a name and ids, with optional description
- -- and optional id sets for parents, children and siblings.
- Cdd-tree ::= SEQUENCE {
-     name         VisibleString,
-     id           Cdd-id-set,
-     description  Cdd-descr-set OPTIONAL,
-     parents      Cdd-id-set OPTIONAL,
-     children     Cdd-id-set OPTIONAL,
-     siblings     Cdd-id-set OPTIONAL
- }
- -- Ordered list of Cdd-tree nodes.
- Cdd-tree-set ::= SEQUENCE OF Cdd-tree
- -- Matrix definitions, these are supposed to store PSSMs and corresponding
- -- matrices of relative residue frequencies.
- -- the number of columns and rows is listed explicitly, values in columns
- -- are stored column by column, i.e. in groups of nrows values for each column
- -- Integer matrix with explicit column and row counts, optional row labels,
- -- a scale factor and the values in a single list.
- Matrix ::= SEQUENCE {
-     ncolumns      INTEGER,
-     nrows         INTEGER,
-     row-labels    SEQUENCE OF VisibleString OPTIONAL,
-     scale-factor  INTEGER,
-     columns       SEQUENCE OF INTEGER
- }
- -- definition for matrix of pairwise "distances", stored as the upper
- -- triangle of a squared n x n matrix (excluding the diagonal), this is
- -- supposed to store pairwise percentages of identical residues, pairwise
- -- alignment scores or E-values from pairwise BLAST sequence comparisons
- -- An element count and the associated score set.
- Triangle ::= SEQUENCE {
-     nelements  INTEGER,
-     scores     Score-set
- }
- -- the Cdd is the basic ASN.1 object storing an annotated and curated
- -- set of alignments (formulated as a set of pairwise master-slave
- -- alignments).
- -- The alignment data are contained in Seq-align-sets and Biostruc-feature-sets.
- -- Version numbers in Global-ids are meant to be updated every time the Cdd is changed
- -- in a way that does not require Global-ids to be changed (sequences added in update
- -- cycle, annotation changed)
- -- A conserved domain definition. name and id are mandatory; every other
- -- component is optional.
- Cdd ::= SEQUENCE {
-     name           VisibleString,
-     id             Cdd-id-set,
-     description    Cdd-descr-set OPTIONAL,
-     -- contains the alignment
-     seqannot       SEQUENCE OF Seq-annot OPTIONAL,
-     -- contains structure alignments or "core" definitions
-     features       Biostruc-annot-set OPTIONAL,
-     -- store as bioseq-set inside seq-entry
-     sequences      Seq-entry OPTIONAL,
-     -- profile for this region only; also stores the Seq-id of the master
-     profile-range  Seq-interval OPTIONAL,
-     -- holds the truncated master, which may be something like a consensus,
-     -- but still refers to the sequence coord. frame in profile-range
-     trunc-master   Bioseq OPTIONAL,
-     posfreq        Matrix OPTIONAL,      -- relative residue frequencies
-     scoremat       Matrix OPTIONAL,      -- Position dependent score matrix
-     distance       Triangle OPTIONAL     -- pairwise distances for all seqs.
- }
- -- Unordered collection of Cdd objects.
- Cdd-set ::= SET OF Cdd
- END
- --$Revision: 1000.1 $
- --****************************************************************
- --
- -- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
- -- by Jonathan Epstein, February 1996
- --
- --****************************************************************
- NCBI-Mime DEFINITIONS ::=
- BEGIN
- EXPORTS Ncbi-mime-asn1;
- IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
- Seq-entry FROM NCBI-Seqset
- Seq-annot FROM NCBI-Sequence
- Medline-entry FROM NCBI-Medline;
-
- -- Top-level MIME payload: exactly one of the alternatives below.
- Ncbi-mime-asn1 ::= CHOICE {
-     entrez      Entrez-general,        -- just a structure
-     alignstruc  Biostruc-align,        -- structures & sequences & alignments
-     alignseq    Biostruc-align-seq,    -- sequence alignment
-     strucseq    Biostruc-seq,          -- structure & sequences
-     strucseqs   Biostruc-seqs          -- structure & sequences & alignments
-     -- others may be added here in the future
- }
- Biostruc-align ::= SEQUENCE {
- master Biostruc,
- slaves SET OF Biostruc,
- alignments Biostruc-annot-set, -- structure alignments
- sequences SET OF Seq-entry, -- sequences
- seqalign SET OF Seq-annot }
- Biostruc-align-seq ::= SEQUENCE { -- display seq structure align only
- sequences SET OF Seq-entry, -- sequences
- seqalign SET OF Seq-annot }
- Biostruc-seq ::= SEQUENCE { -- display structure seq added by yanli
- structure Biostruc,
- sequences SET OF Seq-entry }
- Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli
- structure Biostruc,
- sequences SET OF Seq-entry, -- sequences
- seqalign SET OF Seq-annot }
- Entrez-style ::= ENUMERATED {
- docsum (1),
- genbank (2) ,
- genpept (3) ,
- fasta (4) ,
- asn1 (5) ,
- graphic (6) ,
- alignment (7) ,
- globalview (8) ,
- report (9) ,
- medlars (10) ,
- embl (11) ,
- pdb (12) ,
- kinemage (13) }
- Entrez-general ::= SEQUENCE {
- title VisibleString OPTIONAL,
- data CHOICE {
- ml Medline-entry ,
- prot Seq-entry ,
- nuc Seq-entry ,
- genome Seq-entry ,
- structure Biostruc ,
- strucAnnot Biostruc-annot-set } ,
- style Entrez-style ,
- location VisibleString OPTIONAL }
- END
- --$Revision: 1000.1 $
- --*********************************************************************
- --
- -- access.asn
- --
- -- messages for data access
- --
- --*********************************************************************
- -- Module NCBI-Access: exports the single type Link-set.
- NCBI-Access DEFINITIONS ::=
- BEGIN
- EXPORTS Link-set;
- -- links between same class = neighbors
- -- links between other classes = links
- -- Link-set: a required link count plus optional lists of uids and weights.
- -- NOTE(review): uids and weights look like parallel lists of num entries
- -- each, but nothing in this definition enforces that; confirm with callers.
- Link-set ::= SEQUENCE {
- num INTEGER , -- number of links to this doc type
- uids SEQUENCE OF INTEGER OPTIONAL , -- the links
- weights SEQUENCE OF INTEGER OPTIONAL } -- the weights
- END
- --$Revision: 1000.1 $
- --**********************************************************************
- --
- -- NCBI Sequence Feature Definition Module
- -- by James Ostell, 1994
- --
- --**********************************************************************
- -- Module NCBI-FeatDef: feature definitions and their display groups.
- -- NOTE(review): FeatMolType and FeatDefGroupSet are defined below but are
- -- not listed in EXPORTS, although the exported FeatDef refers to FeatMolType.
- NCBI-FeatDef DEFINITIONS ::=
- BEGIN
- EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
- -- FeatDef: one feature definition; all seven components are required.
- FeatDef ::= SEQUENCE {
- typelabel VisibleString , -- short label for type eg "CDS"
- menulabel VisibleString , -- label for a menu eg "Coding Region"
- featdef-key INTEGER , -- unique for this feature definition
- seqfeat-key INTEGER , -- SeqFeat.data.choice from objfeat.h
- entrygroup INTEGER , -- Group for data entry
- displaygroup INTEGER , -- Group for data display
- molgroup FeatMolType -- Type of molecule; presumably the one this feature applies to (TODO confirm)
- }
- -- FeatMolType: molecule class stored in FeatDef.molgroup.
- FeatMolType ::= ENUMERATED {
- aa (1), -- proteins
- na (2), -- nucleic acids
- both (3) } -- both
- FeatDefSet ::= SEQUENCE OF FeatDef -- collections of definitions
- -- FeatDispGroup: a display group, given as an integer key and a name.
- -- NOTE(review): groupkey presumably matches FeatDef.displaygroup; confirm.
- FeatDispGroup ::= SEQUENCE {
- groupkey INTEGER ,
- groupname VisibleString }
- FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup
- -- FeatDefGroupSet: the display groups and the feature definitions together.
- FeatDefGroupSet ::= SEQUENCE {
- groups FeatDispGroupSet ,
- defs FeatDefSet }
- END
-
- --$Revision: 1000.1 $
- --********************************************************************
- --
- -- Print Templates
- -- James Ostell, 1993
- --
- --
- --********************************************************************
- -- Module NCBI-ObjPrt: print template definitions.
- -- Exports PrintTemplate and PrintTemplateSet; the remaining types are
- -- the parts a template is built from.
- NCBI-ObjPrt DEFINITIONS ::=
- BEGIN
- EXPORTS PrintTemplate, PrintTemplateSet;
- -- PrintTemplate: a named template with one root PrintFormat.
- PrintTemplate ::= SEQUENCE {
- name TemplateName , -- name for this template
- labelfrom VisibleString OPTIONAL, -- ASN.1 path to get label from
- format PrintFormat }
- -- TemplateName: plain string; also used by PrintForm.use-template.
- TemplateName ::= VisibleString
- PrintTemplateSet ::= SEQUENCE OF PrintTemplate
- -- PrintFormat: formatting for one ASN.1 path. The definition is recursive:
- -- form may be a PrintFormBlock, whose components are PrintFormat values.
- PrintFormat ::= SEQUENCE {
- asn1 VisibleString , -- ASN.1 partial path for this
- label VisibleString OPTIONAL , -- printable label
- prefix VisibleString OPTIONAL,
- suffix VisibleString OPTIONAL,
- form PrintForm }
- PrintForm ::= CHOICE { -- Forms for various ASN.1 components
- block PrintFormBlock,
- boolean PrintFormBoolean,
- enum PrintFormEnum,
- text PrintFormText,
- use-template TemplateName,
- user UserFormat ,
- null NULL } -- rarely used
- -- UserFormat: names given as strings; printfunc is required and
- -- defaultfunc is optional.
- -- NOTE(review): presumably names of functions registered by the
- -- application; confirm against the code that consumes these templates.
- UserFormat ::= SEQUENCE {
- printfunc VisibleString ,
- defaultfunc VisibleString OPTIONAL }
- PrintFormBlock ::= SEQUENCE { -- for SEQUENCE, SET
- separator VisibleString OPTIONAL ,
- components SEQUENCE OF PrintFormat }
- -- PrintFormBoolean: optional strings, one per boolean value
- -- (presumably the text to print for each; TODO confirm).
- PrintFormBoolean ::= SEQUENCE {
- true VisibleString OPTIONAL ,
- false VisibleString OPTIONAL }
- -- PrintFormEnum: optional list of strings
- -- (presumably one per enumerated value; TODO confirm).
- PrintFormEnum ::= SEQUENCE {
- values SEQUENCE OF VisibleString OPTIONAL }
- -- PrintFormText: optional name of a text function, given as a string.
- PrintFormText ::= SEQUENCE {
- textfunc VisibleString OPTIONAL }
-
- END
- --$Revision: 1000.1 $
- -- *********************************************************************
- --
- -- These are code and conversion tables for NCBI sequence codes
- -- The ASN.1 for the sequences themselves is defined in seq.asn
- --
- -- Seq-map-table and Seq-code-table REQUIRE that codes start with 0
- -- and increase continuously. So IUPAC codes, which are upper case
- -- letters, will always have 65 empty (0) cells, indices 0 to 64, before the
- -- codes begin. This allows every code to be used directly as a table index.
- --
- -- Valid names for code tables are:
- -- IUPACna
- -- IUPACaa
- -- IUPACeaa
- -- IUPACaa3 3 letter amino acid codes : parallels IUPACeaa
- -- display only, not a data exchange type
- -- NCBI2na
- -- NCBI4na
- -- NCBI8na
- -- NCBI8aa
- -- NCBIstdaa
- -- probability types map to IUPAC types for display as characters
- -- Module NCBI-SeqCode: code tables and code-to-code mapping tables.
- -- NOTE(review): Seq-code-type is used by both exported table types but is
- -- not itself listed in EXPORTS.
- NCBI-SeqCode DEFINITIONS ::=
- BEGIN
- EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
- Seq-code-type ::= ENUMERATED { -- sequence representations
- iupacna (1) , -- IUPAC 1 letter nuc acid code
- iupacaa (2) , -- IUPAC 1 letter amino acid code
- ncbi2na (3) , -- 2 bit nucleic acid code
- ncbi4na (4) , -- 4 bit nucleic acid code
- ncbi8na (5) , -- 8 bit extended nucleic acid code
- ncbipna (6) , -- nucleic acid probabilities
- ncbi8aa (7) , -- 8 bit extended amino acid codes
- ncbieaa (8) , -- extended ASCII 1 letter aa codes
- ncbipaa (9) , -- amino acid probabilities
- iupacaa3 (10) , -- 3 letter code only for display
- ncbistdaa (11) } -- consecutive codes for std aas, 0-25
- -- Seq-map-table: converts values of one code (from) into another (to).
- -- start-at may be omitted, in which case it is 0.
- Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
- from Seq-code-type , -- code to map from
- to Seq-code-type , -- code to map to
- num INTEGER , -- number of rows in table
- start-at INTEGER DEFAULT 0 , -- index offset of first element
- table SEQUENCE OF INTEGER } -- table of values, in from-to order
- -- Seq-code-table: symbol and name for each value of one code.
- -- Each table row is an anonymous SEQUENCE of symbol and name;
- -- comps is optional.
- Seq-code-table ::= SEQUENCE { -- for names of coded values
- code Seq-code-type , -- name of code
- num INTEGER , -- number of rows in table
- one-letter BOOLEAN , -- symbol is ALWAYS 1 letter?
- start-at INTEGER DEFAULT 0 , -- index offset of first element
- table SEQUENCE OF
- SEQUENCE {
- symbol VisibleString , -- the printed symbol or letter
- name VisibleString } , -- an explanatory name or string
- comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
- -- Seq-code-set: bundle of code tables and map tables; either part
- -- may be absent.
- Seq-code-set ::= SEQUENCE { -- for distribution
- codes SET OF Seq-code-table OPTIONAL ,
- maps SET OF Seq-map-table OPTIONAL }
- END