mmdb1.asn
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:18k
- --
- -- ===========================================================================
- -- PRODUCTION $Log: mmdb1.asn,v $
- -- PRODUCTION Revision 1000.0 2003/10/29 21:21:26 gouriano
- -- PRODUCTION PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R6.0
- -- PRODUCTION
- -- ===========================================================================
- --
- --$Revision: 1000.0 $
- --**********************************************************************
- --
- -- Biological Macromolecule 3-D Structure Data Types for MMDB,
- -- A Molecular Modeling Database
- --
- -- Definitions for a biomolecular assembly and the MMDB database
- --
- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- July 1995
- --
- --**********************************************************************
- -- Contents of the MMDB database are currently based on files distributed by
- -- the Protein Data Bank, PDB. These data are changed in form, as described
- -- in this specification. To some extent they are also changed in content, in
- -- that many data items implicit in PDB are made explicit, and others are
- -- corrected or omitted as a consequence of validation checks. The semantics
- -- of MMDB data items are indicated by comments within the specification below.
- -- These comments explain in detail the manner in which data items from PDB
- -- have been mapped into MMDB.
- MMDB DEFINITIONS ::=
- BEGIN
- EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
- Biostruc-residue-graph-set;
- IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph
- Biostruc-model FROM MMDB-Structural-model
- Biostruc-feature-set FROM MMDB-Features
- Pub FROM NCBI-Pub
- Date, Object-id, Dbtag FROM NCBI-General;
- -- A structure report or "biostruc" describes the components of a biomolecular
- -- assembly in terms of their names and descriptions, and a chemical graph
- -- giving atomic formula, connectivity and chirality. It also gives one or more
- -- three-dimensional model structures, literally a mapping of the atoms,
- -- residues and/or molecules of each component into a measured three-
- -- dimensional space. Structure may also be described by named features, which
- -- associate nodes in the chemical graph, or regions in space, with text or
- -- numeric descriptors.
- -- Note that a biostruc may also contain cross references to other databases,
- -- including citations to relevant scientific literature. These cross
- -- references use object types from other NCBI data specifications, which are
- -- "imported" into MMDB, and not repeated in this specification.
- Biostruc ::= SEQUENCE { -- structure report for one biomolecular assembly
- id SEQUENCE OF Biostruc-id, -- required; identifiers for the assembly
- descr SEQUENCE OF Biostruc-descr OPTIONAL, -- names, comments, history and citations
- chemical-graph Biostruc-graph, -- required; type imported from MMDB-Chemical-graph
- features SEQUENCE OF Biostruc-feature-set OPTIONAL, -- named features; type imported from MMDB-Features
- model SEQUENCE OF Biostruc-model OPTIONAL } -- three-dimensional model structures; type imported from MMDB-Structural-model
- -- A Biostruc-id is one of a collection of identifiers for the molecular assembly.
- -- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable
- -- identifiers. Other id's (other-database, local-id) are synonyms.
- Biostruc-id ::= CHOICE { -- a value carries exactly one of the identifier forms below
- mmdb-id Mmdb-id, -- NCBI-assigned; intended to be unique and stable
- other-database Dbtag, -- synonym; Dbtag is imported from NCBI-General
- local-id Object-id } -- synonym; Object-id is imported from NCBI-General
- Mmdb-id ::= INTEGER -- NCBI-assigned MMDB identifier, carried as a plain integer
- -- The description of a biostruc refers to both the reported chemical and
- -- spatial structure of a biomolecular assembly. PDB-derived descriptors
- -- which refer specifically to the chemical components or spatial structure
- -- are not provided here, but instead as descriptors of the biostruc-graph or
- -- biostruc-model. For PDB-derived structures the biostruc name is the PDB
- -- id-code. PDB-derived citations appear as publications within the biostruc
- -- description, and include a data-submission citation derived from PDB AUTHOR
- -- records. Citations are described using the NCBI Pub specification.
- Biostruc-descr ::= CHOICE { -- one descriptor item; containers hold a SEQUENCE OF these
- name VisibleString, -- for PDB-derived structures, the PDB id-code
- pdb-comment VisibleString, -- presumably free text carried over from PDB; content not specified here
- other-comment VisibleString, -- presumably free text from other sources; content not specified here
- history Biostruc-history, -- origin and update history within MMDB
- attribution Pub } -- citation; Pub is imported from NCBI-Pub
- -- The history of a biostruc indicates its origin and its update history
- -- within MMDB, the NCBI-maintained molecular structure database.
- Biostruc-history ::= SEQUENCE { -- origin and update history; every component is optional
- replaces Biostruc-replace OPTIONAL, -- presumably the entry this biostruc supersedes; TODO confirm
- replaced-by Biostruc-replace OPTIONAL, -- presumably the entry that supersedes this biostruc; TODO confirm
- data-source Biostruc-source OPTIONAL } -- reference to the database the biostruc originates from
- Biostruc-replace ::= SEQUENCE { -- one replacement record: an identifier paired with a date
- id Biostruc-id, -- identifier of the other biostruc in the replacement
- date Date } -- Date is imported from NCBI-General; presumably the replacement date
- -- The origin of a biostruc is a reference to another database. PDB release
- -- date and PDB-assigned id codes are recorded here, as are the PDB-assigned
- -- entry date and replacement history.
- Biostruc-source ::= SEQUENCE { -- reference to the database a biostruc originates from
- name-of-database VisibleString, -- required
- version-of-database CHOICE { -- optional; given either as a date or as a code
- release-date Date,
- release-code VisibleString } OPTIONAL,
- database-entry-id Biostruc-id, -- required; for PDB, the PDB-assigned id code
- database-entry-date Date, -- required; for PDB, the PDB-assigned entry date
- database-entry-history SEQUENCE OF VisibleString OPTIONAL} -- for PDB, the replacement history
- -- A biostruc set is a means to collect ASN.1 data for many biostrucs in
- -- one file, as convenient for application programs. The object type is not
- -- intended to imply similarity of the biostrucs grouped together.
- Biostruc-set ::= SEQUENCE { -- container collecting many biostrucs in one file
- id SEQUENCE OF Biostruc-id OPTIONAL, -- presumably identifiers of the set itself; TODO confirm
- descr SEQUENCE OF Biostruc-descr OPTIONAL, -- presumably descriptors of the set itself; TODO confirm
- biostrucs SEQUENCE OF Biostruc } -- required; grouping does not imply similarity of the members
- -- A biostruc annotation set is a means to collect ASN.1 data for biostruc
- -- features into one file. The object type is intended as a means to store
- -- feature annotation of similar type, such as "core" definitions for a
- -- threading program, or structure-structure alignments for a structure-
- -- similarity browser.
- Biostruc-annot-set ::= SEQUENCE { -- container collecting biostruc features into one file
- id SEQUENCE OF Biostruc-id OPTIONAL, -- presumably identifiers of the annotation set; TODO confirm
- descr SEQUENCE OF Biostruc-descr OPTIONAL, -- presumably descriptors of the annotation set; TODO confirm
- features SEQUENCE OF Biostruc-feature-set } -- required; intended to hold feature annotation of similar type
- -- A biostruc residue graph set is a collection of residue graphs. The object
- -- type is intended as a means to record dictionaries containing the chemical
- -- subgraphs of "standard" residue types, which are used as a means to
- -- simplify description of the covalent structure of a biomolecular assembly.
- -- The standard residue graph dictionary supplied with the MMDB database
- -- contains 20 standard L amino acids and 8 standard ribonucleotide groups.
- -- These graphs are complete, including explicit hydrogen atoms and separate
- -- instances for the terminal polypeptide and polynucleotide residues.
- Biostruc-residue-graph-set ::= SEQUENCE { -- dictionary of residue graphs, e.g. the "standard" residue types
- id SEQUENCE OF Biostruc-id OPTIONAL, -- presumably identifiers of the dictionary; TODO confirm
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- note: Biomol-descr here, not Biostruc-descr as in the other sets
- residue-graphs SEQUENCE OF Residue-graph } -- required; Residue-graph is imported from MMDB-Chemical-graph
- END
- --**********************************************************************
- --
- -- Biological Macromolecule 3-D Structure Data Types for MMDB,
- -- A Molecular Modeling Database
- --
- -- Definitions for a chemical graph
- --
- -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
- --
- -- National Center for Biotechnology Information
- -- National Institutes of Health
- -- Bethesda, MD 20894 USA
- --
- -- July, 1995
- --
- --**********************************************************************
- MMDB-Chemical-graph DEFINITIONS ::=
- BEGIN
- EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
- Molecule-id, Residue-id, Atom-id;
- IMPORTS Pub FROM NCBI-Pub
- BioSource FROM NCBI-BioSource
- Seq-id FROM NCBI-Seqloc
- Biostruc-id FROM MMDB;
- -- A biostruc graph contains the complete chemical graph of the biomolecular
- -- assembly. The assembly graph is defined hierarchically, in terms of
- -- subgraphs of component molecules. For PDB-derived biostrucs,
- -- the molecules forming the assembly are the individual biopolymer chains and
- -- any non-polymer or "heterogen" groups which are present.
- -- The PDB-derived "compound name" field appears as the name within the
- -- biostruc-graph description. PDB "class" and "source" fields appear as
- -- explicit attributes. PDB-derived structures are assigned an assembly type
- -- of "other" unless they have been further classified as the "physiological
- -- form" or "crystallographic cell" contents. If they have, the source of the
- -- type classification appears as a citation within the assembly description.
- -- Note that the biostruc-graph also includes as literals the subgraphs of
- -- any nonstandard residues present within it. For PDB-derived biostrucs these
- -- subgraphs are constructed automatically, with validation as described below.
- Biostruc-graph ::= SEQUENCE { -- complete chemical graph of the biomolecular assembly
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- the PDB-derived "compound name" appears as the name here
- molecule-graphs SEQUENCE OF Molecule-graph, -- required; subgraphs of the component molecules
- inter-molecule-bonds SEQUENCE OF Inter-residue-bond OPTIONAL, -- note: reuses the Inter-residue-bond type
- residue-graphs SEQUENCE OF Residue-graph OPTIONAL } -- literal subgraphs of any nonstandard residues present
- -- A biomolecule description refers to the chemical structure of a molecule or
- -- component substructures. This descriptor type is used at the level of
- -- assemblies, molecules and residues, and also for residue-graph dictionaries.
- -- The BioSource object type is drawn from NCBI taxonomy data specifications,
- -- and is not repeated here.
- Biomol-descr ::= CHOICE { -- one descriptor item for an assembly, molecule, residue or residue-graph dictionary
- name VisibleString,
- pdb-class VisibleString, -- PDB "class" field
- pdb-source VisibleString, -- PDB "source" field
- pdb-comment VisibleString,
- other-comment VisibleString,
- organism BioSource, -- BioSource is imported from NCBI-BioSource
- attribution Pub, -- Pub is imported from NCBI-Pub
- assembly-type INTEGER { physiological-form(1), -- PDB-derived structures get other(255) unless further classified
- crystallographic-cell(2),
- other(255) },
- molecule-type INTEGER { dna(1), -- for PDB-derived molecules, assigned by "majority rule" over residue types
- rna(2),
- protein(3),
- other-biopolymer(4),
- solvent(5),
- other-nonpolymer(6),
- other(255) } }
- -- A molecule chemical graph is defined by a sequence of residues. Nonpolymers
- -- are described in the same way, but may contain only a single residue.
- -- Biopolymer molecules are identified within PDB entries according to their
- -- appearance on SEQRES records, which formally define a biopolymer as such.
- -- Biopolymers are defined by the distinction between ATOM and HETATM
- -- coordinate records only in cases where the chemical sequence from SEQRES
- -- is in conflict with coordinate data. The PDB-assigned chain code appears as
- -- the name within the molecule descriptions of the biopolymers.
- -- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups,
- -- excluding any HETEROGEN groups which represent modified biopolymer residues.
- -- These molecules are named according to the chain, residue type and residue
- -- number fields as assigned by PDB. Any description appearing in the PDB HET
- -- record appears as a pdb-comment within the molecule description.
- -- Molecule types for PDB-derived molecule graphs are assigned by matching
- -- residue and atom names against the PDB-documented standard types for protein,
- -- DNA and RNA, and against residue codes commonly used to indicate solvent.
- -- Classification is by "majority rule". If more than half of the residues in
- -- a biopolymer are standard groups of one type, then the molecule is of that
- -- type, and otherwise classified as "other". Note that this classification does
- -- not preclude the presence of modified residues, but insists they constitute
- -- less than half the biopolymer. Non-polymers are classified only as "solvent"
- -- or "other".
- -- Note that a molecule graph may also contain a set of cross references
- -- to biopolymer sequence databases. All biopolymer molecules in MMDB contain
- -- appropriate identifiers for the corresponding entry in the NCBI-Sequences
- -- database, in particular the NCBI "gi" number, which may be used for sequence
- -- retrieval. The Seq-id object type is defined in the NCBI molecular sequence
- -- specification, and not repeated here.
- Molecule-graph ::= SEQUENCE { -- chemical graph of one molecule, defined by a sequence of residues
- id Molecule-id, -- required
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- for PDB biopolymers the name is the PDB-assigned chain code
- seq-id Seq-id OPTIONAL, -- cross reference to a sequence database; Seq-id is imported from NCBI-Seqloc
- residue-sequence SEQUENCE OF Residue, -- required; nonpolymers may contain only a single residue
- inter-residue-bonds SEQUENCE OF Inter-residue-bond OPTIONAL } -- presumably bonds between residues of this molecule; TODO confirm
-
- Molecule-id ::= INTEGER -- integer identifier of a Molecule-graph; exported for use by other modules
- -- Residues may be assigned a text-string name as well as an id number. PDB
- -- assigned residue numbers appear as the residue name.
- Residue ::= SEQUENCE { -- one residue in a molecule's residue sequence
- id Residue-id, -- required id number
- name VisibleString OPTIONAL, -- PDB-assigned residue numbers appear here
- residue-graph Residue-graph-pntr } -- required reference to the residue graph for this residue
- Residue-id ::= INTEGER -- integer identifier of a Residue; exported for use by other modules
- -- Residue graphs from different sources may be referenced within a molecule
- -- graph. The allowed choices are the nonstandard residue graphs included in
- -- the present biostruc, residue graphs within other biostrucs, or residue
- -- graphs within tables of standard residue definitions.
- Residue-graph-pntr ::= CHOICE { -- selects where the referenced residue graph is found
- local Residue-graph-id, -- nonstandard residue graph included in the present biostruc
- biostruc Biostruc-graph-pntr, -- residue graph within another biostruc
- standard Biostruc-residue-graph-set-pntr } -- residue graph within a table of standard residue definitions
-
- Biostruc-graph-pntr ::= SEQUENCE { -- addresses a residue graph through the biostruc that holds it
- biostruc-id Biostruc-id, -- Biostruc-id is imported from MMDB
- residue-graph-id Residue-graph-id } -- id of the residue graph within that biostruc
- Biostruc-residue-graph-set-pntr ::= SEQUENCE { -- addresses a residue graph through the residue-graph set that holds it
- biostruc-residue-graph-set-id Biostruc-id, -- the set is identified with a Biostruc-id
- residue-graph-id Residue-graph-id } -- id of the residue graph within that set
- -- Residue graphs define atomic formulae, connectivity, chirality, and names.
- -- For standard residue graphs from the MMDB dictionary the PDB-assigned
- -- residue-type code appears as the name within the residue graph description,
- -- and the full trivial name of the residue as a comment within that
- -- description. For any nonstandard residue graphs provided with an MMDB
- -- biostruc the PDB-assigned residue-type code similarly appears as the name
- -- within the description, and any information provided on PDB HET records as
- -- a pdb-comment within that description.
- -- Note that nonstandard residue graphs for a PDB-derived biostruc may be
- -- incomplete. Current PDB format cannot represent connectivity for groups
- -- which are disordered, and for which no coordinates are given. In these
- -- cases the residue graph defined in MMDB represents only the subgraph that
- -- could be identified from available ATOM, HETATM and CONECT records.
- Residue-graph ::= SEQUENCE { -- atomic formula, connectivity, chirality and names for one residue type
- id Residue-graph-id, -- required
- descr SEQUENCE OF Biomol-descr OPTIONAL, -- the PDB-assigned residue-type code appears as the name
- residue-type INTEGER { deoxyribonucleotide(1), -- optional classification of the residue
- ribonucleotide(2),
- amino-acid(3),
- other(255) } OPTIONAL,
- iupac-code SEQUENCE OF VisibleString OPTIONAL,
- atoms SEQUENCE OF Atom, -- required
- bonds SEQUENCE OF Intra-residue-bond, -- required; may be incomplete for PDB-derived nonstandard residues
- chiral-centers SEQUENCE OF Chiral-center OPTIONAL } -- not assigned automatically for PDB-derived nonstandard residues
-
- Residue-graph-id ::= INTEGER -- integer identifier of a Residue-graph; not listed in this module's EXPORTS
- -- Atoms in residue graphs are defined by elemental symbols and names. PDB-
- -- assigned atom names appear here in the name field, except in cases of known
- -- PDB synonyms. In these cases atom names are mapped to the names used in the
- -- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where
- -- PDB practice allows synonyms for several atom types. For PDB atoms the
- -- elemental symbol is obtained by parsing the PDB atom name field, allowing
- -- for known special-semantics cases where the atom name does not follow the
- -- documented encoding rule. Ionizable protons are identified within standard
- -- residue graphs in the MMDB dictionary, but not within automatically-defined
- -- nonstandard graphs.
- Atom ::= SEQUENCE { -- one atom of a residue graph, defined by elemental symbol and name
- id Atom-id, -- required
- name VisibleString OPTIONAL, -- PDB-assigned atom name, or the MMDB dictionary name for known PDB synonyms
- iupac-code SEQUENCE OF VisibleString OPTIONAL,
- element ENUMERATED { -- required; h(1) through lr(103) are numbered by atomic number
- h(1), he(2), li(3), be(4), b(5),
- c(6), n(7), o(8), f(9), ne(10),
- na(11), mg(12), al(13), si(14), p(15),
- s(16), cl(17), ar(18), k(19), ca(20),
- sc(21), ti(22), v(23), cr(24), mn(25),
- fe(26), co(27), ni(28), cu(29), zn(30),
- ga(31), ge(32), as(33), se(34), br(35),
- kr(36), rb(37), sr(38), y(39), zr(40),
- nb(41), mo(42), tc(43), ru(44), rh(45),
- pd(46), ag(47), cd(48), in(49), sn(50),
- sb(51), te(52), i(53), xe(54), cs(55),
- ba(56), la(57), ce(58), pr(59), nd(60),
- pm(61), sm(62), eu(63), gd(64), tb(65),
- dy(66), ho(67), er(68), tm(69), yb(70),
- lu(71), hf(72), ta(73), w(74), re(75),
- os(76), ir(77), pt(78), au(79), hg(80),
- tl(81), pb(82), bi(83), po(84), at(85),
- rn(86), fr(87), ra(88), ac(89), th(90),
- pa(91), u(92), np(93), pu(94), am(95),
- cm(96), bk(97), cf(98), es(99),
- fm(100), md(101), no(102), lr(103),
- other(254), unknown(255) }, -- the two fallback values sit outside the 1 to 103 range
- ionizable-proton ENUMERATED { -- optional; identified only within standard MMDB dictionary residue graphs
- true(1),
- false(2),
- unknown(255) } OPTIONAL }
-
- Atom-id ::= INTEGER -- integer identifier of an Atom; exported for use by other modules
- -- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
- -- Unlike Inter-residue-bond defined later, its participating atoms are part of
- -- a residue subgraph dictionary, not part of a specific biostruc-graph.
- -- For residue graphs in the standard MMDB dictionary bonds are defined from
- -- the known chemical structures of amino acids and nucleotides. For
- -- nonstandard residue graphs bonds are defined from PDB CONECT records, with
- -- validation for consistency with coordinate data, and from stereochemical
- -- calculation to identify unreported bonds. Validation and bond identification
- -- are based on comparison of inter-atomic distances to the sum of covalent
- -- radii for the corresponding elements.
- Intra-residue-bond ::= SEQUENCE { -- bond between two atoms of the same residue graph
- atom-id-1 Atom-id, -- plain Atom-id: both atoms belong to the residue subgraph itself
- atom-id-2 Atom-id,
- bond-order INTEGER { -- optional; same named values as the bond-order of Inter-residue-bond
- single(1),
- partial-double(2),
- aromatic(3),
- double(4),
- triple(5),
- other(6),
- unknown(255)} OPTIONAL }
- -- Chiral centers are atoms with tetrahedral geometry. Chirality is defined
- -- by a chiral volume involving the chiral center and 3 other atoms bonded to
- -- it. For any coordinates assigned to atoms c, n1, n2, and n3, the scalar
- -- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
- -- sign. The calculation assumes an orthogonal right-handed coordinate system
- -- as is used for MMDB model structures.
- -- Chirality is defined for standard residues in the MMDB dictionary, but is
- -- not assigned automatically for PDB-derived nonstandard residues. If assigned
- -- for nonstandard residues, the source of chirality information is described
- -- by a citation within the residue description.
- Chiral-center ::= SEQUENCE { -- chirality of one tetrahedral atom, expressed as a signed chiral volume
- c Atom-id, -- the chiral center
- n1 Atom-id, -- n1, n2 and n3 are three other atoms bonded to c
- n2 Atom-id,
- n3 Atom-id,
- sign ENUMERATED { positive(1), -- required sign of (n1-c) dot ( (n2-c) cross (n3-c) )
- negative(2) } }
- -- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived
- -- structures bonds are identified from biopolymer connectivity according to
- -- SEQRES and from other connectivity information on SSBOND and CONECT
- -- records. These data are validated and unreported bonds identified by
- -- stereochemical calculation, using the same criteria as for intra-residue
- -- bonds.
- Inter-residue-bond ::= SEQUENCE { -- bond defined by a reference to two atoms
- atom-id-1 Atom-pntr, -- full hierarchical pointer (molecule, residue, atom), unlike Intra-residue-bond
- atom-id-2 Atom-pntr,
- bond-order INTEGER { -- optional; same named values as the bond-order of Intra-residue-bond
- single(1),
- partial-double(2),
- aromatic(3),
- double(4),
- triple(5),
- other(6),
- unknown(255)} OPTIONAL }
- -- Atoms, residues and molecules within the current biostruc are referenced
- -- by hierarchical pointers.
- Atom-pntr ::= SEQUENCE { -- hierarchical pointer to one atom within the current biostruc
- molecule-id Molecule-id, -- which molecule
- residue-id Residue-id, -- which residue within that molecule
- atom-id Atom-id } -- which atom within that residue
- Atom-pntr-set ::= SEQUENCE OF Atom-pntr -- list of atom pointers; NOTE(review): not exported and not referenced elsewhere in this module
- END