生物技术

开发平台：

C/C++

all.asn：源码内容

enum Num-enum , -- enumerated names for residues
ref Num-ref , -- by reference to another sequence
real Num-real } -- supports mapping to a float system
-- Num-cont: a continuous numbering system used for display.
Num-cont ::= SEQUENCE {
    -- number assigned to the first residue
    refnum    INTEGER DEFAULT 1 ,
    -- is the value 0 used in the numbering?
    has-zero  BOOLEAN DEFAULT FALSE ,
    -- do the numbers ascend?
    ascending BOOLEAN DEFAULT TRUE
}
-- Num-enum: numbering by attaching arbitrary tags to residues.
Num-enum ::= SEQUENCE {
    -- number of tags to follow
    num   INTEGER ,
    -- the tags themselves
    names SEQUENCE OF VisibleString
}
-- Num-ref: numbering by reference to other sequences.
Num-ref ::= SEQUENCE {
    -- type of reference
    type ENUMERATED {
        not-set (0) ,
        sources (1) ,       -- by segmented or const seq sources
        aligns  (2)         -- by alignments given below
    } ,
    aligns Seq-align OPTIONAL
}
-- Num-real: maps the integer system used by Bioseq onto a floating
-- point system, using
--     position = (a * int_position) + b
Num-real ::= SEQUENCE {
    a     REAL ,
    b     REAL ,
    units VisibleString OPTIONAL
}
-- Pubdesc: how a sequence was presented in a publication.
Pubdesc ::= SEQUENCE {
    pub         Pub-equiv ,                 -- the citation(s)
    name        VisibleString OPTIONAL ,    -- name used in paper
    fig         VisibleString OPTIONAL ,    -- figure in paper
    num         Numbering OPTIONAL ,        -- numbering from paper
    numexc      BOOLEAN OPTIONAL ,          -- numbering problem with paper
    poly-a      BOOLEAN OPTIONAL ,          -- poly A tail indicated in figure?
    maploc      VisibleString OPTIONAL ,    -- map location reported in paper
    seq-raw     StringStore OPTIONAL ,      -- original sequence from paper
    align-group INTEGER OPTIONAL ,          -- this seq aligned with others in paper
    comment     VisibleString OPTIONAL ,    -- any comment on this pub in context
    -- type of reference in a GenBank record
    reftype INTEGER {
        seq       (0) ,     -- refers to sequence
        sites     (1) ,     -- refers to unspecified features
        feats     (2) ,     -- refers to specified features
        no-target (3)       -- nothing specified (EMBL)
    } DEFAULT seq
}
Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc
--*** Instances of sequences *******************************
--*
-- Seq-inst: an instance of a sequence, i.e. the sequence data itself.
Seq-inst ::= SEQUENCE {
    -- representation class
    repr ENUMERATED {
        not-set (0) ,       -- empty
        virtual (1) ,       -- no seq data
        raw     (2) ,       -- continuous sequence
        seg     (3) ,       -- segmented sequence
        const   (4) ,       -- constructed sequence
        ref     (5) ,       -- reference to another sequence
        consen  (6) ,       -- consensus sequence or pattern
        map     (7) ,       -- ordered map of any kind
        delta   (8) ,       -- sequence made by changes (delta) to others
        other   (255)
    } ,
    -- molecule class in living organism
    mol ENUMERATED {
        not-set (0) ,       -- > cdna = rna
        dna     (1) ,
        rna     (2) ,
        aa      (3) ,
        na      (4) ,       -- just a nucleic acid
        other   (255)
    } ,
    -- length of sequence in residues
    length INTEGER OPTIONAL ,
    -- length uncertainty
    fuzz Int-fuzz OPTIONAL ,
    -- topology of molecule
    topology ENUMERATED {
        not-set  (0) ,
        linear   (1) ,
        circular (2) ,
        tandem   (3) ,      -- some part of tandem repeat
        other    (255)
    } DEFAULT linear ,
    -- strandedness in living organism
    strand ENUMERATED {
        not-set (0) ,
        ss      (1) ,       -- single strand
        ds      (2) ,       -- double strand
        mixed   (3) ,
        other   (255)
    } OPTIONAL ,            -- default ds for DNA, ss for RNA, pept
    -- the sequence
    seq-data Seq-data OPTIONAL ,
    -- extensions for special types
    ext Seq-ext OPTIONAL ,
    -- sequence history
    hist Seq-hist OPTIONAL
}
--*** Sequence Extensions **********************************
--* for representing more complex types
--* const type uses Seq-hist.assembly
-- Seq-ext: extension data for the more complex representation types.
Seq-ext ::= CHOICE {
    seg   Seg-ext ,         -- segmented sequences
    ref   Ref-ext ,         -- hot link to another sequence (a view)
    map   Map-ext ,         -- ordered map of markers
    delta Delta-ext
}
-- Payload types for the four alternatives of Seq-ext.
Seg-ext ::= SEQUENCE OF Seq-loc -- segmented sequence: a list of locations
Ref-ext ::= Seq-loc -- reference: a single location
Map-ext ::= SEQUENCE OF Seq-feat -- map: a list of features
Delta-ext ::= SEQUENCE OF Delta-seq -- delta: a list of Delta-seq items
-- Delta-seq: one element of a Delta-ext.
Delta-seq ::= CHOICE {
    -- point to a sequence
    loc     Seq-loc ,
    -- a piece of sequence
    literal Seq-literal
}
-- Seq-literal: a literal piece of sequence; the data itself is optional.
Seq-literal ::= SEQUENCE {
    -- must give a length in residues
    length   INTEGER ,
    -- could be unsure
    fuzz     Int-fuzz OPTIONAL ,
    -- may have the data
    seq-data Seq-data OPTIONAL
}
--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)
-- Seq-hist: the history record of a sequence.
Seq-hist ::= SEQUENCE {
    -- how was this assembled?
    assembly    SET OF Seq-align OPTIONAL ,
    -- seq makes these seqs obsolete
    replaces    Seq-hist-rec OPTIONAL ,
    -- these seqs make this one obsolete
    replaced-by Seq-hist-rec OPTIONAL ,
    deleted CHOICE {
        bool BOOLEAN ,
        date Date
    } OPTIONAL
}
-- Seq-hist-rec: a set of sequence ids with an optional date, as used by
-- the replaces / replaced-by components of Seq-hist.
Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL ,
    ids  SET OF Seq-id
}
--*** Various internal sequence representations ************
--* all are controlled, fixed length forms
-- Seq-data: the available sequence representations.
Seq-data ::= CHOICE {
    iupacna   IUPACna ,     -- IUPAC 1 letter nuc acid code
    iupacaa   IUPACaa ,     -- IUPAC 1 letter amino acid code
    ncbi2na   NCBI2na ,     -- 2 bit nucleic acid code
    ncbi4na   NCBI4na ,     -- 4 bit nucleic acid code
    ncbi8na   NCBI8na ,     -- 8 bit extended nucleic acid code
    ncbipna   NCBIpna ,     -- nucleic acid probabilities
    ncbi8aa   NCBI8aa ,     -- 8 bit extended amino acid codes
    ncbieaa   NCBIeaa ,     -- extended ASCII 1 letter aa codes
    ncbipaa   NCBIpaa ,     -- amino acid probabilities
    ncbistdaa NCBIstdaa     -- consecutive codes for std aas
}
-- Concrete encodings for the alternatives of Seq-data.

-- IUPAC 1 letter codes, no spaces
IUPACna ::= StringStore

-- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore

-- 00=A, 01=C, 10=G, 11=T
NCBI2na ::= OCTET STRING

-- 1 bit each for agct
--   0001=A, 0010=C, 0100=G, 1000=T/U
--   0101=Purine, 1010=Pyrimidine, etc
NCBI4na ::= OCTET STRING

-- for modified nucleic acids
NCBI8na ::= OCTET STRING

-- 5 octets/base, prob for a,c,g,t,n
-- probabilities are coded 0-255 = 0.0-1.0
NCBIpna ::= OCTET STRING

-- for modified amino acids
NCBI8aa ::= OCTET STRING

-- ASCII extended 1 letter aa codes
-- IUPAC codes + U=selenocysteine
NCBIeaa ::= StringStore

-- 25 octets/aa, prob for IUPAC aas in order:
--   A-Y,B,Z,X,(ter),anything
-- probabilities are coded 0-255 = 0.0-1.0
NCBIpaa ::= OCTET STRING

-- codes 0-25, 1 per byte
NCBIstdaa ::= OCTET STRING
--*** Sequence Annotation *************************************
--*
-- Annot-id: an identifier for a Seq-annot, in one of three forms.
Annot-id ::= CHOICE {
    local   Object-id ,
    ncbi    INTEGER ,
    general Dbtag
}
-- Annot-descr: an unordered collection of Annotdesc descriptors.
Annot-descr ::= SET OF Annotdesc
-- Annotdesc: a single descriptor attached to a collection of annotations.
Annotdesc ::= CHOICE {
    name        VisibleString ,     -- a short name for this collection
    title       VisibleString ,     -- a title for this collection
    comment     VisibleString ,     -- a more extensive comment
    pub         Pubdesc ,           -- a reference to the publication
    user        User-object ,       -- user defined object
    create-date Date ,              -- date entry first created/released
    update-date Date ,              -- date of last update
    src         Seq-id ,            -- source sequence from which annot came
    align       Align-def ,         -- definition of the SeqAligns
    region      Seq-loc             -- all contents cover this region
}
-- Align-def: classifies the alignments held in an align Seq-annot.
Align-def ::= SEQUENCE {
    -- class of align Seq-annot
    align-type INTEGER {
        ref    (1) ,        -- set of alignments to the same sequence
        alt    (2) ,        -- set of alternate alignments of the same seqs
        blocks (3) ,        -- set of aligned blocks in the same seqs
        other  (255)
    } ,
    -- used for the one ref seqid for now
    ids SET OF Seq-id OPTIONAL
}
-- Seq-annot: a collection of annotations of a single kind
-- (features, alignments, graphs, ids or locations).
Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,
    -- source of annotation
    db INTEGER {
        genbank (1) ,
        embl    (2) ,
        ddbj    (3) ,
        pir     (4) ,
        sp      (5) ,
        bbone   (6) ,
        pdb     (7) ,
        other   (255)
    } OPTIONAL ,
    -- source if "other" above
    name VisibleString OPTIONAL ,
    -- used only for stand alone Seq-annots
    desc Annot-descr OPTIONAL ,
    data CHOICE {
        ftable SET OF Seq-feat ,
        align  SET OF Seq-align ,
        graph  SET OF Seq-graph ,
        ids    SET OF Seq-id ,      -- used for communication between tools
        locs   SET OF Seq-loc       -- used for communication between tools
    }
}
END
--$Revision: 1000.1 $
--********************************************************************
--
-- Direct Submission of Sequence Data
-- James Ostell, 1991
--
-- This is a trial specification for direct submission of sequence
-- data worked out between NCBI and EMBL
-- Later revised to reflect work with GenBank and Integrated database
--
-- Version 3.0, 1994
-- This is the official NCBI sequence submission format now.
--
--********************************************************************
NCBI-Submit DEFINITIONS ::=
BEGIN
EXPORTS Seq-submit, Contact-info;
IMPORTS Cit-sub, Author FROM NCBI-Biblio
Date, Object-id FROM NCBI-General
Seq-annot FROM NCBI-Sequence
Seq-id FROM NCBI-Seqloc
Seq-entry FROM NCBI-Seqset;
-- Seq-submit: a direct submission, made of a Submit-block plus the
-- submitted data.
Seq-submit ::= SEQUENCE {
    sub Submit-block ,
    data CHOICE {
        entrys SET OF Seq-entry ,   -- sequence(s)
        annots SET OF Seq-annot ,   -- annotation(s)
        delete SET OF Seq-id        -- deletions of entries
    }
}
-- Submit-block: administrative information accompanying a submission.
Submit-block ::= SEQUENCE {
    -- who to contact
    contact Contact-info ,
    -- citation for this submission
    cit Cit-sub ,
    -- hold until publish
    hup BOOLEAN DEFAULT FALSE ,
    -- release by date
    reldate Date OPTIONAL ,
    -- type of submission
    subtype INTEGER {
        new      (1) ,      -- new data
        update   (2) ,      -- update by author
        revision (3) ,      -- 3rd party (non-author) update
        other    (255)
    } OPTIONAL ,
    -- tool used to make submission
    tool VisibleString OPTIONAL ,
    -- user supplied id for this submission
    user-tag VisibleString OPTIONAL ,
    -- user comments/advice to database
    comment VisibleString OPTIONAL
}
-- Contact-info: who to contact to discuss the submission.
Contact-info ::= SEQUENCE {
    name           VisibleString OPTIONAL ,     -- OBSOLETE: will be removed
    address        SEQUENCE OF VisibleString OPTIONAL ,
    phone          VisibleString OPTIONAL ,
    fax            VisibleString OPTIONAL ,
    email          VisibleString OPTIONAL ,
    telex          VisibleString OPTIONAL ,
    owner-id       Object-id OPTIONAL ,         -- for owner accounts
    password       OCTET STRING OPTIONAL ,
    last-name      VisibleString OPTIONAL ,     -- structured to replace name above
    first-name     VisibleString OPTIONAL ,
    middle-initial VisibleString OPTIONAL ,
    contact        Author OPTIONAL              -- WARNING: this will replace the above
}
END
--$Revision: 1000.1 $
--****************************************************************
--
-- NCBI Project Definition Module
-- by Jim Ostell and Jonathan Kans, 1998
--
--****************************************************************
NCBI-Project DEFINITIONS ::=
BEGIN
EXPORTS Project, Project-item;
IMPORTS Date FROM NCBI-General
PubMedId FROM NCBI-Biblio
Seq-id, Seq-loc FROM NCBI-Seqloc
Seq-annot, Pubdesc FROM NCBI-Sequence
Seq-entry FROM NCBI-Seqset
Pubmed-entry FROM NCBI-PubMed;
-- Project: an optional description plus exactly one Project-item.
Project ::= SEQUENCE {
    descr Project-descr OPTIONAL ,
    data  Project-item
}
-- Project-item: the content of a Project. Exactly one alternative is
-- present. Alternatives are grouped below by the type of their payload.
Project-item ::= CHOICE {
    -- sets of integers
    pmuid     SET OF INTEGER ,
    protuid   SET OF INTEGER ,
    nucuid    SET OF INTEGER ,
    sequid    SET OF INTEGER ,
    genomeuid SET OF INTEGER ,
    structuid SET OF INTEGER ,

    -- sets of PubMedId / Seq-id (structid carries no data: it is NULL)
    pmid      SET OF PubMedId ,
    protid    SET OF Seq-id ,
    nucid     SET OF Seq-id ,
    seqid     SET OF Seq-id ,
    genomeid  SET OF Seq-id ,
    structid  NULL ,

    -- sets of Pubmed-entry / Seq-entry (structent carries no data: it is NULL)
    pment     SET OF Pubmed-entry ,
    protent   SET OF Seq-entry ,
    nucent    SET OF Seq-entry ,
    seqent    SET OF Seq-entry ,
    genomeent SET OF Seq-entry ,
    structent NULL ,

    -- annotations, locations and nested projects
    seqannot  SET OF Seq-annot ,
    loc       SET OF Seq-loc ,
    proj      SET OF Project
}
-- Project-descr: identifiers, an optional name and optional descriptors
-- for a Project.
Project-descr ::= SEQUENCE {
    id    SET OF Project-id ,
    name  VisibleString OPTIONAL ,
    descr SET OF Projdesc OPTIONAL
}
-- Projdesc: a single descriptor of a Project.
Projdesc ::= CHOICE {
    pub     Pubdesc ,
    date    Date ,
    comment VisibleString ,
    title   VisibleString
}
-- Project-id: a project identifier, held as a plain string.
Project-id ::= VisibleString
END
--$Revision: 1000.1 $
--**********************************************************************
--
-- Biological Macromolecule 3-D Structure Data Types for MMDB,
-- A Molecular Modeling Database
--
-- Definitions for a biomolecular assembly and the MMDB database
--
-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant
--
-- National Center for Biotechnology Information
-- National Institutes of Health
-- Bethesda, MD 20894 USA
--
-- July 1995
--
--**********************************************************************
-- Contents of the MMDB database are currently based on files distributed by
-- the Protein Data Bank, PDB. These data are changed in form, as described
-- in this specification. To some extent they are also changed in content, in
-- that many data items implicit in PDB are made explicit, and others are
-- corrected or omitted as a consequence of validation checks. The semantics
-- of MMDB data items are indicated by comments within the specification below.
-- These comments explain in detail the manner in which data items from PDB
-- have been mapped into MMDB.
MMDB DEFINITIONS ::=
BEGIN
EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
Biostruc-residue-graph-set;
IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph
Biostruc-model FROM MMDB-Structural-model
Biostruc-feature-set FROM MMDB-Features
Pub FROM NCBI-Pub
Date, Object-id, Dbtag FROM NCBI-General;
-- A structure report or "biostruc" describes the components of a biomolecular
-- assembly in terms of their names and descriptions, and a chemical graph
-- giving atomic formula, connectivity and chirality. It also gives one or more
-- three-dimensional model structures, literally a mapping of the atoms,
-- residues and/or molecules of each component into a measured three-
-- dimensional space. Structure may also be described by named features, which
-- associate nodes in the chemical graph, or regions in space, with text or
-- numeric descriptors.
-- Note that a biostruc may also contain cross references to other databases,
-- including citations to relevant scientific literature. These cross
-- references use object types from other NCBI data specifications, which are
-- "imported" into MMDB, and not repeated in this specification.
-- Biostruc: a structure report. Only the identifiers and the chemical
-- graph are mandatory; descriptions, features and models are optional.
Biostruc ::= SEQUENCE {
    id             SEQUENCE OF Biostruc-id ,
    descr          SEQUENCE OF Biostruc-descr OPTIONAL ,
    chemical-graph Biostruc-graph ,
    features       SEQUENCE OF Biostruc-feature-set OPTIONAL ,
    model          SEQUENCE OF Biostruc-model OPTIONAL
}
-- A Biostruc-id is a collection of identifiers for the molecular assembly.
-- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable
-- identifiers. Other-id's are synonyms.
-- Biostruc-id: one identifier of a biostruc, in one of three forms.
Biostruc-id ::= CHOICE {
    mmdb-id        Mmdb-id ,
    other-database Dbtag ,
    local-id       Object-id
}
-- Mmdb-id: integer identifier used by the mmdb-id alternative of Biostruc-id.
Mmdb-id ::= INTEGER
-- The description of a biostruc refers to both the reported chemical and
-- spatial structure of a biomolecular assembly. PDB-derived descriptors
-- which refer specifically to the chemical components or spatial structure
-- are not provided here, but instead as descriptors of the biostruc-graph or
-- biostruc-model. For PDB-derived structures the biostruc name is the PDB
-- id-code. PDB-derived citations appear as publications within the biostruc
-- description, and include a data-submission citation derived from PDB AUTHOR
-- records. Citations are described using the NCBI Pub specification.
-- Biostruc-descr: a single descriptor of a biostruc.
Biostruc-descr ::= CHOICE {
    name          VisibleString ,
    pdb-comment   VisibleString ,
    other-comment VisibleString ,
    history       Biostruc-history ,
    attribution   Pub
}
-- The history of a biostruc indicates its origin and its update history
-- within MMDB, the NCBI-maintained molecular structure database.
-- Biostruc-history: replacement links and the data source of a biostruc.
-- Every component is optional.
Biostruc-history ::= SEQUENCE {
    replaces    Biostruc-replace OPTIONAL ,
    replaced-by Biostruc-replace OPTIONAL ,
    data-source Biostruc-source OPTIONAL
}
-- Biostruc-replace: a biostruc identifier paired with a date, used by
-- the replaces / replaced-by components of Biostruc-history.
Biostruc-replace ::= SEQUENCE {
    id   Biostruc-id ,
    date Date
}
-- The origin of a biostruc is a reference to another database. PDB release
-- date and PDB-assigned id codes are recorded here, as are the PDB-assigned
-- entry date and replacement history.
-- Biostruc-source: a reference to the database a biostruc originated from.
Biostruc-source ::= SEQUENCE {
    name-of-database VisibleString ,
    -- the database version, given as either a date or a code
    version-of-database CHOICE {
        release-date Date ,
        release-code VisibleString
    } OPTIONAL ,
    database-entry-id      Biostruc-id ,
    database-entry-date    Date ,
    database-entry-history SEQUENCE OF VisibleString OPTIONAL
}
-- A biostruc set is a means to collect ASN.1 data for many biostrucs in
-- one file, as convenient for application programs. The object type is not
-- intended to imply similarity of the biostrucs grouped together.
-- Biostruc-set: a list of biostrucs with optional ids and descriptions
-- for the set as a whole.
Biostruc-set ::= SEQUENCE {
    id        SEQUENCE OF Biostruc-id OPTIONAL ,
    descr     SEQUENCE OF Biostruc-descr OPTIONAL ,
    biostrucs SEQUENCE OF Biostruc
}
-- A biostruc annotation set is a means to collect ASN.1 data for biostruc
-- features into one file. The object type is intended as a means to store
-- feature annotation of similar type, such as "core" definitions for a
-- threading program, or structure-structure alignments for a structure-
-- similarity browser.
-- Biostruc-annot-set: a list of biostruc feature sets with optional ids
-- and descriptions for the set as a whole.
Biostruc-annot-set ::= SEQUENCE {
    id       SEQUENCE OF Biostruc-id OPTIONAL ,
    descr    SEQUENCE OF Biostruc-descr OPTIONAL ,
    features SEQUENCE OF Biostruc-feature-set
}
-- A biostruc residue graph set is a collection of residue graphs. The object
-- type is intended as a means to record dictionaries containing the chemical
-- subgraphs of "standard" residue types, which are used as a means to
-- simplify description of the covalent structure of a biomolecular assembly.
-- The standard residue graph dictionary supplied with the MMDB database
-- contains 20 standard L amino acids and 8 standard ribonucleotide groups.
-- These graphs are complete, including explicit hydrogen atoms and separate
-- instances for the terminal polypeptide and polynucleotide residues.
-- Biostruc-residue-graph-set: a list of residue graphs with optional ids
-- and descriptions. Note that descr uses Biomol-descr here, not
-- Biostruc-descr.
Biostruc-residue-graph-set ::= SEQUENCE {
    id             SEQUENCE OF Biostruc-id OPTIONAL ,
    descr          SEQUENCE OF Biomol-descr OPTIONAL ,
    residue-graphs SEQUENCE OF Residue-graph
}
END
--**********************************************************************
--
-- Biological Macromolecule 3-D Structure Data Types for MMDB,
-- A Molecular Modeling Database
--
-- Definitions for a chemical graph
--
-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
--
-- National Center for Biotechnology Information
-- National Institutes of Health
-- Bethesda, MD 20894 USA
--
-- July, 1995
--
--**********************************************************************
MMDB-Chemical-graph DEFINITIONS ::=
BEGIN
EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
Molecule-id, Residue-id, Atom-id;
IMPORTS Pub FROM NCBI-Pub
BioSource FROM NCBI-BioSource
Seq-id FROM NCBI-Seqloc
Biostruc-id FROM MMDB;
-- A biostruc graph contains the complete chemical graph of the biomolecular
-- assembly. The assembly graph is defined hierarchically, in terms of
-- subgraphs of component molecules. For PDB-derived biostrucs,
-- the molecules forming the assembly are the individual biopolymer chains and
-- any non-polymer or "heterogen" groups which are present.
-- The PDB-derived "compound name" field appears as the name within the
-- biostruc-graph description. PDB "class" and "source" fields appear as
-- explicit attributes. PDB-derived structures are assigned an assembly type
-- of "other" unless they have been further classified as the "physiological
-- form" or "crystallographic cell" contents. If they have, the source of the
-- type classification appears as a citation within the assembly description.
-- Note that the biostruc-graph also includes as literals the subgraphs of
-- any nonstandard residues present within it. For PDB-derived biostrucs these
-- subgraphs are constructed automatically, with validation as described below.
-- Biostruc-graph: the chemical graph of a biomolecular assembly. Only
-- the list of molecule graphs is mandatory.
Biostruc-graph ::= SEQUENCE {
    descr                SEQUENCE OF Biomol-descr OPTIONAL ,
    molecule-graphs      SEQUENCE OF Molecule-graph ,
    inter-molecule-bonds SEQUENCE OF Inter-residue-bond OPTIONAL ,
    residue-graphs       SEQUENCE OF Residue-graph OPTIONAL
}
-- A biomolecule description refers to the chemical structure of a molecule or
-- component substructures. This descriptor type is used at the level of
-- assemblies, molecules and residues, and also for residue-graph dictionaries.
-- The BioSource object type is drawn from NCBI taxonomy data specifications,
-- and is not repeated here.
-- Biomol-descr: a single descriptor of a molecule or of one of its
-- component substructures.
Biomol-descr ::= CHOICE {
    name          VisibleString ,
    pdb-class     VisibleString ,
    pdb-source    VisibleString ,
    pdb-comment   VisibleString ,
    other-comment VisibleString ,
    organism      BioSource ,
    attribution   Pub ,
    assembly-type INTEGER {
        physiological-form    (1) ,
        crystallographic-cell (2) ,
        other                 (255)
    } ,
    molecule-type INTEGER {
        dna              (1) ,
        rna              (2) ,
        protein          (3) ,
        other-biopolymer (4) ,
        solvent          (5) ,
        other-nonpolymer (6) ,
        other            (255)
    }
}
-- A molecule chemical graph is defined by a sequence of residues. Nonpolymers
-- are described in the same way, but may contain only a single residue.
-- Biopolymer molecules are identified within PDB entries according to their
-- appearance on SEQRES records, which formally define a biopolymer as such.
-- Biopolymers are defined by the distinction between ATOM and HETATM
-- coordinate records only in cases where the chemical sequence from SEQRES
-- is in conflict with coordinate data. The PDB-assigned chain code appears as
-- the name within the molecule descriptions of the biopolymers.
-- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups,
-- excluding any HETEROGEN groups which represent modified biopolymer residues.
-- These molecules are named according to the chain, residue type and residue
-- number fields as assigned by PDB. Any description appearing in the PDB HET
-- record appears as a pdb-comment within the molecule description.
-- Molecule types for PDB-derived molecule graphs are assigned by matching
-- residue and atom names against the PDB-documented standard types for protein,
-- DNA and RNA, and against residue codes commonly used to indicate solvent.
-- Classification is by "majority rule". If more than half of the residues in
-- a biopolymer are standard groups of one type, then the molecule is of that
-- type, and otherwise classified as "other". Note that this classification does
-- not preclude the presence of modified residues, but insists they constitute
-- less than half the biopolymer. Non-polymers are classified only as "solvent"
-- or "other".
-- Note that a molecule graph may also contain a set of cross references
-- to biopolymer sequence databases. All biopolymer molecules in MMDB contain
-- appropriate identifiers for the corresponding entry in the NCBI-Sequences
-- database, in particular the NCBI "gi" number, which may be used for sequence
-- retrieval. The Seq-id object type is defined in the NCBI molecular sequence
-- specification, and not repeated here.
-- Molecule-graph: the chemical graph of one molecule, given as a list of
-- residues plus optional bonds between those residues.
Molecule-graph ::= SEQUENCE {
    id                  Molecule-id ,
    descr               SEQUENCE OF Biomol-descr OPTIONAL ,
    seq-id              Seq-id OPTIONAL ,
    residue-sequence    SEQUENCE OF Residue ,
    inter-residue-bonds SEQUENCE OF Inter-residue-bond OPTIONAL
}
-- Molecule-id: integer identifier of a molecule (used by Molecule-graph
-- and Atom-pntr).
Molecule-id ::= INTEGER
-- Residues may be assigned a text-string name as well as an id number. PDB
-- assigned residue numbers appear as the residue name.
-- Residue: one residue of a molecule. It carries an id, an optional
-- text name, and a pointer to the residue graph that defines it.
Residue ::= SEQUENCE {
    id            Residue-id ,
    name          VisibleString OPTIONAL ,
    residue-graph Residue-graph-pntr
}
-- Residue-id: integer identifier of a residue (used by Residue and
-- Atom-pntr).
Residue-id ::= INTEGER
-- Residue graphs from different sources may be referenced within a molecule
-- graph. The allowed choices are the nonstandard residue graphs included in
-- the present biostruc, residue graphs within other biostrucs, or residue
-- graphs within tables of standard residue definitions.
-- Residue-graph-pntr: a reference to a residue graph held in one of
-- three places.
Residue-graph-pntr ::= CHOICE {
    -- a residue graph referenced by id only
    local    Residue-graph-id ,
    -- a residue graph inside the biostruc named by the pointer
    biostruc Biostruc-graph-pntr ,
    -- a residue graph inside the residue graph set named by the pointer
    standard Biostruc-residue-graph-set-pntr
}
-- Biostruc-graph-pntr: names a residue graph by biostruc id plus
-- residue graph id.
Biostruc-graph-pntr ::= SEQUENCE {
    biostruc-id      Biostruc-id ,
    residue-graph-id Residue-graph-id
}
-- Biostruc-residue-graph-set-pntr: names a residue graph by the id of a
-- residue graph set plus a residue graph id.
Biostruc-residue-graph-set-pntr ::= SEQUENCE {
    biostruc-residue-graph-set-id Biostruc-id ,
    residue-graph-id              Residue-graph-id
}
-- Residue graphs define atomic formulae, connectivity, chirality, and names.
-- For standard residue graphs from the MMDB dictionary the PDB-assigned
-- residue-type code appears as the name within the residue graph description,
-- and the full trivial name of the residue as a comment within that
-- description. For any nonstandard residue graphs provided with an MMDB
-- biostruc the PDB-assigned residue-type code similarly appears as the name
-- within the description, and any information provided on PDB HET records as
-- a pdb-comment within that description.
-- Note that nonstandard residue graphs for a PDB-derived biostruc may be
-- incomplete. Current PDB format cannot represent connectivity for groups
-- which are disordered, and for which no coordinates are given. In these
-- cases the residue graph defined in MMDB represents only the subgraph that
-- could be identified from available ATOM, HETATM and CONECT records.
-- Residue-graph: the chemical subgraph of one residue type. The id,
-- atoms and bonds are mandatory; the other components are optional.
Residue-graph ::= SEQUENCE {
    id    Residue-graph-id ,
    descr SEQUENCE OF Biomol-descr OPTIONAL ,
    residue-type INTEGER {
        deoxyribonucleotide (1) ,
        ribonucleotide      (2) ,
        amino-acid          (3) ,
        other               (255)
    } OPTIONAL ,
    iupac-code     SEQUENCE OF VisibleString OPTIONAL ,
    atoms          SEQUENCE OF Atom ,
    bonds          SEQUENCE OF Intra-residue-bond ,
    chiral-centers SEQUENCE OF Chiral-center OPTIONAL
}
-- Residue-graph-id: integer identifier of a residue graph (used by
-- Residue-graph and by the residue graph pointer types).
Residue-graph-id ::= INTEGER
-- Atoms in residue graphs are defined by elemental symbols and names. PDB-
-- assigned atom names appear here in the name field, except in cases of known
-- PDB synonyms. In these cases atom names are mapped to the names used in the
-- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where
-- PDB practice allows synonyms for several atom types. For PDB atoms the
-- elemental symbol is obtained by parsing the PDB atom name field, allowing
-- for known special-semantics cases where the atom name does not follow the
-- documented encoding rule. Ionizable protons are identified within standard
-- residue graphs in the MMDB dictionary, but not within automatically-defined
-- nonstandard graphs.
-- Atom: one atom of a residue graph. The numeric values of the element
-- enumeration run from h(1) to lr(103), followed by other(254) and
-- unknown(255).
Atom ::= SEQUENCE {
    id         Atom-id ,
    name       VisibleString OPTIONAL ,
    iupac-code SEQUENCE OF VisibleString OPTIONAL ,
    element ENUMERATED {
        h(1),    he(2),   li(3),   be(4),   b(5),
        c(6),    n(7),    o(8),    f(9),    ne(10),
        na(11),  mg(12),  al(13),  si(14),  p(15),
        s(16),   cl(17),  ar(18),  k(19),   ca(20),
        sc(21),  ti(22),  v(23),   cr(24),  mn(25),
        fe(26),  co(27),  ni(28),  cu(29),  zn(30),
        ga(31),  ge(32),  as(33),  se(34),  br(35),
        kr(36),  rb(37),  sr(38),  y(39),   zr(40),
        nb(41),  mo(42),  tc(43),  ru(44),  rh(45),
        pd(46),  ag(47),  cd(48),  in(49),  sn(50),
        sb(51),  te(52),  i(53),   xe(54),  cs(55),
        ba(56),  la(57),  ce(58),  pr(59),  nd(60),
        pm(61),  sm(62),  eu(63),  gd(64),  tb(65),
        dy(66),  ho(67),  er(68),  tm(69),  yb(70),
        lu(71),  hf(72),  ta(73),  w(74),   re(75),
        os(76),  ir(77),  pt(78),  au(79),  hg(80),
        tl(81),  pb(82),  bi(83),  po(84),  at(85),
        rn(86),  fr(87),  ra(88),  ac(89),  th(90),
        pa(91),  u(92),   np(93),  pu(94),  am(95),
        cm(96),  bk(97),  cf(98),  es(99),
        fm(100), md(101), no(102), lr(103),
        other(254), unknown(255)
    } ,
    -- three-valued flag: true, false or unknown
    ionizable-proton ENUMERATED {
        true    (1) ,
        false   (2) ,
        unknown (255)
    } OPTIONAL
}
-- Atom-id: integer identifier of an atom (used by Atom, Intra-residue-bond,
-- Chiral-center and Atom-pntr).
Atom-id ::= INTEGER
-- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
-- Unlike Inter-residue-bond defined later, its participating atoms are part of
-- a residue subgraph dictionary, not part of a specific biostruc-graph.
-- For residue graphs in the standard MMDB dictionary bonds are defined from
-- the known chemical structures of amino acids and nucleotides. For
-- nonstandard residue graphs bonds are defined from PDB CONECT records, with
-- validation for consistency with coordinate data, and from stereochemical
-- calculation to identify unreported bonds. Validation and bond identification
-- are based on comparison of inter-atomic distances to the sum of covalent
-- radii for the corresponding elements.
-- Intra-residue-bond: a bond between two atoms of the same residue
-- graph, with each atom named by its Atom-id.
Intra-residue-bond ::= SEQUENCE {
    atom-id-1 Atom-id ,
    atom-id-2 Atom-id ,
    bond-order INTEGER {
        single         (1) ,
        partial-double (2) ,
        aromatic       (3) ,
        double         (4) ,
        triple         (5) ,
        other          (6) ,
        unknown        (255)
    } OPTIONAL
}
-- Chiral centers are atoms with tetrahedral geometry. Chirality is defined
-- by a chiral volume involving the chiral center and 3 other atoms bonded to
-- it. For any coordinates assigned to atoms c, n1, n2, and n3, the vector
-- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
-- sign. The calculation assumes an orthogonal right-handed coordinate system
-- as is used for MMDB model structures.
-- Chirality is defined for standard residues in the MMDB dictionary, but is
-- not assigned automatically for PDB-derived nonstandard residues. If assigned
-- for nonstandard residues, the source of chirality information is described
-- by a citation within the residue description.
-- Chiral-center: the chiral atom c, three atoms bonded to it, and the
-- required sign of (n1-c) dot ( (n2-c) cross (n3-c) ).
Chiral-center ::= SEQUENCE {
    c  Atom-id ,
    n1 Atom-id ,
    n2 Atom-id ,
    n3 Atom-id ,
    sign ENUMERATED {
        positive (1) ,
        negative (2)
    }
}
-- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived
-- structures bonds are identified from biopolymer connectivity according to
-- SEQRES and from other connectivity information on SSBOND and CONECT
-- records. These data are validated and unreported bonds identified by
-- stereochemical calculation, using the same criteria as for intra-residue
-- bonds.
-- Inter-residue-bond: a bond between two atoms, with each atom named by
-- a full Atom-pntr (molecule, residue, atom).
Inter-residue-bond ::= SEQUENCE {
    atom-id-1 Atom-pntr ,
    atom-id-2 Atom-pntr ,
    bond-order INTEGER {
        single         (1) ,
        partial-double (2) ,
        aromatic       (3) ,
        double         (4) ,
        triple         (5) ,
        other          (6) ,
        unknown        (255)
    } OPTIONAL
}
-- Atoms, residues and molecules within the current biostruc are referenced
-- by hierarchical pointers.
-- Atom-pntr: a hierarchical pointer to one atom, given as
-- molecule id, then residue id, then atom id.
Atom-pntr ::= SEQUENCE {
    molecule-id Molecule-id ,
    residue-id  Residue-id ,
    atom-id     Atom-id
}
-- Atom-pntr-set: a list of Atom-pntr values.
Atom-pntr-set ::= SEQUENCE OF Atom-pntr
END
--$Revision: 1000.1 $
--**********************************************************************
--
-- Biological Macromolecule 3-D Structure Data Types for MMDB,
-- A Molecular Modeling Database
--
-- Definitions for structural models
--
-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
--
-- National Center for Biotechnology Information
-- National Institutes of Health
-- Bethesda, MD 20894 USA
--
-- July, 1996
--
--**********************************************************************
MMDB-Structural-model DEFINITIONS ::=
BEGIN
EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id;
IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment,
Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features
Biostruc-id FROM MMDB
Pub FROM NCBI-Pub;
-- A structural model maps chemical components into a measured three-
-- dimensional space. PDB-derived biostrucs generally contain 4 models,
-- corresponding to "views" of the structure of a biomolecular assembly with
-- increasing levels of complexity. Model types indicate the complexity of the
-- view.
-- The model named "NCBI all atom" represents a view suitable for most
-- computational biology applications. It provides complete atomic coordinate
-- data for a "single best" model, omitting statistical disorder information
-- and/or ensemble structure descriptions provided in the source PDB file.
-- Construction of the single best model is based on the assumption that the
-- contents of the "alternate conformation" field from pdb imply no correlation
-- among the occupancies of multiple sites assigned to sets of atoms: the best
-- site is chosen only on the basis of highest occupancy. Note, however, that
-- alternate conformation sets where correlation is implied are generally
-- constrained in crystallographic refinement to have uniform occupancy, and
-- will thus be selected as a set. For ensemble models the model which assigns
-- coordinates to the most atoms is chosen. If numbers of coordinates are the
-- same, the model occurring first in the PDB file is selected. The single
-- best model includes complete coordinates for all nonpolymer components, but
-- omits those classified as "solvent". Model type is 3 for this model.
-- The model named "NCBI backbone" represents a simple view intended for
-- graphic displays and rapid transmission over a network. It includes only
-- alpha carbon or backbone phosphate coordinates for biopolymers. It is based
-- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI
-- all atom" model. The model type is set to 2. An even simpler model gives
-- only a cartoon representation, using cylinders corresponding to secondary
-- structure elements. This is named "NCBI vector", and has model type 1.
-- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete
-- information provided by PDB, including full descriptions of statistical
-- disorder. The name of the model is based on the contents of the PDB MODEL
-- record, with a default name of "PDB Model 1" for PDB files which contain
-- only a single model. Construction of these models is based on the
-- assumption that contents of the PDB "alternate conformation" field are
-- intended to imply correlation among the occupancies of atom sets flagged by
-- the same identifier. The special flag " " (blank) is assumed to indicate
-- sites occupied in all alternate conformations, and sites flagged otherwise,
-- together with " ", to indicate a distinct member of an ensemble of
-- alternate conformations. Note that construction of ensemble members
-- according to these assumptions requires two validation checks on PDB
-- "alternate conformation" flags: they must be unique among sites assigned to
-- the same atom, and that the special " " flag must occur only for unique
-- sites. Sites which violate the first check are flagged as "u", for
-- "unknown"; they are omitted from all ensemble definitions but are
-- nonetheless retained in the coordinate list. Sites which violate the second
-- check are flagged "b" for "blank", and are included in an appropriately
-- named ensemble. The model type for pdb all models is 4.
-- Note that in the MMDB database models are stored in the ASN.1 stream in
-- order of increasing model type value. Since models occur as the last item
-- in a biostruc, parsers may avoid reading the entire stream if the desired
-- model is one of the simplified types, which occur first in the stream. This
-- can save considerable I/O time, particularly for large ensemble models from
-- NMR determinations.
-- Biostruc-model: one structural model of a biostruc. The id and type
-- are mandatory; description, model space and coordinates are optional.
Biostruc-model ::= SEQUENCE {
    id                Model-id ,
    type              Model-type ,
    descr             SEQUENCE OF Model-descr OPTIONAL ,
    model-space       Model-space OPTIONAL ,
    model-coordinates SEQUENCE OF Model-coordinate-set OPTIONAL
}
-- Integer identifier of a model; it is the type of Biostruc-model.id.
Model-id ::= INTEGER
-- Named values for the kind of model. The notes preceding Biostruc-model
-- state that the "NCBI vector" cartoon model has type 1 and that the type
-- for PDB all-models entries is 4.
Model-type ::= INTEGER {
    ncbi-vector   (1) ,
    ncbi-backbone (2) ,
    ncbi-all-atom (3) ,
    pdb-model     (4) ,
    other         (255) }
-- A single descriptive item for a model or a coordinate set. Every
-- alternative is free text except attribution, which carries a Pub.
Model-descr ::= CHOICE {
    name           VisibleString ,
    pdb-reso       VisibleString ,    -- presumably the PDB resolution text; TODO confirm
    pdb-method     VisibleString ,    -- presumably the PDB experimental method; TODO confirm
    pdb-comment    VisibleString ,
    other-comment  VisibleString ,
    attribution    Pub }
-- The model space defines measurement units and any external reference frame.
-- Coordinates refer to a right-handed orthogonal system defined on axes
-- tagged x, y and z in the coordinate and feature definitions of a biostruc.
-- Coordinates from PDB-derived structures are reported without change, in
-- angstrom units. The units of temperature and occupancy factors are not
-- defined explicitly in PDB, but are inferred from their value range.
-- Measurement units for a model plus an optional external reference frame.
-- Only coordinate-units is mandatory; every other component is OPTIONAL.
Model-space ::= SEQUENCE {
    coordinate-units ENUMERATED {
        angstroms  (1) ,
        nanometers (2) ,
        other      (3) ,
        unknown    (255) } ,
    thermal-factor-units ENUMERATED {
        b       (1) ,
        u       (2) ,
        other   (3) ,
        unknown (255) } OPTIONAL ,
    occupancy-factor-units ENUMERATED {
        fractional (1) ,
        electrons  (2) ,
        other      (3) ,
        unknown    (255) } OPTIONAL ,
    density-units ENUMERATED {
        electrons-per-unit-volume (1) ,
        arbitrary-scale           (2) ,
        other                     (3) ,
        unknown                   (255) } OPTIONAL ,
    reference-frame Reference-frame OPTIONAL }
-- An external reference frame is a pointer to another biostruc, with an
-- optional operator to rotate and translate coordinates into its model space.
-- This item is intended for representation of homology-derived model
-- structures, and is not present for structures from PDB.
-- Pointer to another biostruc together with an optional operator that
-- rotates and translates coordinates into that biostruc's model space.
Reference-frame ::= SEQUENCE {
    biostruc-id           Biostruc-id ,
    rotation-translation  Transform OPTIONAL }
-- Atomic coordinates may be assigned literally or by reference to another
-- biostruc. The reference coordinate type is used to represent homology-
-- derived model structures. PDB-derived structures have literal coordinates.
-- Referenced coordinates identify another biostruc, any transformation to be
-- applied to coordinates from that biostruc, and a mapping of the chemical
-- graph of the present biostruc onto that of the referenced biostruc. They
-- give an "alignment" of atoms in the current biostruc with those in another,
-- from which the coordinates of matched atoms may be retrieved. For non-
-- atomic models "alignment" may also be represented by molecule and residue
-- equivalence lists. Referenced coordinates are a data item intended for
-- representation of homology models, with an explicit pointer to their source
-- information. They do not occur in PDB-derived models.
-- One coordinate set of a model. The coordinates are either stored
-- literally or obtained by reference through a chemical-graph alignment.
Model-coordinate-set ::= SEQUENCE {
    id     Model-coordinate-set-id OPTIONAL ,
    descr  SEQUENCE OF Model-descr OPTIONAL ,
    coordinates CHOICE {
        literal    Coordinates ,
        reference  Chem-graph-alignment } }
-- Integer identifier of a coordinate set; it is the type of the optional
-- Model-coordinate-set.id component.
Model-coordinate-set-id ::= INTEGER
-- Literal coordinates map chemical components into the model space. Three
-- mapping types are allowed, atomic coordinate models, density-grid models,
-- and surface models. A model consists of a sequence of such coordinate sets,
-- and may thus combine coordinate subsets which have a different source.
-- PDB-derived models contain a single atomic coordinate set, as they by
-- definition represent information from a single source.
-- Literal coordinates: exactly one of the three allowed mapping types.
Coordinates ::= CHOICE {
    atomic   Atomic-coordinates ,     -- atomic coordinate model
    surface  Surface-coordinates ,    -- surface model
    density  Density-coordinates }    -- density-grid model
-- Literal atomic coordinate values give location, occupancy and order
-- parameters, and a pointer to a specific atom defined in the biostruc graph.
-- Temperature and occupancy factors have their conventional crystallographic
-- definitions, with units defined in the model space declaration. Atoms,
-- sites, temperature-factors, occupancies and alternate-conformation-ids
-- are parallel arrays, i.e. they have the same number of values as given by
-- number-of-points. Conformation ensembles represent distinct correlated-
-- disorder subsets of the coordinates. They will be present only for certain
-- "views" of PDB structures, as described above. Their derivation from PDB-
-- supplied "alternate-conformation" ids is described below.
-- Atomic coordinate data. The atoms, sites, temperature-factors, occupancies
-- and alternate-conf-ids components are parallel arrays whose length is
-- given by number-of-points.
Atomic-coordinates ::= SEQUENCE {
    number-of-points     INTEGER ,
    atoms                Atom-pntrs ,
    sites                Model-space-points ,
    temperature-factors  Atomic-temperature-factors OPTIONAL ,
    occupancies          Atomic-occupancies OPTIONAL ,
    alternate-conf-ids   Alternate-conformation-ids OPTIONAL ,
    conf-ensembles       SEQUENCE OF Conformation-ensemble OPTIONAL }
-- The atoms whose location is described by each coordinate are identified
-- via a hierarchical pointer to the chemical graph of the biomolecular
-- assembly. Coordinates may be matched with atoms in the chemical structure
-- by the values of the molecule, residue and atom id's given here, which
-- match exactly the items of the same type defined in Biostruc-graph.
-- Coordinates are given as integer values, with a scale factor to convert
-- to real values for each x, y or z, in the units indicated in model-space.
-- Integer values must be divided by the scale factor. This use of integer
-- values reduces the ASN.1 stream size. The scale factors for temperature
-- factors and occupancies are given separately, but must be used in the same
-- fashion to produce properly scaled real values.
-- Point locations stored as integers. Dividing an x, y or z value by
-- scale-factor gives the real value in the units of the model space.
Model-space-points ::= SEQUENCE {
    scale-factor  INTEGER ,
    x             SEQUENCE OF INTEGER ,
    y             SEQUENCE OF INTEGER ,
    z             SEQUENCE OF INTEGER }
-- Temperature factors, given in either isotropic or anisotropic form.
Atomic-temperature-factors ::= CHOICE {
    isotropic    Isotropic-temperature-factors ,
    anisotropic  Anisotropic-temperature-factors }
-- Isotropic temperature factors as scaled integers; the scale factor is
-- applied in the same way as for coordinates (divide by scale-factor).
Isotropic-temperature-factors ::= SEQUENCE {
    scale-factor  INTEGER ,
    b             SEQUENCE OF INTEGER }
-- Anisotropic temperature factors as scaled integers: six arrays named
-- b-11 through b-33 that share a single scale-factor.
Anisotropic-temperature-factors ::= SEQUENCE {
    scale-factor  INTEGER ,
    b-11          SEQUENCE OF INTEGER ,
    b-12          SEQUENCE OF INTEGER ,
    b-13          SEQUENCE OF INTEGER ,
    b-22          SEQUENCE OF INTEGER ,
    b-23          SEQUENCE OF INTEGER ,
    b-33          SEQUENCE OF INTEGER }
-- Occupancies as scaled integers; the scale factor is applied in the same
-- way as for coordinates (divide by scale-factor).
Atomic-occupancies ::= SEQUENCE {
    scale-factor  INTEGER ,
    o             SEQUENCE OF INTEGER }
-- An alternate conformation id is optionally associated with each coordinate.
-- Aside from corrections due to the validation checks described above, the
-- contents of MMDB Alternate-conformation-ids are identical to the PDB
-- "alternate conformation" field.
-- Ordered list of alternate conformation ids; it is the type of the optional
-- Atomic-coordinates.alternate-conf-ids component.
Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id
-- A single alternate conformation flag, carried as a string.
Alternate-conformation-id ::= VisibleString
-- Correlated disorder ensemble is defined by a set of alternate conformation
-- id's which identify coordinates relevant to that ensemble. These are
-- defined from the validated and corrected contents of the PDB "alternate
-- conformation" field as described above. A given ensemble, for example, may
-- consist of atom sites flagged by " " and "A" Alternate-conformation-ids.
-- Names for ensembles are constructed from these flags. This example would be
-- named, in its description, "PDB Ensemble blank plus A".
-- Note that this interpretation is consistent with common PDB usage of the
-- "alternate conformation" field, but that PDB specifications do not formally
-- distinguish between correlated and uncorrelated disorder in crystallographic
-- models. Ensembles identified in MMDB thus may not correspond to the meaning
-- intended by PDB or the depositor. No information is lost, however, and
-- if the intended meaning is known alternative ensemble descriptions may be
-- reconstructed directly from the Alternate-conformation-ids.
-- Note that correlated disorder as defined here is allowed within an atomic
-- coordinate set but not between the multiple sets which may define a model.
-- Multiple sets within the same model are intended as a means to represent
-- assemblies modeled from different experimentally determined structures,
-- where correlated disorder between coordinate sets is not relevant.
-- A named correlated-disorder ensemble, identified by the set of alternate
-- conformation ids whose coordinates belong to it.
Conformation-ensemble ::= SEQUENCE {
    name          VisibleString ,
    alt-conf-ids  SEQUENCE OF Alternate-conformation-id }
-- Literal surface coordinates define the chemical components whose structure
-- is described by a surface, and the surface itself. The surface may be
-- either a regular geometric solid or a triangle-mesh of arbitrary shape.
-- A surface description: the chemical components it covers (contents) and
-- the surface itself, which is one of the listed shapes.
Surface-coordinates ::= SEQUENCE {
    contents  Chem-graph-pntrs ,
    surface   CHOICE {
        sphere     Sphere ,
        cone       Cone ,
        cylinder   Cylinder ,
        brick      Brick ,
        tmesh      T-mesh ,
        triangles  Triangles } }
-- Triangle-mesh surface held as scaled integer point arrays.
-- NOTE(review): the role of swap is not described in this file; presumably
-- there is one flag per point. Confirm against the consuming code.
T-mesh ::= SEQUENCE {
    number-of-points  INTEGER ,
    scale-factor      INTEGER ,
    swap              SEQUENCE OF BOOLEAN ,
    x                 SEQUENCE OF INTEGER ,
    y                 SEQUENCE OF INTEGER ,
    z                 SEQUENCE OF INTEGER }
-- Triangle surface: scaled integer point arrays x, y, z followed by the
-- arrays v1, v2, v3 that describe the triangles.
-- NOTE(review): v1, v2 and v3 presumably index into the point arrays, one
-- entry per triangle. Confirm against the consuming code.
Triangles ::= SEQUENCE {
    number-of-points     INTEGER ,
    scale-factor         INTEGER ,
    x                    SEQUENCE OF INTEGER ,
    y                    SEQUENCE OF INTEGER ,
    z                    SEQUENCE OF INTEGER ,
    number-of-triangles  INTEGER ,
    v1                   SEQUENCE OF INTEGER ,
    v2                   SEQUENCE OF INTEGER ,
    v3                   SEQUENCE OF INTEGER }
-- Literal density coordinates define the chemical components whose structure
-- is described by a density grid, parameters of this grid, and density values.
-- Density-grid model: the chemical components covered, the grid corners and
-- step counts along each axis, the axis ordering of the stored values, and
-- the density values themselves as scaled integers.
Density-coordinates ::= SEQUENCE {
    contents      Chem-graph-pntrs ,
    grid-corners  Brick ,
    grid-steps-x  INTEGER ,
    grid-steps-y  INTEGER ,
    grid-steps-z  INTEGER ,
    fastest-varying ENUMERATED {
        x (1) ,
        y (2) ,
        z (3) } ,
    slowest-varying ENUMERATED {
        x (1) ,
        y (2) ,
        z (3) } ,
    scale-factor  INTEGER ,
    density       SEQUENCE OF INTEGER }
END
--$Revision: 1000.1 $
--**********************************************************************
--
-- Biological Macromolecule 3-D Structure Data Types for MMDB,
-- A Molecular Modeling Database
--
-- Definitions for structural features and biostruc addressing
--
-- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
--
-- National Center for Biotechnology Information
-- National Institutes of Health
-- Bethesda, MD 20894 USA
--
-- July, 1996
--
--**********************************************************************
MMDB-Features DEFINITIONS ::=
BEGIN

EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
        Chem-graph-alignment, Sphere, Cone, Cylinder, Brick, Transform,
        Biostruc-feature-set-id, Biostruc-feature-id;

IMPORTS Biostruc-id FROM MMDB
        Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
        Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
        User-object FROM NCBI-General
        Pub FROM NCBI-Pub;

-- Named model features refer to sets of residues or atoms, or a region in
-- the model space. A few specific feature types are allowed for compatibility
-- with PDB usage, but the purpose of a named model feature is simply to
-- associate various types of information with a set of atoms or
-- residues, or a spatially-defined region of the model structure. They also
-- support association of various properties with each residue or atom of a
-- set.
-- PDB-derived secondary structure defines a single feature, represented as a
-- sequence of residue motifs, as are the contents of PDB SITE and
-- FTNOTE records. NCBI-assigned core and secondary structure descriptions
-- are also represented as a sequence of residue motifs.

Biostruc-feature-set ::= SEQUENCE {
    id        Biostruc-feature-set-id ,
    descr     SEQUENCE OF Biostruc-feature-set-descr OPTIONAL ,
    features  SEQUENCE OF Biostruc-feature }

Biostruc-feature-set-id ::= INTEGER

Biostruc-feature-set-descr ::= CHOICE {
    name           VisibleString ,
    pdb-comment    VisibleString ,
    other-comment  VisibleString ,
    attribution    Pub }

-- An explicitly specified type in Biostruc-feature allows for
-- efficient extraction and indexing of feature sets of a specific type.
-- Special types are provided for coloring and rendering, as
-- needed by molecular graphics programs.

Biostruc-feature ::= SEQUENCE {
    id    Biostruc-feature-id OPTIONAL ,
    name  VisibleString OPTIONAL ,
    type  INTEGER {
        helix      (1) ,
        strand     (2) ,
        sheet      (3) ,
        turn       (4) ,
        site       (5) ,
        footnote   (6) ,
        comment    (7) ,      -- new
        subgraph   (100) ,    -- NCBI domain reserved
        region     (101) ,
        core       (102) ,    -- user core definition
        supercore  (103) ,    -- NCBI reserved
        color      (150) ,    -- new
        render     (151) ,    -- new
        label      (152) ,    -- new
        transform  (153) ,    -- new
        camera     (154) ,    -- new
        script     (155) ,    -- for scripts
        alignment  (200) ,    -- VAST reserved
        similarity (201) ,
        multalign  (202) ,    -- multiple alignment
        indirect   (203) ,    -- new
        cn3dstate  (254) ,    -- Cn3D reserved
        other      (255) } OPTIONAL ,
    property CHOICE {
        color      Color-prop ,
        render     Render-prop ,
        transform  Transform ,
        camera     Camera ,
        script     Biostruc-script ,
        user       User-object } OPTIONAL ,
    location CHOICE {
        subgraph    Chem-graph-pntrs ,
        region      Region-pntrs ,
        alignment   Chem-graph-alignment ,
        similarity  Region-similarity ,
        indirect    Other-feature             -- new
    } OPTIONAL }

-- Other-feature allows for specifying location via reference to another
-- Biostruc-feature and its location.

Other-feature ::= SEQUENCE {
    biostruc-id  Biostruc-id ,
    set          Biostruc-feature-set-id ,
    feature      Biostruc-feature-id }

Biostruc-feature-id ::= INTEGER

-- Atom, residue or molecule motifs describe a substructure defined by a set
-- of nodes from the chemical graph. PDB secondary structure features are
-- described as a residue motif, since they are not associated with any one of
-- the multiple models that may be provided in a PDB file. NCBI-assigned
-- secondary structure is represented in the same way, even though it is
-- model specific, since this allows for simple mapping of the structural
-- feature onto a sequence-only representation. This addressing mode may also
-- be used to describe features to be associated with particular atoms,
-- as, for example, the chemical shift observed in an NMR experiment.

Chem-graph-pntrs ::= CHOICE {
    atoms      Atom-pntrs ,
    residues   Residue-pntrs ,
    molecules  Molecule-pntrs }

Atom-pntrs ::= SEQUENCE {
    number-of-ptrs  INTEGER ,
    molecule-ids    SEQUENCE OF Molecule-id ,
    residue-ids     SEQUENCE OF Residue-id ,
    atom-ids        SEQUENCE OF Atom-id }

Residue-pntrs ::= CHOICE {
    explicit  Residue-explicit-pntrs ,
    interval  SEQUENCE OF Residue-interval-pntr }

Residue-explicit-pntrs ::= SEQUENCE {
    number-of-ptrs  INTEGER ,
    molecule-ids    SEQUENCE OF Molecule-id ,
    residue-ids     SEQUENCE OF Residue-id }

Residue-interval-pntr ::= SEQUENCE {
    molecule-id  Molecule-id ,
    from         Residue-id ,
    to           Residue-id }

Molecule-pntrs ::= SEQUENCE {
    number-of-ptrs  INTEGER ,
    molecule-ids    SEQUENCE OF Molecule-id }

-- Region motifs describe features defined by spatial location, such as the
-- site specified by a coordinate value, or a region within a bounding volume.

Region-pntrs ::= SEQUENCE {
    model-id  Model-id ,
    region    CHOICE {
        site      SEQUENCE OF Region-coordinates ,
        boundary  SEQUENCE OF Region-boundary } }

-- Coordinate sites describe a region in space by reference to individual
-- coordinates, in a particular model. These coordinates may be either the
-- x, y and z values of atomic coordinates, the triangles of a surface mesh,
-- or the grid points of a density model. All are addressed in the same manner,
-- as coordinate indices which give offsets from the beginning of the
-- coordinate data arrays. A coordinate-index of 5, for example, refers to
-- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
-- values of a triangle mesh, or the 5th value in a density grid.
-- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
-- are represented as a region motif with addresses of type Region-coordinates.
-- Any names or descriptions provided by PDB are thus associated with the
-- indicated sites, in the indicated model.

Region-coordinates ::= SEQUENCE {
    model-coord-set-id  Model-coordinate-set-id ,
    number-of-coords    INTEGER OPTIONAL ,
    coordinate-indices  SEQUENCE OF INTEGER OPTIONAL }

-- Region boundaries are defined by regular solids located in the model space.

Region-boundary ::= CHOICE {
    sphere    Sphere ,
    cone      Cone ,
    cylinder  Cylinder ,
    brick     Brick }

-- A biostruc alignment establishes an equivalence of nodes in the chemical
-- graphs of two or more biostrucs. This may be mapped to a sequence
-- alignment in the case of biopolymers.
-- The 'dimension' component indicates the number of participants
-- in the alignment. For pairwise alignments, such as VAST
-- structure-structure alignments, the dimension will be always 2, with
-- biostruc-ids, alignment, and domain each containing two entries for an
-- aligned pair. The 'alignment' component contains a pair of Chem-graph-pntrs
-- specifying a like number of corresponding residues in each structure.
-- The 'domain' component specifies a region of each structure considered
-- in the alignment. Only one transform (for the second structure) and
-- one aligndata (for the pair) are provided for each VAST alignment.
--
-- For multiple alignments, a set of components are treated as
-- parallel arrays of length 'dimension'.
-- The 'transform' component moves each structure to align it with
-- the structure specified as the first element in the "parallel" array,
-- so necessarily the first transform is a NULL transform.
-- Align-stats are placeholders for scores.

Chem-graph-alignment ::= SEQUENCE {
    dimension     INTEGER DEFAULT 2 ,
    biostruc-ids  SEQUENCE OF Biostruc-id ,
    alignment     SEQUENCE OF Chem-graph-pntrs ,
    domain        SEQUENCE OF Chem-graph-pntrs OPTIONAL ,
    transform     SEQUENCE OF Transform OPTIONAL ,
    aligndata     SEQUENCE OF Align-stats OPTIONAL }

Align-stats ::= SEQUENCE {
    descr         VisibleString OPTIONAL ,
    scale-factor  INTEGER OPTIONAL ,
    vast-score    INTEGER OPTIONAL ,
    vast-mlogp    INTEGER OPTIONAL ,
    align-res     INTEGER OPTIONAL ,
    rmsd          INTEGER OPTIONAL ,
    blast-score   INTEGER OPTIONAL ,
    blast-mlogp   INTEGER OPTIONAL ,
    other-score   INTEGER OPTIONAL }

-- A biostruc similarity describes spatial features which are similar between
-- two or more biostrucs. Similarities are model dependent, and the model and
-- coordinate set ids of the biostrucs must be specified. They do not
-- necessarily map to a sequence alignment, as the regions referenced may
-- be pieces of a surface or grid, and thus not uniquely mappable to particular
-- chemical components.

Region-similarity ::= SEQUENCE {
    dimension     INTEGER DEFAULT 2 ,
    biostruc-ids  SEQUENCE OF Biostruc-id ,
    similarity    SEQUENCE OF Region-pntrs ,
    transform     SEQUENCE OF Transform }

-- Geometrical primitives are used in the definition of region motifs, and
-- also non-atomic coordinates. Spheres, cones, cylinders and bricks are
-- defined by a few points in the model space.

Sphere ::= SEQUENCE {
    center  Model-space-point ,
    radius  RealValue }

Cone ::= SEQUENCE {
    axis-top       Model-space-point ,
    axis-bottom    Model-space-point ,
    radius-bottom  RealValue }

Cylinder ::= SEQUENCE {
    axis-top     Model-space-point ,
    axis-bottom  Model-space-point ,
    radius       RealValue }

-- A brick is defined by the coordinates of eight corners. These are assumed
-- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the
-- digits 0 and 1 refer respectively to the x, y and z axes of a unit cube.
-- Opposite edges are assumed to be parallel.

Brick ::= SEQUENCE {
    corner-000  Model-space-point ,
    corner-001  Model-space-point ,
    corner-010  Model-space-point ,
    corner-011  Model-space-point ,
    corner-100  Model-space-point ,
    corner-101  Model-space-point ,
    corner-110  Model-space-point ,
    corner-111  Model-space-point }

Model-space-point ::= SEQUENCE {
    scale-factor  INTEGER ,
    x             INTEGER ,
    y             INTEGER ,
    z             INTEGER }

RealValue ::= SEQUENCE {
    scale-factor          INTEGER ,
    scaled-integer-value  INTEGER }

Transform ::= SEQUENCE {
    id     INTEGER ,
    moves  SEQUENCE OF Move }

Move ::= CHOICE {
    rotate     Rot-matrix ,
    translate  Trans-matrix }

-- A rotation matrix is defined by 9 numbers, given by row, i.e.,
-- with column indices varying fastest.
-- Coordinates, as a matrix with columns x, y, and z, are rotated
-- via multiplication with the rotation matrix.
-- A translation matrix is defined by 3 numbers, which are added to
-- the rotated coordinates for specified amount of translation.

Rot-matrix ::= SEQUENCE {
    scale-factor  INTEGER ,
    rot-11        INTEGER ,
    rot-12        INTEGER ,
    rot-13        INTEGER ,
    rot-21        INTEGER ,
    rot-22        INTEGER ,
    rot-23        INTEGER ,
    rot-31        INTEGER ,
    rot-32        INTEGER ,
    rot-33        INTEGER }

Trans-matrix ::= SEQUENCE {
    scale-factor  INTEGER ,
    tran-1        INTEGER ,
    tran-2        INTEGER ,
    tran-3        INTEGER }

-- The camera is a position relative to the world coordinates
-- of the structure referred to by a location.
-- This is used to set the initial position of the
-- camera using OpenGL. scale is the value used to scale the
-- other values from floating point to integer.

Camera ::= SEQUENCE {
    x          INTEGER ,
    y          INTEGER ,
    distance   INTEGER ,
    angle      INTEGER ,
    scale      INTEGER ,
    modelview  GL-matrix }

GL-matrix ::= SEQUENCE {
    scale  INTEGER ,
    m11    INTEGER ,
    m12    INTEGER ,
    m13    INTEGER ,
    m14    INTEGER ,
    m21    INTEGER ,
    m22    INTEGER ,
    m23    INTEGER ,
    m24    INTEGER ,
    m31    INTEGER ,
    m32    INTEGER ,
    m33    INTEGER ,
    m34    INTEGER ,
    m41    INTEGER ,
    m42    INTEGER ,
    m43    INTEGER ,
    m44    INTEGER }

Color-prop ::= SEQUENCE {
    r     INTEGER OPTIONAL ,
    g     INTEGER OPTIONAL ,
    b     INTEGER OPTIONAL ,
    name  VisibleString OPTIONAL }

-- Note that Render-prop is compatible with the Annmm specification,
-- i.e., its numbering schemes do not clash with those in Render-prop.
-- NOTE(review): the sentence above compares Render-prop with itself; the
-- second mention presumably should name the corresponding Annmm type.
-- Confirm against the Annmm specification.

Render-prop ::= INTEGER {
    default          (0) ,      -- Default view
    wire             (1) ,      -- use wireframe
    space            (2) ,      -- use spacefill
    stick            (3) ,      -- use stick model (thin cylinders)
    ballNStick       (4) ,      -- use ball & stick model
    thickWire        (5) ,      -- thicker wireframe
    hide             (9) ,      -- don't show this
    name             (10) ,     -- display its name next to it
    number           (11) ,     -- display its number next to it
    pdbNumber        (12) ,     -- display its PDB number next to it
    objWireFrame     (150) ,    -- display MMDB surface object as wireframe
    objPolygons      (151) ,    -- display MMDB surface object as polygons
    colorsetCPK      (225) ,    -- color atoms like CPK models
    colorsetbyChain  (226) ,    -- color each chain different
    colorsetbyTemp   (227) ,    -- color using isotropic Temp factors
    colorsetbyRes    (228) ,    -- color using residue properties
    colorsetbyLen    (229) ,    -- color changes along chain length
    colorsetbySStru  (230) ,    -- color by secondary structure
    colorsetbyHydro  (231) ,    -- color by hydrophobicity
    colorsetbyObject (246) ,    -- color each object differently
    colorsetbyDomain (247) ,    -- color each domain differently
    other            (255)
}

-- When a Biostruc-Feature with a Biostruc-script is initiated,
-- it should play the specified steps one at a time, setting the feature-do
-- list as the active display.
-- The camera can be set using a feature-do,
-- but it may be moved independently with
-- camera-move, which specifies how to move
-- the camera dynamically during the step along the path defined (e.g.,
-- a zoom, a rotate).
-- Any value of pause (in tenths of a second) will force a pause
-- after an image is shown.
-- If waitevent is TRUE, it will await a mouse or keypress and ignore
-- the pause value.

Biostruc-script ::= SEQUENCE OF Biostruc-script-step

Biostruc-script-step ::= SEQUENCE {
    step-id      Step-id ,
    step-name    VisibleString OPTIONAL ,
    feature-do   SEQUENCE OF Other-feature OPTIONAL ,
    camera-move  Transform OPTIONAL ,
    pause        INTEGER DEFAULT 10 ,
    waitevent    BOOLEAN ,
    extra        INTEGER ,
    jump         Step-id OPTIONAL }

Step-id ::= INTEGER

END
--$Revision: 1000.1 $
--**********************************************************************
--
-- Definitions for CDD's
--
-- NCBI Structure Group
--
-- National Center for Biotechnology Information
-- National Institutes of Health
-- Bethesda, MD 20894 USA
--
-- October 1999
--
-- asntool -m cdd.asn -w 100 -o cdd.h
-- asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h -M asn.all
--**********************************************************************
NCBI-Cdd DEFINITIONS ::=
-- NCBI Conserved Domain Definition
BEGIN

EXPORTS Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set;

IMPORTS Date FROM NCBI-General
        Pub FROM NCBI-Pub
        Biostruc-annot-set FROM MMDB
        Bioseq FROM NCBI-Sequence
        Seq-annot FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Org-ref FROM NCBI-Organism
        Seq-interval FROM NCBI-Seqloc
        Score-set FROM NCBI-Seqalign;

-- Cdd's should not exist without a unique integer id, but alternative
-- id's may be present as well.

Global-id ::= SEQUENCE {
    accession  VisibleString ,
    release    VisibleString OPTIONAL ,
    version    INTEGER OPTIONAL ,         -- version 0 is the seed
    database   VisibleString OPTIONAL     -- this is NOT the source, but rather
                                          -- the database the object resides in
}

Cdd-id ::= CHOICE {
    uid  INTEGER ,
    gid  Global-id
}

Cdd-id-set ::= SEQUENCE OF Cdd-id

-- The description of CDD's refers to the specific set of aligned sequences,
-- the region that is being aligned and the information contained in the
-- alignment. It may contain a lengthy comment
-- describing the function of the domain as well as its origin and all
-- other anecdotal information that can't be pressed into a rigid scheme.
-- Crosslinks to reference papers available in PubMed are possible as well.
-- There can be as many of these as you want in the CDD.

Cdd-descr ::= CHOICE {
    othername    VisibleString ,    -- alternative names for the CDD
    category     VisibleString ,    -- intracellular, extracellular, etc.
    comment      VisibleString ,    -- this is where annotations go
    reference    Pub ,              -- a citation
    create-date  Date ,             -- valid for the current version
    tax-source   Org-ref ,          -- holds the highest common node
    source       VisibleString ,    -- the database the seeds were created
                                    -- from, e.g. SMART, PFAM, etc..
    -- status indicates the processing status or final type for CD production
    status       INTEGER {
        unassigned      (0) ,
        finished-ok     (1) ,
        pending-release (2) ,
        other-asis      (3) ,
        matrix-only     (4) ,
        other           (255) }
}

Cdd-descr-set ::= SET OF Cdd-descr

-- The Cdd-tree contains the hierarchy of CDDs. This object is separate from
-- the Cdd's themselves to allow it to be retrieved separately and to
-- operate as an index.

Cdd-tree ::= SEQUENCE {
    name         VisibleString ,
    id           Cdd-id-set ,
    description  Cdd-descr-set OPTIONAL ,
    parents      Cdd-id-set OPTIONAL ,
    children     Cdd-id-set OPTIONAL ,
    siblings     Cdd-id-set OPTIONAL
}

Cdd-tree-set ::= SEQUENCE OF Cdd-tree

-- Matrix definitions, these are supposed to store PSSMs and corresponding
-- matrices of relative residue frequencies.
-- The number of columns and rows is listed explicitly, values in columns
-- are stored column by column, i.e. in groups of nrows values for each column.

Matrix ::= SEQUENCE {
    ncolumns      INTEGER ,
    nrows         INTEGER ,
    row-labels    SEQUENCE OF VisibleString OPTIONAL ,
    scale-factor  INTEGER ,
    columns       SEQUENCE OF INTEGER
}

-- Definition for a matrix of pairwise "distances", stored as the upper
-- triangle of a squared n x n matrix (excluding the diagonal). This is
-- supposed to store pairwise percentages of identical residues, pairwise
-- alignment scores or E-values from pairwise BLAST sequence comparisons.

Triangle ::= SEQUENCE {
    nelements  INTEGER ,
    scores     Score-set
}

-- The Cdd is the basic ASN.1 object storing an annotated and curated
-- set of alignments (formulated as a set of pairwise master-slave
-- alignments).
-- The alignment data are contained in Seq-align-sets and Biostruc-feature-sets.
-- Version numbers in Global-ids are meant to be updated every time the Cdd is
-- changed in a way that does not require Global-ids to be changed (sequences
-- added in update cycle, annotation changed).

Cdd ::= SEQUENCE {
    name           VisibleString ,
    id             Cdd-id-set ,
    description    Cdd-descr-set OPTIONAL ,
    seqannot       SEQUENCE OF Seq-annot OPTIONAL ,    -- contains the alignment
    features       Biostruc-annot-set OPTIONAL ,       -- contains structure alignments
                                                       -- or "core" definitions
    sequences      Seq-entry OPTIONAL ,                -- store as bioseq-set inside seq-entry
    profile-range  Seq-interval OPTIONAL ,             -- profile for this region only
                                                       -- also stores the Seq-id of the master
    trunc-master   Bioseq OPTIONAL ,                   -- holds the truncated master
                                                       -- which may be something like a
                                                       -- consensus, but still refers to the
                                                       -- sequence coord. frame in profile-range
    posfreq        Matrix OPTIONAL ,                   -- relative residue frequencies
    scoremat       Matrix OPTIONAL ,                   -- Position dependent score matrix
    distance       Triangle OPTIONAL                   -- pairwise distances for all seqs.
}

Cdd-set ::= SET OF Cdd

END
--$Revision: 1000.1 $
--****************************************************************
--
-- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
-- by Jonathan Epstein, February 1996
--
--****************************************************************
NCBI-Mime DEFINITIONS ::=
BEGIN

EXPORTS Ncbi-mime-asn1;

IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
        Seq-entry FROM NCBI-Seqset
        Seq-annot FROM NCBI-Sequence
        Medline-entry FROM NCBI-Medline;

-- Top-level MIME payload: exactly one of the alternatives below.
Ncbi-mime-asn1 ::= CHOICE {
    entrez      Entrez-general ,        -- just a structure
    alignstruc  Biostruc-align ,        -- structures & sequences & alignments
    alignseq    Biostruc-align-seq ,    -- sequence alignment
    strucseq    Biostruc-seq ,          -- structure & sequences
    strucseqs   Biostruc-seqs           -- structure & sequences & alignments
    -- others may be added here in the future
}

-- A master structure, its slave structures, their structure alignments,
-- and the associated sequences and sequence alignments.
Biostruc-align ::= SEQUENCE {
    master      Biostruc ,
    slaves      SET OF Biostruc ,
    alignments  Biostruc-annot-set ,    -- structure alignments
    sequences   SET OF Seq-entry ,      -- sequences
    seqalign    SET OF Seq-annot }

Biostruc-align-seq ::= SEQUENCE {       -- display seq structure align only
    sequences  SET OF Seq-entry ,       -- sequences
    seqalign   SET OF Seq-annot }

Biostruc-seq ::= SEQUENCE {             -- display structure seq added by yanli
    structure  Biostruc ,
    sequences  SET OF Seq-entry }

Biostruc-seqs ::= SEQUENCE {            -- display blast alignment along with
                                        -- neighbor's structure added by yanli
    structure  Biostruc ,
    sequences  SET OF Seq-entry ,       -- sequences
    seqalign   SET OF Seq-annot }

-- Named presentation styles; it is the type of Entrez-general.style.
Entrez-style ::= ENUMERATED {
    docsum     (1) ,
    genbank    (2) ,
    genpept    (3) ,
    fasta      (4) ,
    asn1       (5) ,
    graphic    (6) ,
    alignment  (7) ,
    globalview (8) ,
    report     (9) ,
    medlars    (10) ,
    embl       (11) ,
    pdb        (12) ,
    kinemage   (13) }

-- A single data object of one of several kinds, with a mandatory style
-- and optional title and location strings.
Entrez-general ::= SEQUENCE {
    title  VisibleString OPTIONAL ,
    data   CHOICE {
        ml          Medline-entry ,
        prot        Seq-entry ,
        nuc         Seq-entry ,
        genome      Seq-entry ,
        structure   Biostruc ,
        strucAnnot  Biostruc-annot-set } ,
    style     Entrez-style ,
    location  VisibleString OPTIONAL }

END
--$Revision: 1000.1 $
--*********************************************************************
--
-- access.asn
--
-- messages for data access
--
--*********************************************************************
NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

-- links between same class = neighbors
-- links between other classes = links

-- A set of links to one document type, with optional per-link weights.
Link-set ::= SEQUENCE {
    num      INTEGER ,                         -- number of links to this doc type
    uids     SEQUENCE OF INTEGER OPTIONAL ,    -- the links
    weights  SEQUENCE OF INTEGER OPTIONAL }    -- the weights

END
--$Revision: 1000.1 $
--**********************************************************************
--
-- NCBI Sequence Feature Definition Module
-- by James Ostell, 1994
--
--**********************************************************************
NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;

-- Definition of one sequence feature type: its labels, keys and groups.
FeatDef ::= SEQUENCE {
    typelabel     VisibleString ,    -- short label for type eg "CDS"
    menulabel     VisibleString ,    -- label for a menu eg "Coding Region"
    featdef-key   INTEGER ,          -- unique for this feature definition
    seqfeat-key   INTEGER ,          -- SeqFeat.data.choice from objfeat.h
    entrygroup    INTEGER ,          -- Group for data entry
    displaygroup  INTEGER ,          -- Group for data display
    molgroup      FeatMolType        -- Type of molecule used for (original
                                     -- comment is cut off; presumably the
                                     -- molecule type the feature applies to)
}

FeatMolType ::= ENUMERATED {
    aa   (1) ,     -- proteins
    na   (2) ,     -- nucleic acids
    both (3) }     -- both

FeatDefSet ::= SEQUENCE OF FeatDef     -- collections of definitions

-- A display group: an integer key paired with its name.
FeatDispGroup ::= SEQUENCE {
    groupkey   INTEGER ,
    groupname  VisibleString }

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

-- The display groups together with the feature definitions. This type is
-- not listed in EXPORTS.
FeatDefGroupSet ::= SEQUENCE {
    groups  FeatDispGroupSet ,
    defs    FeatDefSet }

END
--$Revision: 1000.1 $
--********************************************************************
--
-- Print Templates
-- James Ostell, 1993
--
--
--********************************************************************
NCBI-ObjPrt DEFINITIONS ::=
BEGIN

EXPORTS PrintTemplate, PrintTemplateSet;

-- A named print template with an optional label path and a format.
PrintTemplate ::= SEQUENCE {
    name       TemplateName ,              -- name for this template
    labelfrom  VisibleString OPTIONAL ,    -- ASN.1 path to get label from
    format     PrintFormat }

TemplateName ::= VisibleString

PrintTemplateSet ::= SEQUENCE OF PrintTemplate

-- How to print one ASN.1 component: the partial path it applies to, optional
-- label, prefix and suffix text, and the form to use.
PrintFormat ::= SEQUENCE {
    asn1    VisibleString ,             -- ASN.1 partial path for this
    label   VisibleString OPTIONAL ,    -- printable label
    prefix  VisibleString OPTIONAL ,
    suffix  VisibleString OPTIONAL ,
    form    PrintForm }

PrintForm ::= CHOICE {                  -- Forms for various ASN.1 components
    block         PrintFormBlock ,
    boolean       PrintFormBoolean ,
    enum          PrintFormEnum ,
    text          PrintFormText ,
    use-template  TemplateName ,
    user          UserFormat ,
    null          NULL }                -- rarely used

UserFormat ::= SEQUENCE {
    printfunc    VisibleString ,
    defaultfunc  VisibleString OPTIONAL }

PrintFormBlock ::= SEQUENCE {           -- for SEQUENCE, SET
    separator   VisibleString OPTIONAL ,
    components  SEQUENCE OF PrintFormat }

PrintFormBoolean ::= SEQUENCE {
    true   VisibleString OPTIONAL ,
    false  VisibleString OPTIONAL }

PrintFormEnum ::= SEQUENCE {
    values  SEQUENCE OF VisibleString OPTIONAL }

PrintFormText ::= SEQUENCE {
    textfunc  VisibleString OPTIONAL }

END
--$Revision: 1000.1 $
-- *********************************************************************
--
-- These are code and conversion tables for NCBI sequence codes
-- The ASN.1 for the sequences themselves is defined in seq.asn
--
-- Seq-map-table and Seq-code-table REQUIRE that codes start with 0
-- and increase continuously. So IUPAC codes, which are upper case
-- letters, will always have 65 zero cells before the codes begin. This
-- allows all codes to do indexed lookups for things
--
-- Valid names for code tables are:
-- IUPACna
-- IUPACaa
-- IUPACeaa
-- IUPACaa3 3 letter amino acid codes : parallels IUPACeaa
-- display only, not a data exchange type
-- NCBI2na
-- NCBI4na
-- NCBI8na
-- NCBI8aa
-- NCBIstdaa
-- probability types map to IUPAC types for display as characters
NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS
    Seq-code-table,
    Seq-map-table,
    Seq-code-set;

-- Sequence representations.
Seq-code-type ::= ENUMERATED {
    iupacna   (1),     -- IUPAC 1 letter nuc acid code
    iupacaa   (2),     -- IUPAC 1 letter amino acid code
    ncbi2na   (3),     -- 2 bit nucleic acid code
    ncbi4na   (4),     -- 4 bit nucleic acid code
    ncbi8na   (5),     -- 8 bit extended nucleic acid code
    ncbipna   (6),     -- nucleic acid probabilities
    ncbi8aa   (7),     -- 8 bit extended amino acid codes
    ncbieaa   (8),     -- extended ASCII 1 letter aa codes
    ncbipaa   (9),     -- amino acid probabilities
    iupacaa3  (10),    -- 3 letter code only for display
    ncbistdaa (11)     -- consecutive codes for std aas, 0-25
}

-- Table of sequence mappings from one code to another.
Seq-map-table ::= SEQUENCE {
    from      Seq-code-type,           -- code to map from
    to        Seq-code-type,           -- code to map to
    num       INTEGER,                 -- number of rows in table
    start-at  INTEGER DEFAULT 0,       -- index offset of first element
    table     SEQUENCE OF INTEGER      -- table of values, in from-to order
}

-- Table giving the names of the coded values of one code.
Seq-code-table ::= SEQUENCE {
    code        Seq-code-type,         -- name of code
    num         INTEGER,               -- number of rows in table
    one-letter  BOOLEAN,               -- symbol is ALWAYS 1 letter?
    start-at    INTEGER DEFAULT 0,     -- index offset of first element
    table       SEQUENCE OF SEQUENCE {
        symbol  VisibleString,         -- the printed symbol or letter
        name    VisibleString          -- an explanatory name or string
    },
    comps       SEQUENCE OF INTEGER OPTIONAL   -- pointers to complement nuc acid
}

-- Code tables and map tables bundled for distribution; both optional.
Seq-code-set ::= SEQUENCE {
    codes  SET OF Seq-code-table OPTIONAL,
    maps   SET OF Seq-map-table OPTIONAL
}

END